# Import Libraries

In [93]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Import Data from .csv

In [94]:
# Load the prepared 2020 feature table and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine without editing it.
raw_csv_path = r'C:\Users\zhaor\OneDrive - McMaster University\COMPSCI 4AL3\Final Project\NY-electricity-load-prediction\data\Prepared data\2020_features.csv'
df_raw = pd.read_csv(raw_csv_path)
df_raw.head()

Unnamed: 0,timestamp,load,weathertime,temp,wspd,pressure,precip_hrly,dow,doy,day,month,hour,minute,year
0,2020-01-01 00:00:00,1208.4,2020-01-01 00:51:00,40,14.0,29.63,0.0,2,1,1,1,0,0,2020
1,2020-01-01 00:05:00,1191.0,2020-01-01 00:51:00,40,14.0,29.63,0.0,2,1,1,1,0,5,2020
2,2020-01-01 00:10:00,1195.5,2020-01-01 00:51:00,40,14.0,29.63,0.0,2,1,1,1,0,10,2020
3,2020-01-01 00:15:00,1210.9,2020-01-01 00:51:00,40,14.0,29.63,0.0,2,1,1,1,0,15,2020
4,2020-01-01 00:20:00,1186.3,2020-01-01 00:51:00,40,14.0,29.63,0.0,2,1,1,1,0,20,2020


# Add Day Type (weekday, weekend, holiday)

In [95]:
import datetime
import holidays

def is_workday(date:datetime.date):
    """
    Classify a date as a workday (1) or a non-workday (0).

    A date is a non-workday when it falls on a Saturday or Sunday, or when it
    is contained in the ``holidays.US()`` calendar.

    The holiday calendar is created once, on the first call that needs it, and
    cached on the function object. This function is applied row by row to a
    whole DataFrame column, so rebuilding the calendar on every call would
    repeat the same work for every row.

    Args:
        date (datetime.date): The date to check. It must provide ``weekday()``
            and be usable with the calendar's ``in`` test.

    Returns:
        int: 1 if workday else 0.
    """
    # Cheap check first: weekends never need a holiday lookup.
    if date.weekday() >= 5:  # Saturday is 5, Sunday is 6
        return 0

    # Lazily build the US holiday calendar and reuse it across calls.
    us_holidays = getattr(is_workday, "_us_holidays", None)
    if us_holidays is None:
        us_holidays = is_workday._us_holidays = holidays.US()

    # A weekday that is a holiday is not a workday.
    if date in us_holidays:
        return 0

    # Otherwise, it's a regular weekday.
    return 1


In [96]:
# Parse the timestamp strings into timezone-aware (UTC) datetimes, then tag
# each row as workday (1) or non-workday (0) with is_workday.
parsed_timestamps = pd.to_datetime(
    df_raw['timestamp'],
    format='%Y-%m-%d %H:%M:%S',
    utc=True,
)
df_raw['datetime'] = parsed_timestamps
df_raw['is_workday'] = df_raw['datetime'].apply(is_workday)

In [97]:
from sklearn.impute import SimpleImputer
# Keep only the columns used downstream, as an independent copy.
model_columns = ['timestamp', 'load', 'temp', 'is_workday']
df = df_raw[model_columns].copy()

# Replace missing 'load' values with the column mean. Only 'load' is imputed.
imputer = SimpleImputer(strategy='mean')
load_as_column = df['load'].values.reshape(-1, 1)  # the imputer expects 2-D input
df['load'] = imputer.fit_transform(load_as_column)

# The raw frame is no longer needed; drop the reference to free memory.
del df_raw

In [98]:
# Persist the reduced frame (timestamp, load, temp, is_workday) without the
# index column; TimeSeriesDataset below reads this file back by path.
df.to_csv('data.csv', index=False)

# Make Dataset

In [99]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import Compose

class TimeSeriesDataset(Dataset):
    """
    Sliding-window dataset over a multivariate time series stored in a CSV.

    Item ``i`` is the pair ``(sequence, target)`` where ``sequence`` holds rows
    ``i .. i + seq_len - 1`` of the ('load', 'temp', 'is_workday') columns and
    ``target`` is the 'load' value of row ``i + seq_len``, i.e. the next step.
    """

    def __init__(self, csv_file, seq_len, transform=None, target_transform=None):
        """
        Read the CSV and prepare the feature / target arrays.

        Args:
            csv_file: Path (or buffer) accepted by ``pd.read_csv``. The file
                must contain the columns 'timestamp', 'load', 'temp' and
                'is_workday'.
            seq_len (int): Number of consecutive rows in each input sequence.
            transform (callable, optional): Applied to the sequence tensor
                before it is returned.
            target_transform (callable, optional): Applied to the target
                tensor before it is returned.

        Raises:
            ValueError: If ``seq_len`` is smaller than 1.
        """
        super(TimeSeriesDataset, self).__init__()
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")

        self.df = pd.read_csv(csv_file)
        self.df['timestamp'] = pd.to_datetime(self.df['timestamp'])
        self.features = self.df[['load', 'temp', 'is_workday']].values
        self.targets = self.df['load'].values
        self.seq_len = seq_len
        # The constructor accepts these, so honour them instead of silently
        # dropping them.
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        # Each item needs seq_len input rows plus one target row. Clamp at 0
        # so a file shorter than seq_len gives an empty dataset, not a
        # negative length.
        return max(0, len(self.features) - self.seq_len)

    def __getitem__(self, idx):
        """
        Return the ``idx``-th (sequence, target) pair as float32 tensors.

        Negative indices count from the end. Out-of-range indices raise
        ``IndexError``.
        """
        n_items = len(self)
        if idx < 0:
            # Without this, a negative idx would slice an empty window and
            # pair it with an unrelated target.
            idx += n_items
        if not 0 <= idx < n_items:
            raise IndexError(f"index out of range for dataset of length {n_items}")

        window_end = idx + self.seq_len
        sequence = torch.tensor(self.features[idx:window_end, :], dtype=torch.float32)
        target = torch.tensor(self.targets[window_end], dtype=torch.float32)

        if self.transform:
            sequence = self.transform(sequence)
        if self.target_transform:
            target = self.target_transform(target)
        return sequence, target

In [100]:
# Re-read the saved CSV into a DataFrame.
# NOTE(review): `data` is not referenced by any of the cells visible here;
# TimeSeriesDataset reads 'data.csv' itself. Confirm whether this is still needed.
data = pd.read_csv('data.csv')

In [101]:
import numpy as np
from torch.utils.data import random_split

# Build the windowed dataset: 16 consecutive rows per input sequence.
dataset = TimeSeriesDataset('data.csv', seq_len=16)

# Seed the split so train / val / test membership is the same on every run.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# 80/20 train+val vs. test, then 80/20 train vs. val within the first part.
# NOTE(review): windows that start at neighbouring rows overlap, so a random
# split places near-identical windows in different subsets. Consider a
# chronological split for an unbiased estimate.
train_val_data, test_data = random_split(dataset, [0.8, 0.2], generator=split_generator)
train_data, val_data = random_split(train_val_data, [0.8, 0.2], generator=split_generator)

In [102]:
# Sanity check: show one (sequence, target) pair from the training split.
print(train_data[0])

(tensor([[1219.6000,   62.0000,    0.0000],
        [1218.3000,   62.0000,    0.0000],
        [1207.6000,   62.0000,    0.0000],
        [1207.6000,   62.0000,    0.0000],
        [1192.0000,   62.0000,    0.0000],
        [1195.2000,   62.0000,    0.0000],
        [1196.9000,   62.0000,    0.0000],
        [1183.9000,   62.0000,    0.0000],
        [1192.4000,   62.0000,    0.0000],
        [1161.8000,   62.0000,    0.0000],
        [1158.3000,   62.0000,    0.0000],
        [1162.3000,   62.0000,    0.0000],
        [1153.7000,   61.0000,    0.0000],
        [1149.2000,   61.0000,    0.0000],
        [1144.7000,   61.0000,    0.0000],
        [1134.6000,   61.0000,    0.0000]]), tensor(1132.6000))


# Make Dataloader

In [103]:
from torch.utils.data import DataLoader

# Mini-batch size shared by all three loaders.
batch_size = 256

# Only the training loader reshuffles its samples each epoch; validation and
# test are iterated in a fixed order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Model Implementation

In [104]:
import torch.nn as nn
import torch.nn.functional as F

import torch.nn as nn

class SimpleRNN(nn.Module):
    """
    Recurrent regressor: an LSTM stack followed by a linear layer that maps
    the hidden state of the last time step to a single output value.

    Despite the class name, the recurrent layer is ``nn.LSTM``.
    """

    def __init__(self, input_size, hidden_size, num_layers=1):
        """
        Args:
            input_size (int): Number of features per time step.
            hidden_size (int): Size of the LSTM hidden state.
            num_layers (int): Number of stacked LSTM layers.
        """
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """
        Args:
            x: Input tensor laid out as (batch, seq, input_size).

        Returns:
            Tensor of shape (batch, 1).
        """
        # nn.LSTM starts from zero hidden and cell states when none are given,
        # already on the input's device and dtype. Building the zeros by hand
        # on the CPU and copying them added a per-call transfer and broke for
        # non-float32 models.
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # keep only the last time step
        out = self.fc(out)

        return out



# Training

In [105]:
def model_evaluation(model, criterion, data_loader, device='cuda'):
    """
    Compute the average loss of ``model`` over ``data_loader`` without
    tracking gradients.

    The per-batch loss is weighted by the number of samples in the batch, so
    a smaller final batch does not skew the average. This assumes
    ``criterion`` returns the mean loss over the batch.

    The model is switched to evaluation mode for the pass and put back into
    whatever mode it was in before the call.

    Args:
        model: The network to evaluate.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        data_loader: Iterable yielding ``(inputs, labels)`` batches; labels
            are 1-D and get a trailing dimension added to match the outputs.
        device: Device the batches are moved to when CUDA is available.

    Returns:
        float: Sample-weighted mean loss, or NaN if the loader yields no data.
    """
    was_training = model.training
    total_loss = 0.0
    total_samples = 0

    model.eval()  # switch to evaluation mode
    try:
        with torch.no_grad():
            for inputs, labels in data_loader:

                # move to GPU
                if torch.cuda.is_available():
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                outputs = model(inputs)
                labels = labels.unsqueeze(1)

                # accumulate this batch's loss, weighted by its size
                loss = criterion(outputs, labels)
                n_samples = labels.size(0)
                total_loss += loss.item() * n_samples
                total_samples += n_samples
    finally:
        # restore the caller's mode instead of forcing training mode
        model.train(was_training)

    if total_samples == 0:
        return float('nan')

    return total_loss / total_samples

def training_loop(n_epochs, optimizer, model, criterion, train_loader, test_loader, verbose=False, device='cuda'):
    '''
    Train ``model`` for ``n_epochs`` and record the loss after every epoch.

    After each epoch the model is evaluated with ``model_evaluation`` on both
    loaders, so the reported training loss comes from a separate pass over
    ``train_loader`` and is not the running loss of the optimisation steps.

    Parameters
    ----------

    - n_epochs: number of passes over ``train_loader``
    - optimizer: optimizer already bound to the model's parameters
    - model: network to train
    - criterion: loss called as ``criterion(outputs, targets)``
    - train_loader: iterable of ``(x_batch, y_batch)`` used for training
    - test_loader: iterable of ``(x_batch, y_batch)`` used for the held-out loss
    - verbose: print the losses every epoch instead of every 10th epoch
    - device: device the batches are moved to when CUDA is available

    Return
    ------

    - train_losses: list with one training loss per epoch
    - test_losses: list with one held-out loss per epoch
    '''
    train_losses = []
    test_losses = []

    for n in range(n_epochs):

        # Make sure layers such as dropout are in training mode, whatever
        # state the model was handed over in.
        model.train()

        for x_batch, y_batch in train_loader:
            # move to GPU
            if torch.cuda.is_available():
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)

            outputs = model(x_batch)

            # targets come as a 1-D batch; add a trailing dim to match outputs
            y_batch = y_batch.unsqueeze(1)

            loss = criterion(outputs, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Forward the caller's device; otherwise evaluation would always fall
        # back to its own default.
        train_loss = model_evaluation(model, criterion, train_loader, device=device)
        test_loss = model_evaluation(model, criterion, test_loader, device=device)

        if (n % 10 == 0) or verbose:
            print(f'Epoch {n + 1}: Training loss {train_loss:.4f}, Validation Loss {test_loss:.4f}')
            print('----------------------------------------------------------')

        train_losses.append(train_loss)
        test_losses.append(test_loss)

    return train_losses, test_losses

In [106]:
input_size = 3  # Number of features
hidden_size = 64
num_layers = 2
num_epochs = 25

# Use the GPU when one is present and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SimpleRNN(input_size, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): 'load' is fed to the network unscaled, both as an input and as
# the target. Consider normalising it if the loss decreases slowly.
train_losses, val_losses = training_loop(num_epochs, optimizer, model, criterion, train_loader, val_loader, verbose=True, device=device)

Epoch 1: Training loss 1815400.9252, Validation Loss 1816174.6599
----------------------------------------------------------
Epoch 2: Training loss 1767189.4382, Validation Loss 1768669.9688
----------------------------------------------------------
Epoch 3: Training loss 1722850.1111, Validation Loss 1723014.0809
----------------------------------------------------------
Epoch 4: Training loss 1677766.3411, Validation Loss 1678574.8382
----------------------------------------------------------
Epoch 5: Training loss 1634991.0474, Validation Loss 1635103.2206
----------------------------------------------------------
Epoch 6: Training loss 1591762.6041, Validation Loss 1592502.9301
----------------------------------------------------------
Epoch 7: Training loss 1549733.4614, Validation Loss 1550701.6397
----------------------------------------------------------
Epoch 8: Training loss 1508657.0335, Validation Loss 1509662.9853
----------------------------------------------------------
