<a href="https://colab.research.google.com/github/ToluPaul/electricity_consumption/blob/main/Time_Series_Analysis_Electricity_consumption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
import torch.optim as optim
from torch.utils.data import DataLoader
!pip install torchmetrics
from torchmetrics import MeanSquaredError
import warnings
warnings.filterwarnings('ignore')



In [None]:
#Create a function to preprocess the sequential data
def create_sequence(df, seq_length):
  xs, ys = [], []
  #interate over data indices
  for i in range(len(df) - seq_length):
    #define inputs
    '''this collects all datapoints for every length in the sequence'''
    x = df.iloc[i:(i+seq_length), 1]
    #define targets
    '''this collects the last data point for the length in the sequence'''
    y = df.iloc[i+seq_length, 1]
    #append the values to the generated list
    xs.append(x)
    ys.append(y)
  #return the numpy values of the generated list
  return np.array(xs), np.array(ys)

In [None]:
#read the training set and display the length
elec_cons = pd.read_csv('/content/drive/MyDrive/electricity_consump/electricity_train.csv')
# display first five rows and the length
display(elec_cons.head(), '' , len(elec_cons))


Unnamed: 0,timestamp,consumption
0,2011-01-01 00:15:00,-0.704319
1,2011-01-01 00:30:00,-0.704319
2,2011-01-01 00:45:00,-0.678983
3,2011-01-01 01:00:00,-0.653647
4,2011-01-01 01:15:00,-0.704319


''

105215

In [None]:
#using create sequence create the input and output variables
X_train, y_train = create_sequence(elec_cons, 24*4)
print(X_train.shape, y_train.shape)
xtrain1 = np.array(elec_cons['consumption'])
print()

(105119, 96) (105119,)



In [None]:
#Create TensorDataset
dataset_train = TensorDataset(
    torch.from_numpy(X_train).float(),
    torch.from_numpy(y_train).float()
)
#print the length of the dataset
print(len(dataset_train))

105119


In [None]:
#take a look at the train features
print(X_train)

[[-0.70431852 -0.70431852 -0.67898263 ... -0.65364675 -0.72990776
  -0.70431852]
 [-0.70431852 -0.67898263 -0.65364675 ... -0.72990776 -0.70431852
  -0.70431852]
 [-0.67898263 -0.65364675 -0.70431852 ... -0.70431852 -0.70431852
  -0.70431852]
 ...
 [ 0.79759271  0.82292859  0.84826447 ... -0.93259484 -0.93259484
  -0.90725895]
 [ 0.82292859  0.84826447  0.77200346 ... -0.93259484 -0.90725895
  -0.93259484]
 [ 0.84826447  0.77200346  0.72133169 ... -0.90725895 -0.93259484
  -0.93259484]]


In [None]:
#verify that the ouput of the sequence has been removed
105215 - 96

105119

In [None]:
#check for the last datapoint in the train features
print(X_train[0][95])
print(len(X_train[0]))
'''Apparently the train features is a collection of consecutive inputs for the
instantiated sequence length'''

-0.7043185184993116
96


'Apparently the train features is a collection of consecutive inputs for the\ninstantiated sequence length'

In [None]:
#find the length of the last series in the feature variable(x)
print(len(X_train[-1]))

96


In [None]:
#check if the last point in the target features is the same as the input
print(y_train[0])

-0.7043185184993116


In [None]:
#After discovering that the shape [32,96,1] would not be sufficient to take in the total input size of 105119
#Following a gemini prompt, the next suitable inputsize less than the total is 101376,
#To achieve this, we need the training set from [0:1056]
#print(X_train[0:1056])
xtrain1 = X_train[0:101376]
ytrain1 = y_train[0:101376]

#create a new dataset
dataset_train1 = TensorDataset(
    torch.from_numpy(xtrain1).float(),
    torch.from_numpy(ytrain1).float()
)

#create a new dataloader
dataloader_train1 = DataLoader(
    dataset_train1, batch_size=32
)

In [None]:
#check if the values in the new train dataset are the same with the old training set
xtrain1[101375][95] == y_train[101376]

True

In [None]:
#check the length of the dataset
print(ytrain1)

[-0.70431852 -0.70431852 -0.65364675 ...  1.45860593  1.4076808
  1.4076808 ]


In [None]:
#create a dataloader
dataloader_train = DataLoader(
    dataset_train, batch_size=101)

In [None]:
#check the batch
batch1 = next(iter(dataloader_train))
print(len(batch1[0]))

101


In [None]:
#create a sequence-to-vector architecture using rnn
class Net(nn.Module):
  def __init__(self):
    super().__init__()
    self.rnn = nn.RNN(
        input_size=1,
        hidden_size=32,
        num_layers=2,
        batch_first=True
    )
    self.fc = nn.Linear(32,1)
  def forward(self, x):
  #instantiate the first hidden layes with zeros
    h0 = torch.zeros(2, x.size(0), 32)
    #pass the input and first hidden state into the RNN layer
    out, _ = self.rnn(x, h0)
    #select only the last output by indexing the middle dimension with -1 pass the result through the linear layer and return
    out = self.fc(out[:, -1, :])
    return out

In [None]:
#create a  sequence-to-vector architecture using LSTM(Long_Short Term Memory)
class Net1(nn.Module):
  def __init__(self):
    super().__init__()
    self.lstm = nn.LSTM(
        #set the input size to the number of inputs for each sequence
        input_size=1,
        #arbitrarily set the hidden size and number of layers
        hidden_size=32,
        num_layers=2,
        #since the batch_size is equal to the same dimension set as true(96 in this case)
        batch_first=True
    )
    self.fc = nn.Linear(32,1)
    #define the foward pass
  def forward(self, x):
    #initiate the hidden states (short and long term state[h&c]) with zero
    h0 = torch.zeros(2, x.size(0), 32)
    c0 = torch.zeros(2, x.size(0), 32)
    #pass h and c as a tuple to the lstm layer
    out, _ = self.lstm(x, (h0, c0))
    #return the last output
    out = self.fc(out[:, -1, :])
    return out

In [None]:
#create a sequence to vector architecture using GRU(Gated Recurrent Unit)
class Net2(nn.Module):
  def __init__(self):
    super().__init__()
    self.gru = nn.GRU(
        input_size=1,
        hidden_size=32,
        num_layers=2,
        batch_first=True
    )
    self.fc= nn.Linear(32,1)
  #define the forward pass
  def forward(self, x):
    h0 = torch.zeros(2, x.size(0), 32)
    out, _ = self.gru(x, h0)
    out = self.fc(out[:, -1, :])
    return out

'''Plain RNN modules aren't usually used much these days because of their short term memory
    GRU's are less complicated than LSTMs which implies less computation
    Relative performance of GRU's and LSTM's varies by use case
    Learn to try both and evaluate the result'''

"Plain RNN modules aren't usually used much these days because of their short term memory\n    GRU's are less complicated than LSTMs which implies less computation\n    Relative performance of GRU's and LSTM's varies by use case\n    Learn to try both and evaluate the result"

In [None]:
#Training RNNs
net = Net()
#set up the mse loss
criterion = nn.MSELoss()
optimizer = optim.Adam(
    net.parameters(), lr = 0.0001
)
#Write the training loop
for epoch in range(3):
  for seqs, labels in dataloader_train1:
    optimizer.zero_grad()
    #Reshape model input
    seqs = seqs.view(32, 96, 1)
    #Get model output
    outputs = net(seqs)
    #Compute loss
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
  print(f"Epoch:{epoch +1}, loss:{loss.item()}")

Epoch:1, loss:0.8350523114204407
Epoch:2, loss:0.8271373510360718
Epoch:3, loss:0.8241744041442871


In [None]:
#Training LSTMs
net1 = Net1()
#using the same criterion write the training loop
for epoch in range(3):
  for seqs, labels in dataloader_train1:
    #Reshape model input
    seqs = seqs.view(32,96,1)
    #Get the model output
    outputs = net1(seqs)
    #compute the loss
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  print(f"Epoch{epoch + 1}, loss:{loss.item()}")

Epoch1, loss:3.206156015396118
Epoch2, loss:3.206156015396118
Epoch3, loss:3.206156015396118


In [None]:
#Training GRUs
net2 = Net2()
for epoch in range(3):
  for seqs, labels in dataloader_train1:
    #step optimization
    optimizer.step()
    #Reshape model input
    seqs = seqs.view(32,96,1)
    #Get model output
    output = net2(seqs)
    #calculate the loss
    loss = criterion(output, labels)
    #instanstiate zero gradient
    optimizer.zero_grad()
    #backward propagation
    loss.backward()
  print(f"Epoch:{epoch+1}, loss:{loss.item()}")

Epoch:1, loss:3.656557083129883
Epoch:2, loss:3.656557083129883
Epoch:3, loss:3.656557083129883


In [None]:
105119 % 37


2

In [None]:
len(dataloader_train)

1041

In [None]:
#A suggested chatgpt prompt for creating an rnn dataset
'''class MyRnnDataset(torch.utils.data.Dataset):
  def __init__(self, data_path, sequence_length):
    # Load your data from the specified path
    self.data = ...  # Load your data (text, time series, etc.)
    self.sequence_length = sequence_length

  def __len__(self):
    # Return the total number of sequences in your data
    return len(self.data) // self.sequence_length  # Assuming fixed sequence length

  def __getitem__(self, idx):
    # Get a single sequence of data at the given index
    sequence = self.data[idx * self.sequence_length: (idx + 1) * self.sequence_length]
    # Preprocess the sequence (e.g., convert to tensors, numerical representation)
    processed_sequence = ...
    return processed_sequence
'''

'class MyRnnDataset(torch.utils.data.Dataset):\n  def __init__(self, data_path, sequence_length):\n    # Load your data from the specified path\n    self.data = ...  # Load your data (text, time series, etc.)\n    self.sequence_length = sequence_length\n\n  def __len__(self):\n    # Return the total number of sequences in your data\n    return len(self.data) // self.sequence_length  # Assuming fixed sequence length\n\n  def __getitem__(self, idx):\n    # Get a single sequence of data at the given index\n    sequence = self.data[idx * self.sequence_length: (idx + 1) * self.sequence_length]\n    # Preprocess the sequence (e.g., convert to tensors, numerical representation)\n    processed_sequence = ... \n    return processed_sequence\n'

In [None]:
elec_test = pd.read_csv('/content/drive/MyDrive/electricity_consump/electricity_test.csv')
elec_test.head()

Unnamed: 0,timestamp,consumption
0,2014-01-01 00:00:00,-0.932595
1,2014-01-01 00:15:00,-0.957931
2,2014-01-01 00:30:00,-0.932595
3,2014-01-01 00:45:00,-0.907259
4,2014-01-01 01:00:00,-0.881923


In [None]:
X_test, y_test = create_sequence(elec_test, 24*4)
print(X_test.shape, y_test.shape)

(34944, 96) (34944,)


In [None]:
#check if the input size is matches the shape of the data
34944 % (32*96)

1152

In [None]:
#since it doesn't match the shape of the data, reduce the data to match the input size.
if (34944-1152) % (32*96) == 0:
  print(34944 - 1152)

33792


In [None]:
#from a gemini prompt the closest number for training is 32768
xtest1 = X_test[0:32768]
ytest1 = y_test[0:32768]

#create a tensor dataset
dataset_test = TensorDataset(
    torch.from_numpy(xtest1).float(),
    torch.from_numpy(ytest1).float()
)

#create a dataloader for test set
dataloader_test = DataLoader(
    dataset_test, batch_size=32, shuffle=False
)

In [None]:
#define the mse metrics
mse = MeanSquaredError()

net.eval()
with torch.no_grad():
  for seqs, labels in dataloader_test:
    seqs = seqs.view(32, 96, 1)
    #pass the sequence to net and squeeze the result
    output = net(seqs).squeeze()
    mse(output, labels)
#compute final metric value
test_mse = mse.compute()
print(f"Test MSE: {test_mse}")


Test MSE: 0.15471692383289337


In [None]:
mse1  = MeanSquaredError()

#test for the lstm
net.eval()
with torch.no_grad():
  for seqs, labels1 in dataloader_test:
    seqs = seqs.view(32, 96, 1)
    output1 = net1(seqs).squeeze()
    mse1(output1, labels1)
test_mse1 = mse1.compute()
print(f"Test MSE: {test_mse1}")

Test MSE: 0.6347039341926575


In [None]:
mse2 = MeanSquaredError()

#test for the gru
net.eval()
with torch.no_grad():
  for seqs, labels2 in dataloader_test:
    seqs = seqs.view(32, 96, 1)
    output2 = net2(seqs).squeeze()
    mse2(output2, labels2)
test_mse2 = mse2.compute()
print(f"Test MSE: {test_mse2}")

Test MSE: 0.7061682939529419
