In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [3]:
filepath = '../data/timeseries data/'
filenames = os.listdir(filepath)
data = np.zeros((len(filenames), 8760))

for i, filename in enumerate(filenames):
    row = pd.read_csv(filepath + filename)['Power'].values
    data[i] = row

del row

In [4]:
df = pd.read_csv('../data/labeled_data/labeled_data.csv')

In [5]:
labeled_data_1 = data[(df.Labels == 1).values]
labeled_data_2 = data[(df.Labels == 2).values]
labeled_data_3 = data[(df.Labels == 3).values]

In [6]:
scaler_1 = MinMaxScaler().fit(labeled_data_1.reshape(-1, 1))
scaled_data = scaler_1.transform(labeled_data_1.reshape(-1, 1))

In [7]:
scaled_data = scaled_data.reshape(labeled_data_1.shape)

In [8]:
scaled_data.shape

(80, 8760)

In [9]:
train, test = train_test_split(scaled_data, train_size = 0.75)

In [146]:
train.shape

(60, 8760)

In [132]:
def timeseries_generator(orig_data, input_size, output, batch_size = 1):
    orig_data = orig_data.tolist()
    X = []
    y = []
    n = (len(orig_data) - input_size - output) // batch_size
    for i in range(n):
        temp_X = []
        temp_y = []
        
        for j in range(batch_size):
            start_index = i * batch_size + j
            end_index = i * batch_size + j + input_size
            
            temp_X += [orig_data[start_index: end_index]]
            temp_y += [orig_data[end_index: end_index + output]]
        
        X += [temp_X]
        y += [temp_y]
        
    return X, y

In [131]:
input_size = 24
output = 8
batch_size = 64

In [153]:
train_X = []
train_y = []
test_X = []
test_y = []

for i in range(train.shape[0]):
    Xi, yi = timeseries_generator(scaled_data[i], input_size = input_size, output = output, batch_size = batch_size)
    train_X += Xi
    train_y += yi
    
for i in range(test.shape[0]):
    Xi, yi = timeseries_generator(scaled_data[i], input_size = input_size, output = output, batch_size = batch_size)
    test_X += Xi
    test_y += yi

In [155]:
train_X = torch.Tensor(train_X)
train_y = torch.Tensor(train_y)
test_X = torch.Tensor(test_X)
test_y = torch.Tensor(test_y)

In [135]:
train_X.shape

torch.Size([8160, 64, 24])

In [156]:
train_y.shape

torch.Size([8160, 64, 8])

In [157]:
test_X.shape

torch.Size([2720, 64, 24])

In [158]:
test_y.shape

torch.Size([2720, 64, 8])

In [139]:
class TimeseriesForecaster(nn.Module):
    def __init__(self, feature = 32):
        super().__init__()
        
        self.lstm1 = nn.LSTM(input_size = input_size, hidden_size = feature, num_layers = 2)
        self.fc1 = nn.Linear(feature, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 8)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.lstm1(x)[0])
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [140]:
model = TimeseriesForecaster(feature = 32).to('cpu')

In [141]:
model(train_X[0]).shape

torch.Size([64, 8])

In [142]:
loss_fn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [152]:
def train_model(model, trainloader, optimizer, loss_fn, n):
    model.train()
    m = 1000
    train_loss = 0
    total_train_loss = 0
    
    for i, (X, y) in enumerate(trainloader):
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        if i % m == m - 1:
            print('{} Epoch --> {}/{} with loss of {:.5f}'.format(
                    epoch + 1, i + 1, n, train_loss / m 
                    ))
            total_train_loss += train_loss
            train_loss = 0
    return total_train_loss / n
            
def evaluate_model(model, testloader, loss_fn, n):
    model.eval()
    with torch.no_grad():
        total_test_loss = 0

        for X, y in testloader:
            pred = model(X)
            loss = loss_fn(pred, y)
            total_test_loss += loss.item()

        return total_test_loss / n

In [144]:
epochs = 5

for epoch in range(epochs):
    print('-' * 50)
    
    trainloader = zip(train_X, train_y)
    testloader = zip(test_X, test_y)
    
    train_loss = train_model(model, trainloader, optimizer, loss_fn, n = train_X.shape[0])
    print('\nAverage train loss error --> {:.5f}'.format(train_loss))
    
    test_loss = evaluate_model(model, testloader, loss_fn, n = test_X.shape[0])
    print('Average test loss error --> {:.5f}\n'.format(test_loss))
    

--------------------------------------------------
1 Epoch --> 1000/8160 with loss of 0.05157
1 Epoch --> 2000/8160 with loss of 0.03488
1 Epoch --> 3000/8160 with loss of 0.03179
1 Epoch --> 4000/8160 with loss of 0.03382
1 Epoch --> 5000/8160 with loss of 0.03506
1 Epoch --> 6000/8160 with loss of 0.03608
1 Epoch --> 7000/8160 with loss of 0.03458
1 Epoch --> 8000/8160 with loss of 0.03475

Average train loss error --> 0.03585


AttributeError: 'list' object has no attribute 'shape'

In [160]:
test_loss = evaluate_model(model, testloader, loss_fn, n = test_X.shape[0])
print('Average test loss error --> {:.5f}\n'.format(test_loss))


AttributeError: 'list' object has no attribute 'dim'