In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn

from helper import *

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Read the live data and hand it straight to the project helper that updates
# the dtype of the "published" timestamp column.
df = update_timestamps(
    pd.read_csv("../data/processed/live_60.csv"),
    "published",
)

# Column / dtype / non-null overview of the loaded frame.
df.info()

In [None]:
def integrate_garage(df, old, new=None, shift=1):
    """Write the difference between column ``old`` and its shifted copy.

    Every row receives ``df[old]`` minus the value of ``df[old]`` located
    ``shift`` rows away (as produced by ``Series.shift``); rows without such a
    counterpart end up as NaN.

    Args:
        df: frame to work on. The column is assigned on this very object.
        old: name of the source column.
        new: name of the column that receives the result. When omitted the
            source column ``old`` is overwritten.
        shift: row offset passed to ``Series.shift``.

    Returns:
        The same ``df`` object that was passed in.
    """
    target = old if new is None else new
    source = df[old]
    df[target] = source.sub(source.shift(shift))
    return df

def integrate_df(df, old, new=None, shift=1):
    """Apply ``integrate_garage`` to every ``title`` group of ``df``.

    Each group is sorted by its ``published`` column before the difference is
    taken, so the subtraction runs along that ordering inside a group and
    never across two different titles.

    Args:
        df: frame with at least the columns ``title``, ``published`` and
            ``old``. It is not modified.
        old: name of the column to difference.
        new: name of the result column; ``None`` overwrites ``old``.
        shift: row offset forwarded to ``integrate_garage``.

    Returns:
        A new frame holding the processed groups stacked in sorted ``title``
        order, with the original row labels kept. An empty ``DataFrame`` is
        returned when ``df`` has no groups.
    """
    # sort_values returns a fresh frame, so integrate_garage's in-place column
    # assignment never touches the caller's data.
    pieces = [
        integrate_garage(group.sort_values("published"), old, new, shift)
        for _, group in df.groupby("title", sort=True)
    ]

    if not pieces:
        # pd.concat raises on an empty list; keep the historic empty result.
        return pd.DataFrame()

    # One concat instead of DataFrame.append in a loop: append was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(pieces)

# Add the per-title difference of "free" as "free [i]", then drop every row
# that contains a missing value.
df = integrate_df(df, "free", "free [i]").dropna()

In [None]:
# Display the frame as it stands after the differencing / dropna step.
df

## PyTorch: datasets and forecasting models

In [None]:
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device:", device)

In [None]:
# "free" column (a Series) restricted to rows whose title is "Parkhaus Steinen".
sub_df = df.loc[df["title"] == "Parkhaus Steinen", "free"]

In [None]:

class TrainTestSplit:
    """Placeholder class; it currently carries no state and no behaviour."""

    def __init__(self):
        """Create an instance without initialising any attributes."""

# LSTM variant (disabled): targets are the input window shifted by one step
# def sliding_window(data, seq_len):
#     '''Creates a sliding window over the dataset with len 12/12 for train/test'''
#     X = []
#     Y = []
    
#     for i in range(len(data) - seq_len-seq_len):
#         x = data[i:(i+seq_len)].to_numpy()
#         y = data[(i+1):(i+seq_len+1)].to_numpy()
#         X.append(x)
#         Y.append(y)
        
#     return np.array(X), np.array(Y)

# CNN variant: every input window is paired with the single value that follows it
def sliding_window(data, seq_len):
    '''Cut a series into overlapping input windows and one-step targets.

    Window ``i`` holds the ``seq_len`` consecutive values starting at position
    ``i``; its target is the one value directly after the window.

    Args:
        data: pandas object exposing ``to_numpy()`` (e.g. a Series); it is
            sliced by position, its index labels are ignored.
        seq_len (int): number of values per input window.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays. For a 1-D series of length ``n``
        there are ``n - seq_len`` pairs, ``X`` has shape
        ``(n - seq_len, seq_len)`` and ``Y`` has shape ``(n - seq_len, 1)``.
        Both arrays are empty when the series is not longer than ``seq_len``.
    '''
    values = data.to_numpy()

    # A window starting at i needs positions i .. i + seq_len (window plus one
    # target value), so the last valid start is len(values) - seq_len - 1.
    # The previous bound subtracted seq_len twice and silently dropped the
    # final seq_len pairs.
    n_windows = len(values) - seq_len

    X = [values[i:i + seq_len] for i in range(n_windows)]
    Y = [values[i + seq_len:i + seq_len + 1] for i in range(n_windows)]

    return np.array(X), np.array(Y)

# Window length handed to sliding_window and the share of the series that
# goes into the training part.
train_seq_len, train_ratio = 12, 0.8

def train_test_split(df, train_ratio, train_seq_len):
    """Split a series by position and window both parts.

    The first ``int(len(df) * train_ratio)`` entries form the training part,
    everything after them the test part. Each part is turned into windows by
    ``sliding_window`` on its own, so no window spans the split point.

    Args:
        df: sliceable pandas object accepted by ``sliding_window``.
        train_ratio: fraction of the entries assigned to the training part.
        train_seq_len: window length forwarded to ``sliding_window``.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    cut = int(len(df) * train_ratio)
    head, tail = df[:cut], df[cut:]

    train_pair = sliding_window(head, train_seq_len)
    test_pair = sliding_window(tail, train_seq_len)

    return (*train_pair, *test_pair)

# Window the selected series into training and test arrays.
X_train, Y_train, X_test, Y_test = train_test_split(
    df=sub_df,
    train_ratio=train_ratio,
    train_seq_len=train_seq_len,
)

In [None]:
from torch.utils.data import Dataset

class TimeSeriesDataSet(Dataset):
    """Dataset that pairs input windows with their targets.

    Args:
        sequences: indexable collection whose items are numpy arrays (inputs).
        labels: indexable collection whose items are numpy arrays (targets),
            addressed with the same indices as ``sequences``.
    """

    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        """Number of samples, taken from ``sequences``."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(sequence, label)`` float32 tensors."""
        window, target = self.sequences[idx], self.labels[idx]
        return torch.from_numpy(window).float(), torch.from_numpy(target).float()

In [None]:
# Wrap the windowed arrays so they can be served by a DataLoader.
train_set = TimeSeriesDataSet(sequences=X_train, labels=Y_train)
test_set = TimeSeriesDataSet(sequences=X_test, labels=Y_test)

In [None]:
from torch.utils.data import DataLoader
# Seed torch's global random number generator before the loaders are built.
torch.manual_seed(42)
batch_size = 64

# Training batches are shuffled; test batches keep the dataset order.
train_dataloader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [None]:
# Pull a single batch to check the tensor shapes. The iterator is advanced
# with the built-in next(): Python 3 iterators only implement __next__, and
# current DataLoader iterators expose no ``.next()`` method.
example_batch = iter(train_dataloader)
sequences, labels = next(example_batch)
sequences.shape, labels.shape

In [None]:
# Display the raw tensors of the example batch.
sequences, labels

In [None]:
class LSTMForecaster2(nn.Module):
    '''
    Two stacked LSTM cells with a linear read-out, applied one time step at a
    time to a univariate series.

    Args:
        hidden_size (int): size of the hidden and cell state of both LSTM cells
        dropout (float): stored on the instance only; ``forward`` does not
            apply any dropout
    '''
    def __init__(self, hidden_size, dropout=0):
        super(LSTMForecaster2, self).__init__()

        self.hidden_size = hidden_size
        self.dropout = dropout

        self.lstm1 = nn.LSTMCell(1, hidden_size)
        self.lstm2 = nn.LSTMCell(hidden_size, hidden_size)
        self.linear = nn.Linear(hidden_size, 1)

    def _step(self, input_t, state1, state2):
        '''Advance both cells by one time step and read out one value.'''
        state1 = self.lstm1(input_t, state1)
        state2 = self.lstm2(state1[0], state2)
        return self.linear(state2[0]), state1, state2

    def forward(self, X, future=0):
        '''
        Run the network over a batch of sequences.

        Args:
            X (torch.tensor): input of shape (batch, seq_len), one value per step
            future (int): number of extra steps generated after the input is
                consumed; each extra step feeds the previous output back in
        Returns:
            torch.tensor of shape (batch, seq_len + future) holding one output
            per processed step
        Raises:
            ValueError: if ``future`` is positive but ``X`` has no time steps,
                because there is no output to feed back
        '''
        n_samples = X.size(0)

        # Create the initial states on the input's own device and dtype so
        # the module does not rely on a notebook-level ``device`` variable.
        state1 = (X.new_zeros(n_samples, self.hidden_size),
                  X.new_zeros(n_samples, self.hidden_size))
        state2 = (X.new_zeros(n_samples, self.hidden_size),
                  X.new_zeros(n_samples, self.hidden_size))

        if future > 0 and X.size(1) == 0:
            raise ValueError("future > 0 requires at least one input time step")

        outputs = []
        output = None

        # Pass 1: consume the observed values, one (batch, 1) slice per step.
        for input_t in X.split(1, dim=1):
            output, state1, state2 = self._step(input_t, state1, state2)
            outputs.append(output)

        # Pass 2: continue on the model's own outputs.
        for _ in range(future):
            output, state1, state2 = self._step(output, state1, state2)
            outputs.append(output)

        return torch.cat(outputs, dim=1)

In [None]:
from torch.optim import Adam
from torch.nn import MSELoss


class ModelTrainer():
    """Train and evaluate a forecasting model with Adam and an MSE loss.

    Args:
        model: ``nn.Module`` whose forward pass accepts a batch of sequences.
        lr: learning rate handed to the Adam optimiser.

    Attributes:
        log: dict with the keys ``"train"`` and ``"test"``; ``train_model``
            appends the summed batch losses of every epoch to these lists.
    """

    def __init__(self, model, lr):
        self.model = model
        self.lr = lr
        self.criterion = MSELoss()
        self.log = dict(
            train=[],
            test=[]
        )

    def _model_device(self):
        """Device of the model's first parameter (CPU if it has none)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device("cpu")

    @staticmethod
    def _align(pred, target):
        """Trim 2-D predictions to the trailing steps covered by the target.

        A model that emits one value per input step returns (batch, T) while
        the targets may hold fewer columns. Without trimming, MSELoss would
        broadcast the target over all T outputs. Predictions that already
        match, or that are not 2-D, are returned unchanged.
        """
        if pred.dim() == 2 and target.dim() == 2 and 0 < target.size(1) < pred.size(1):
            return pred[:, -target.size(1):]
        return pred

    def predict(self, x, y, future=0):
        """Run the model in evaluation mode and return a numpy array.

        Args:
            x: input batch; it is moved to the model's device.
            y: unused, kept so existing calls keep working.
            future: number of extra steps to request. It is only passed on
                when non-zero, so models whose forward takes a single
                argument can be used too.

        Returns:
            The complete model output as a numpy array on the CPU.
        """
        self.model.eval()
        x = x.to(self._model_device())
        with torch.no_grad():
            labels_pred = self.model(x, future) if future else self.model(x)
        # .cpu() is required before .numpy() when the model lives on a GPU.
        return labels_pred.detach().cpu().numpy()

    def train_model(self, train_dataloader, test_dataloader=None, num_epochs = 100):
        """Optimise the model and record the loss of every epoch.

        Args:
            train_dataloader: iterable of ``(sequences, labels)`` batches.
            test_dataloader: optional iterable of ``(sequences, labels)``
                batches evaluated after each training epoch.
            num_epochs: number of passes over ``train_dataloader``.

        The per-epoch sums of the batch losses are printed and appended to
        ``self.log``. The model is left in evaluation mode afterwards.
        """
        optimiser = Adam(self.model.parameters(), lr=self.lr)
        device = self._model_device()

        for epoch in range(num_epochs):
            print("epoch:", epoch)

            train_epoch_loss = 0
            test_epoch_loss = 0

            # Training mode enables layers such as dropout.
            self.model.train()
            for sequences, labels in train_dataloader:
                sequences = sequences.to(device)
                labels = labels.to(device)

                labels_pred = self._align(self.model(sequences).float(), labels)
                loss = self.criterion(labels_pred, labels)

                optimiser.zero_grad()
                loss.backward()
                optimiser.step()

                train_epoch_loss += loss.item()

            self.log["train"].append(train_epoch_loss)

            if test_dataloader is not None:
                # Evaluation mode switches dropout off so the test loss is
                # not perturbed by random masking.
                self.model.eval()
                with torch.no_grad():
                    for sequences, labels in test_dataloader:
                        sequences = sequences.to(device)
                        labels = labels.to(device)

                        labels_pred = self._align(self.model(sequences).float(), labels)
                        loss = self.criterion(labels_pred, labels)

                        test_epoch_loss += loss.item()

                self.log["test"].append(test_epoch_loss)

            print(train_epoch_loss, test_epoch_loss)

        self.model.eval()

In [None]:
# Build the LSTM forecaster on the selected device and train it.
hidden_size = 50
model = LSTMForecaster2(hidden_size=hidden_size).to(device)

trainer = ModelTrainer(model=model, lr=0.005)
trainer.train_model(
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
)

In [None]:
# Take the first test batch. The built-in next() is used because current
# DataLoader iterators expose no ``.next()`` method.
example_batch = iter(test_dataloader)
sequences, labels = next(example_batch)
sequences = sequences.to(device)
labels = labels.to(device)

# Forecast 12 additional steps beyond the input window without tracking
# gradients. The variable name ``restults`` is kept as is because the
# plotting cell further down reads it.
with torch.no_grad():
    labels_pred = trainer.model(sequences, future=12)
    restults = labels_pred.detach().cpu().numpy()

In [None]:
# Sample of the batch to visualise; clamped so a short final batch cannot
# raise an IndexError.
i = min(51, len(sequences) - 1)

sequences, labels = sequences.to("cpu"), labels.to("cpu")
n_inputs = len(sequences[i])

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(range(n_inputs), sequences[i], "-o", label="input window")
# sliding_window builds the target from the value directly after the input
# window, so it belongs at x = n_inputs rather than at x = 1.
ax.plot(range(n_inputs, n_inputs + len(labels[i])), labels[i], "o", label="target")
# The output of step t is fed back as the input of step t + 1, hence the
# one-step offset on the x axis.
ax.plot(range(1, len(restults[i]) + 1), restults[i], "-o", label="model output")

ax.set_xlabel("time step")
ax.set_ylabel("free")
ax.legend()
plt.show()

In [None]:
import torch.nn.functional as F

class CNNForecaster(nn.Module):
    def __init__(self, kernel_size=3, pool_size=2, padding=0, conv1_channels = 120,
                 conv2_channels=120, conv3_channels=120, fc_linear_1=180, dropout=0.5,
                 seq_len=12):
        '''
        1-D convolutional forecaster: three conv/ReLU/max-pool stages followed
        by two fully connected layers producing one value per sample.

        Args:
            kernel_size: kernel size shared by the three convolutions
            pool_size: kernel size of the max-pool (its stride is fixed to 1)
            padding: padding shared by the three convolutions
            conv1_channels, conv2_channels, conv3_channels: output channels
                of the respective convolution
            fc_linear_1: width of the hidden fully connected layer
            dropout: dropout probability applied after the hidden layer
            seq_len: length of the input sequences; used to size the first
                fully connected layer. With the default arguments this gives
                the ``conv3_channels * 3`` features that were hard-coded
                before.
        '''
        super(CNNForecaster, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=1, out_channels=conv1_channels, kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv1d(in_channels=conv1_channels, out_channels=conv2_channels, kernel_size=kernel_size, padding=padding)
        self.conv3 = nn.Conv1d(in_channels=conv2_channels, out_channels=conv3_channels, kernel_size=kernel_size, padding=padding)

        self.pool = nn.MaxPool1d(kernel_size=pool_size, stride=1)

        # Measure the flattened feature count with a dummy forward pass so
        # fc1 always matches the chosen kernel, pooling, padding and sequence
        # length. An input that is too short for the stack makes the
        # convolution raise here, at construction time.
        with torch.no_grad():
            n_flat = self._extract(torch.zeros(1, 1, seq_len)).flatten(1).size(1)

        self.fc1 = nn.Linear(in_features=n_flat, out_features=fc_linear_1)
        self.fc2 = nn.Linear(in_features=fc_linear_1, out_features=1)

        self.conv3_channels = conv3_channels

        self.dropout = nn.Dropout(p=dropout)

        self.flatten = nn.Flatten()

    def _extract(self, x):
        '''Apply the three conv -> ReLU -> max-pool stages to (batch, 1, length).'''
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        return x

    def forward(self, x):
        '''
        Applies the forward pass
        Args:
            x (torch.tensor): input of shape (batch, seq_len)
        Returns:
            x (torch.tensor): output of shape (batch, 1)
        '''
        # Conv1d expects (batch, channels, length); insert the single channel.
        x = x.reshape(x.shape[0], 1, x.shape[1])
        x = self._extract(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [None]:
# Build the CNN forecaster with its default settings and train it with the
# same trainer configuration as before.
model = CNNForecaster().to(device)

trainer = ModelTrainer(model=model, lr=0.005)
trainer.train_model(
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
)