## data loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn

import wandb

from helper import *

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from pytorch_model_summary import summary

In [None]:
# Authenticate this session with Weights & Biases; a run is opened further below with wandb.init.
wandb.login()

In [None]:
# read in live data
df = pd.read_csv("../data/processed/live_20.csv")

# update timestamp dtype
# NOTE(review): update_timestamps comes from the star-import of `helper`; presumably it
# converts the "published" column to a datetime dtype -- confirm in helper.
df = update_timestamps(df, "published")

# print column dtypes and non-null counts as a quick sanity check
df.info()

In [None]:
def integrate_garage(df, old, new=None, shift=1):
    """Add the change of column ``old`` over ``shift`` rows to ``df``.

    Args:
        df: DataFrame that is modified in place.
        old: name of the column to difference.
        new: name of the column receiving the result; ``old`` is overwritten
            when this is None.
        shift: row distance between the two values that are subtracted.

    Returns:
        The same ``df`` object with ``df[new] = df[old] - df[old].shift(shift)``.
        Rows without a predecessor ``shift`` rows earlier become NaN.
    """
    if new is None:
        new = old
    df[new] = df[old] - df[old].shift(shift)
    return df


def integrate_df(df, old, new=None, shift=1):
    """Apply ``integrate_garage`` separately to every ``"title"`` group.

    Each group is sorted by ``"published"`` before differencing, so the
    difference never crosses from one title into another.

    Args:
        df: DataFrame with at least the columns ``"title"``, ``"published"``
            and ``old``. It is not modified.
        old: name of the column to difference.
        new: name of the result column (defaults to overwriting ``old``).
        shift: row distance used for the difference.

    Returns:
        A new DataFrame with all groups stacked in order of first appearance
        of their title, keeping the original index labels. Rows whose title
        is NaN are not part of any group and are dropped. For an input
        without rows, an empty frame that already contains the result column
        is returned.
    """
    # sort_values returns a fresh frame per group, so the in-place column
    # assignment inside integrate_garage never touches the caller's df.
    pieces = [
        integrate_garage(group.sort_values("published"), old, new, shift)
        for _, group in df.groupby("title", sort=False)
    ]

    if not pieces:
        # pd.concat rejects an empty list; keep the schema instead.
        return integrate_garage(df.copy(), old, new, shift)

    # One concat instead of repeated DataFrame.append, which was removed in
    # pandas 2.0 and copied the accumulated frame on every iteration.
    return pd.concat(pieces)

# Add the per-title row-to-row change of "free" as column "free [i]".
df = integrate_df(df, "free", "free [i]")
# Drop every row containing a NaN in any column; this includes the first row of
# each title, which has no predecessor to difference against.
df = df.dropna()

In [None]:
# Map each title to the largest "total" value observed for it (shown as cell output).
dict(df.groupby('title').max()['total'])

## torch

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

In [None]:
# Restrict the data to one title and keep only its "free" column; despite the
# name, sub_df is therefore a Series, not a DataFrame.
sub_df = df.loc[df["title"]=="Parkhaus Steinen"]["free"]

In [None]:
#cnn
def sliding_window(data, seq_len):
    """Cut a series into (input window, next value) pairs.

    Args:
        data: observations in time order, e.g. a pandas Series or any
            array-like accepted by ``np.asarray``.
        seq_len: number of consecutive observations per input window
            (must be at least 1).

    Returns:
        Tuple ``(X, Y)`` of numpy arrays. ``X[i]`` holds
        ``data[i:i + seq_len]`` and ``Y[i]`` holds the single following
        observation ``data[i + seq_len]`` as a length-1 slice, so for 1-D
        input ``X`` has shape ``(n, seq_len)`` and ``Y`` has shape ``(n, 1)``
        with ``n = len(data) - seq_len``. Both arrays are empty when ``data``
        is not longer than ``seq_len``.

    Raises:
        ValueError: if ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")

    # Convert once instead of calling .to_numpy() on every slice.
    values = np.asarray(data)

    # The target is a single step, so the last usable window starts at
    # len - seq_len - 1. Subtracting seq_len twice (as before) silently
    # discarded the final seq_len valid pairs.
    n_windows = len(values) - seq_len

    X = [values[i:i + seq_len] for i in range(n_windows)]
    Y = [values[i + seq_len:i + seq_len + 1] for i in range(n_windows)]

    return np.array(X), np.array(Y)

def train_test_split(df, train_ratio, train_seq_len):
    """Split a series chronologically and window both parts.

    The first ``int(len(df) * train_ratio)`` observations form the training
    part and the remainder the test part. Each part is passed on its own to
    ``sliding_window`` with window length ``train_seq_len``, so no window
    spans the split point.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)`` as produced by
        ``sliding_window``.
    """
    split_at = int(len(df) * train_ratio)
    train_part, test_part = df[:split_at], df[split_at:]

    train_inputs, train_targets = sliding_window(train_part, train_seq_len)
    test_inputs, test_targets = sliding_window(test_part, train_seq_len)

    return train_inputs, train_targets, test_inputs, test_targets

In [None]:
# Number of consecutive observations fed to the model per sample.
train_seq_len = 48
# Fraction of the series (taken from the start) that is used for training.
train_ratio = 0.8
X_train, Y_train, X_test, Y_test = train_test_split(sub_df, train_ratio, train_seq_len)

In [None]:
from torch.utils.data import Dataset

class TimeSeriesDataSet(Dataset):
    """Dataset that serves (input window, target) pairs as float32 tensors.

    ``sequences`` and ``labels`` are indexable containers of numpy arrays of
    equal length; item ``idx`` pairs ``sequences[idx]`` with ``labels[idx]``.
    """

    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        # The number of samples is defined by the number of input windows.
        return len(self.sequences)

    def __getitem__(self, idx):
        # Convert lazily per item; .float() casts to float32.
        window = torch.from_numpy(self.sequences[idx]).float()
        target = torch.from_numpy(self.labels[idx]).float()
        return window, target

In [None]:
# Wrap the windowed numpy arrays so they can be consumed by torch DataLoaders.
train_set = TimeSeriesDataSet(X_train, Y_train)
test_set = TimeSeriesDataSet(X_test, Y_test)

In [None]:
from torch.utils.data import DataLoader
# Seed torch's global random number generator before the loaders are created.
torch.manual_seed(42)
batch_size = 64

# Training batches are drawn in shuffled order.
train_dataloader = DataLoader(
    dataset=train_set, 
    batch_size=batch_size, 
    shuffle=True)

# Test batches keep the order of the test set.
test_dataloader = DataLoader(
    dataset=test_set, 
    batch_size=batch_size, 
    shuffle=False)

In [None]:
# Pull a single batch from the training loader to inspect the tensor shapes.
# The built-in next() is used because iterator objects are not guaranteed to
# offer a Python-2 style .next() method (current DataLoader iterators do not).
example_batch = iter(train_dataloader)
sequences, labels = next(example_batch)
np.shape(sequences), np.shape(labels)

In [None]:
from torch.optim import Adam
from torch.nn import MSELoss


class ModelTrainer():
    """Train a regression model with Adam and a summed MSE loss, logging to wandb.

    Attributes:
        model: the ``nn.Module`` being trained.
        lr: learning rate handed to Adam.
        criterion: ``MSELoss(reduction='sum')``.
        score_train_last_epoch: per-sample residuals (prediction - label) of
            the training data in the most recent epoch.
        score_test_last_epoch: the same for the test data (empty list when no
            test loader was given).
        log: per-epoch history, one list per metric name.
    """

    def __init__(self, model, lr):
        self.model = model
        self.lr = lr
        # Summed (not averaged) squared error, so that epoch totals can be
        # divided by the number of samples afterwards.
        self.criterion = MSELoss(reduction='sum')
        self.score_test_last_epoch = []
        self.score_train_last_epoch = []
        self.log = dict(
            train_loss=[],
            test_loss=[],
            train_score=[],
            test_score=[],
            train_mae=[],
            test_mae=[]
        )

    def _model_device(self):
        """Return the device of the model's first parameter (CPU if it has none)."""
        first_param = next(self.model.parameters(), None)
        if first_param is None:
            return torch.device("cpu")
        return first_param.device

    @staticmethod
    def _rmse(summed_squared_error, n_samples):
        """Root of the summed squared error per sample; NaN when there are no samples."""
        if n_samples == 0:
            return float("nan")
        return np.sqrt(summed_squared_error / n_samples)

    @staticmethod
    def _mae(residuals):
        """Mean absolute value over all residual entries; NaN when there are none."""
        if len(residuals) == 0:
            return float("nan")
        flat = np.concatenate([np.ravel(r) for r in residuals])
        return float(np.mean(np.abs(flat)))

    def _run_epoch(self, dataloader, optimiser=None):
        """Run one pass over ``dataloader``.

        With an optimiser the model is put in training mode and updated after
        every batch; without one it is put in eval mode and evaluated without
        gradient tracking.

        Returns:
            ``(summed_loss, n_samples, residuals)`` where ``residuals`` is a
            list with one ``prediction - label`` array per sample.
        """
        training = optimiser is not None
        # Dropout / BatchNorm layers behave differently in the two modes.
        self.model.train(training)
        target_device = self._model_device()

        summed_loss = 0.0
        n_samples = 0
        residuals = []
        with torch.set_grad_enabled(training):
            for sequences, labels in dataloader:
                sequences = sequences.to(target_device)
                labels = labels.to(target_device)
                labels_pred = self.model(sequences)

                loss = self.criterion(labels_pred.float(), labels)
                if training:
                    optimiser.zero_grad()
                    loss.backward()
                    optimiser.step()

                n_samples += len(labels)
                summed_loss += loss.item()
                residuals.extend(
                    labels_pred.detach().cpu().numpy() - labels.detach().cpu().numpy()
                )
        return summed_loss, n_samples, residuals

    def predict(self, x, y, future=0):
        """Return the model's predictions for ``x`` as a numpy array.

        Args:
            x: input tensor; it is moved to the model's device.
            y: not used; kept so that existing calls keep working.
            future: when non-zero it is passed to the model as second
                positional argument. With the default ``0`` the model is
                called with ``x`` only, exactly as during training, so
                models whose ``forward`` takes a single input work as well.

        Returns:
            The complete model output, detached and copied to the CPU.
        """
        self.model.eval()
        x = x.to(self._model_device())
        with torch.no_grad():
            if future:
                labels_pred = self.model(x, future)
            else:
                labels_pred = self.model(x)
        return labels_pred.detach().cpu().numpy()

    def train_model(self, train_dataloader, test_dataloader=None, num_epochs = 100):
        """Train for ``num_epochs`` epochs and record metrics after each one.

        Per epoch the summed loss, RMSE and MAE are appended to ``self.log``
        and sent to ``wandb.log`` with ``step=epoch``. The notebook calls this
        inside a ``wandb.init`` context.

        Args:
            train_dataloader: iterable of ``(sequences, labels)`` batches used
                for optimisation.
            test_dataloader: optional iterable of batches that is only
                evaluated. When omitted, the test entries in ``self.log`` are
                NaN and no test metrics are sent to wandb.
            num_epochs: number of passes over ``train_dataloader``.
        """
        optimiser = Adam(self.model.parameters(), lr=self.lr)
        for epoch in range(num_epochs):
            print("epoch:", epoch+1)

            train_epoch_loss, len_train, residuals = self._run_epoch(
                train_dataloader, optimiser
            )
            if test_dataloader is not None:
                test_epoch_loss, len_test, residuals_test = self._run_epoch(
                    test_dataloader
                )
            else:
                test_epoch_loss, len_test, residuals_test = float("nan"), 0, []

            self.score_test_last_epoch = residuals_test
            self.score_train_last_epoch = residuals

            rmse_sample_train = self._rmse(train_epoch_loss, len_train)
            rmse_sample_test = self._rmse(test_epoch_loss, len_test)
            # MAE is the mean absolute residual; it used to be computed with
            # the RMSE formula and merely duplicated the RMSE values.
            MAE_train = self._mae(residuals)
            MAE_test = self._mae(residuals_test)

            self.log["train_loss"].append(train_epoch_loss)
            self.log["test_loss"].append(test_epoch_loss)
            print(train_epoch_loss, test_epoch_loss)
            self.log["train_score"].append(rmse_sample_train)
            self.log["test_score"].append(rmse_sample_test)
            self.log["train_mae"].append(MAE_train)
            self.log["test_mae"].append(MAE_test)

            metrics = {
                "MAE_train": MAE_train,
                "RMSE_train": rmse_sample_train,
                "train_loss": train_epoch_loss,
            }
            if test_dataloader is not None:
                metrics.update(
                    {
                        "MAE_test": MAE_test,
                        "RMSE_test": rmse_sample_test,
                        "test_loss": test_epoch_loss,
                    }
                )
            wandb.log(metrics, step=epoch)

            print("rmse testset:", rmse_sample_test)
            print("rmse trainset:", rmse_sample_train)

In [None]:
import model_architectures

import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn

import wandb

from helper import *

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from pytorch_model_summary import summary


class CNNForecaster(nn.Module):
    """Three Conv1d/ReLU/MaxPool stages followed by two linear layers.

    The network maps a batch of univariate sequences to one value per sample.
    """

    def __init__(self, kernel_size=3, pool_size=2, padding=0, conv1_channels = 120,
                 conv2_channels=120, conv3_channels=120, fc_linear_1=180, dropout=0.5,
                 seq_len=None):
        """Build the layers.

        Args:
            kernel_size: kernel size shared by the three convolutions.
            pool_size: kernel size of the max pooling (stride is fixed to 1).
            padding: padding of the three convolutions.
            conv1_channels: output channels of the first convolution.
            conv2_channels: output channels of the second convolution.
            conv3_channels: output channels of the third convolution.
            fc_linear_1: width of the hidden linear layer.
            dropout: dropout probability applied after the hidden linear layer.
            seq_len: length of the input sequences. When given, the input
                size of the first linear layer is derived from it. When None,
                the previous fixed size ``conv3_channels * 3`` is used, which
                corresponds to sequences of length 12 with the default kernel,
                pooling and padding settings.

        Raises:
            ValueError: if ``seq_len`` is given but too short to survive the
                three convolution/pooling stages.
        """
        super(CNNForecaster, self).__init__()

        if seq_len is None:
            flat_features = conv3_channels * 3
        else:
            length = seq_len
            for _ in range(3):
                # Conv1d with stride 1: L + 2 * padding - kernel_size + 1
                length = length + 2 * padding - kernel_size + 1
                # MaxPool1d with stride 1 and no padding: L - pool_size + 1
                length = length - pool_size + 1
                if length < 1:
                    raise ValueError(
                        f"seq_len={seq_len} is too short for the configured "
                        "convolution and pooling sizes"
                    )
            flat_features = conv3_channels * length

        self.conv1 = nn.Conv1d(in_channels=1, out_channels=conv1_channels, kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv1d(in_channels=conv1_channels, out_channels=conv2_channels, kernel_size=kernel_size, padding=padding)
        self.conv3 = nn.Conv1d(in_channels=conv2_channels, out_channels=conv3_channels, kernel_size=kernel_size, padding=padding)

        self.pool = nn.MaxPool1d(kernel_size=pool_size, stride=1)

        self.fc1 = nn.Linear(in_features=flat_features, out_features=fc_linear_1)
        self.fc2 = nn.Linear(in_features=fc_linear_1, out_features=1)

        self.conv3_channels = conv3_channels

        self.dropout = nn.Dropout(p=dropout)

        self.flatten = nn.Flatten()

    def forward(self, x):
        '''
        Applies the forward pass
        Args:
            x (torch.tensor): input of shape (batch, seq_len); it is reshaped
                to (batch, 1, seq_len) for the convolutions
        Returns:
            x (torch.tensor): output tensor of shape (batch, 1)
        '''
        x = x.reshape(x.shape[0], 1, x.shape[1])
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

class CNN2(nn.Module):
    """Four Conv1d/ReLU/MaxPool stages followed by a single linear output.

    The linear layer expects 1152 flattened features, i.e. 64 channels times
    18 remaining time steps, which is what an input of length 48 yields.
    """

    def __init__(self):
        super().__init__()
        layers = []
        # The first three stages pool with stride 1 (length shrinks by one).
        for c_in, c_out in ((1, 64), (64, 128), (128, 64)):
            layers.extend([
                nn.Conv1d(in_channels=c_in, out_channels=c_out, kernel_size=3),
                nn.ReLU(),
                nn.MaxPool1d(kernel_size=2, stride=1),
            ])
        # The last stage pools with stride 2 (length is halved).
        layers.extend([
            nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            nn.Flatten(),
            nn.Linear(1152, 1),
        ])
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Reshape ``x`` to (batch, 1, length) and return the network output."""
        n_samples, n_steps = x.shape[0], x.shape[1]
        return self.network(x.reshape(n_samples, 1, n_steps))

class CNN3(nn.Module):
    """Four convolution stages with BatchNorm/Dropout and a two-layer head.

    The first linear layer expects 1152 flattened features, i.e. 64 channels
    times 18 remaining time steps, which is what an input of length 48 yields.
    """

    def __init__(self):
        super().__init__()
        feature_layers = [
            # stage 1: normalisation is applied after the activation
            nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.MaxPool1d(kernel_size=2, stride=1),
            # stage 2: normalisation is applied before the activation
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=1),
            # stage 3
            nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3),
            nn.Dropout(0.1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=1),
            # stage 4: pooling with stride 2
            nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3),
            nn.Dropout(0.1),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
        ]
        head_layers = [
            nn.Flatten(),
            nn.Linear(1152, 180),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.BatchNorm1d(180),
            nn.ReLU(),
            nn.Linear(180, 1),
            # NOTE(review): dropout is applied to the final output value -- confirm this is intended
            nn.Dropout(0.1),
        ]
        self.network = nn.Sequential(*feature_layers, *head_layers)

    def forward(self, x):
        """Reshape ``x`` to (batch, 1, length) and return the network output."""
        n_samples, n_steps = x.shape[0], x.shape[1]
        return self.network(x.reshape(n_samples, 1, n_steps))

class CNN4(nn.Module):
    """Four Conv1d/Dropout/ReLU/MaxPool stages and a single linear output.

    The linear layer expects 1152 flattened features, i.e. 64 channels times
    18 remaining time steps, which is what an input of length 48 yields.
    """

    def __init__(self):
        super().__init__()
        # (in channels, out channels, dropout probability, pooling stride)
        stages = (
            (1, 64, 0.2, 1),
            (64, 128, 0.2, 1),
            (128, 64, 0.2, 1),
            (64, 64, 0.1, 2),
        )
        layers = []
        for c_in, c_out, p_drop, pool_stride in stages:
            layers.extend([
                nn.Conv1d(in_channels=c_in, out_channels=c_out, kernel_size=3),
                nn.Dropout(p_drop),
                nn.ReLU(),
                nn.MaxPool1d(kernel_size=2, stride=pool_stride),
            ])
        layers.extend([
            nn.Flatten(),
            nn.Linear(1152, 1),
            # NOTE(review): dropout is applied to the final output value -- confirm this is intended
            nn.Dropout(0.2),
        ])
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Reshape ``x`` to (batch, 1, length) and return the network output."""
        n_samples, n_steps = x.shape[0], x.shape[1]
        return self.network(x.reshape(n_samples, 1, n_steps))

class CNN2_Dropout(nn.Module):
    """Four Conv1d/ReLU/MaxPool stages, one linear output and a final Dropout.

    The linear layer expects 1152 flattened features, i.e. 64 channels times
    18 remaining time steps, which is what an input of length 48 yields.
    """

    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=1),
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=1),
            nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=1),
            nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            nn.Flatten(),
            nn.Linear(1152, 1),
            # NOTE(review): dropout is applied to the final output value, so in
            # training mode predictions are randomly zeroed -- confirm this is intended.
            nn.Dropout(0.2)
        )

    def forward(self, x):
        """Reshape ``x`` to (batch, 1, length) and return the network output.

        The debugging ``print`` of the input shape was removed: it wrote one
        line to stdout for every batch during training and evaluation.
        """
        x = x.reshape(x.shape[0], 1, x.shape[1])
        logits = self.network(x)
        return logits

# Instantiate the network that is trained below and move it to the selected device.
model = CNN2_Dropout().to(device)
# NOTE(review): disabled model summary. The closing parenthesis of summary(...) is
# misplaced, so input_size would be passed to print -- fix before re-enabling.
# print(summary(model_architectures.CNN3()), input_size=(64, 48, 1))

In [None]:
# Imported explicitly so the run name below does not depend on whatever the
# star-import of `helper` happens to export.
from datetime import datetime

# hidden_size = 50

# Hyperparameters of this run. batch_size and train_seq_len were fixed in
# earlier cells when the data loaders were built; they are only recorded here.
num_epochs = 30
lr = 0.0001
len_seq = train_seq_len
model_arch = "CNN2_dropout"
config = {"num_epochs":num_epochs, "lr":lr, "batch_size":batch_size, "len_seq":len_seq, "model":model_arch}

trainer = ModelTrainer(model, lr=lr)

# The context manager finishes the wandb run even if training raises.
with wandb.init(project="chx_models", config=config) as run:

    # set run name to training start timestamp
    run.name = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    # run model training
    trainer.train_model(train_dataloader, test_dataloader, num_epochs=num_epochs)

In [None]:
# Plot the per-epoch values the trainer stored under "train_score"/"test_score",
# which it fills with the RMSE of each epoch.
plt.plot(trainer.log["train_score"], label="train")
plt.plot(trainer.log["test_score"], label="test")
plt.legend()
plt.title("RMSE")
plt.xlabel("Epochs")
plt.ylabel("RMSE")
plt.show()


In [None]:
import os

# Persist only the weights (state_dict) under ./models relative to the current
# working directory. The directory is created first because torch.save does
# not create missing parent directories.
model_dir = os.path.join(os.getcwd(), 'models')
os.makedirs(model_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(model_dir, 'CNN2.pth'))

In [None]:
# Rebuild the architecture, restore the weights saved above and switch to eval
# mode so that the Dropout layer is inactive.
test_model = CNN2_Dropout()
test_model.load_state_dict(torch.load(os.path.join(os.getcwd(), 'models', 'CNN2.pth')))
test_model.eval()
# NOTE(review): this prints the shape of a (48, 1) tensor, whereas the model call
# below receives a (1, 48) tensor.
print(torch.randint(100, 400, (48, 1)).float().shape)
# Smoke test: one random sequence of 48 integer values in [100, 400).
test_model(torch.randint(100, 400, (1,48)).float())

In [None]:
# Show an example of a random (1, 48) float tensor with integer values in [10, 142).
print(torch.randint(10, 142, (1, 48)).float())

## Ziel für die Metrik

In [None]:
# Distribution of the target series.
plt.hist(sub_df)
plt.title("Histogram of the target variable")
plt.xlabel("Target variable")
plt.ylabel("Frequency")
plt.show()

# NOTE(review): the result of describe() is neither printed nor the last
# expression of the cell, so it is discarded.
sub_df.describe()
# Target series plotted against its index.
plt.plot(sub_df)
plt.title("Target variable")
plt.xlabel("Time")
plt.ylabel("Target variable")
plt.show()

In [None]:
# Disabled exploratory code, kept for reference:
# plt.boxplot(sub_df)
# plt.title("Boxplot of the target variable")
# plt.xlabel("Target variable")
# plt.ylabel("Frequency")
# plt.show()

# max(sub_df)
# sub_df.values
# sub_df.groupby(sub_df.values).mean()
# Print the 95th percentile and the maximum of the target series.
print('95% quantil', np.percentile(sub_df, 95))
print('max', np.max(sub_df))