In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

import wandb

import torch
import torch.nn as nn
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio

from sklearn.preprocessing import StandardScaler
from copy import deepcopy as dc
import tqdm
import time

import plotly.io as pio
# Static-image export settings for plotly's orca backend: where the orca
# binary lives and the use_xvfb switch.
# NOTE(review): the export cell at the end of this notebook passes
# engine='kaleido' to pio.write_image, so these orca settings may be unused —
# confirm which engine is intended.
pio.orca.config.executable = 'path/orca'

pio.orca.config.use_xvfb = True

In [None]:
# Index of the GPU this notebook prefers to run on.
CUDA_DEVICE = 1

cuda_available = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()
print(cuda_available)
print(gpu_count)

# Use the requested GPU when it exists. Otherwise degrade gracefully instead
# of crashing on machines with fewer GPUs (or none): first visible GPU, then CPU.
if cuda_available and 0 <= CUDA_DEVICE < gpu_count:
    device = torch.device(f'cuda:{CUDA_DEVICE}')
elif cuda_available:
    device = torch.device('cuda:0')
    print(f'CUDA device {CUDA_DEVICE} not found, falling back to {device}')
else:
    device = torch.device('cpu')
    print('CUDA is not available, falling back to CPU')

print(device)
if device.type == 'cuda':
    print(torch.cuda.get_device_name(device))
    print(torch.cuda.current_device())

In [None]:
# Run description and hyper-parameters recorded with the Weights & Biases run.
run_config = dict(
    learning_rate=0.001,
    architecture="LSTM",
    dataset="30min",
    epochs=100,
)

wandb.init(project="name", name="MAE", config=run_config)

# Ask W&B to keep the minimum logged value of each metric in the run summary.
wandb.define_metric("MAE", summary="min")
wandb.define_metric("MASE", summary="min")

In [6]:
# Load the pickled dataset.
# NOTE(review): pickle files can execute arbitrary code on load — only read
# files from a trusted source.
data = pd.read_pickle('dataset')

# Add a 1-based running counter as the first column and keep only the counter
# and the series that is modelled.
n = len(data)
data.insert(loc=0, column='time_series', value=list(range(1, n + 1)))
data = data.loc[:, ['time_series', '30m-item71']]

# Plot the raw series on a single-panel figure.
series_trace = go.Scatter(
    x=data.index,
    y=data['30m-item71'].values,
    name="test",
    line_color='blue',
)
fig = make_subplots(rows=1, cols=1)
fig.append_trace(series_trace, row=1, col=1)
fig.update_layout(
    font_family="Gyre Bonum",
    margin=dict(l=20, r=10, b=20, t=10, pad=4),
)
fig.show(renderer='iframe')

In [None]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps, target_col='30m-item71', lag_prefix='30m'):
    """Build a supervised-learning frame of lagged copies of one column.

    The frame is indexed by its 'time_series' column. For every lag ``i`` in
    ``1..n_steps`` a column ``f'{lag_prefix}(t-{i})'`` holding ``target_col``
    shifted down by ``i`` rows is appended, in increasing lag order. Rows
    containing any NaN (in particular the first ``n_steps`` rows, which have
    incomplete history) are dropped.

    Args:
        df: Frame containing a 'time_series' column and ``target_col``.
            It is not modified.
        n_steps: Number of lag columns to create.
        target_col: Name of the column to lag.
        lag_prefix: Prefix used in the generated lag column names.

    Returns:
        A new DataFrame: the original columns (minus 'time_series', which
        becomes the index) followed by the lag columns.
    """
    frame = df.set_index('time_series')

    # Build all lag columns first and attach them with a single concat:
    # inserting hundreds of columns one at a time fragments the frame and
    # triggers pandas' PerformanceWarning.
    lag_columns = {
        f'{lag_prefix}(t-{i})': frame[target_col].shift(i)
        for i in range(1, n_steps + 1)
    }
    lagged = pd.DataFrame(lag_columns, index=frame.index)

    return pd.concat([frame, lagged], axis=1).dropna()

# Number of lagged observations used as model input for each prediction.
# Later cells refer to this value by the name `lookback`, so it is defined
# here instead of being passed as a bare literal.
lookback = 384

shifted_df = prepare_dataframe_for_lstm(data, lookback)
shifted_df

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Training rows are two row slices of the lagged frame, stacked into one array.
train1 = shifted_df[27:15549]
train2 = shifted_df[15602:16602]
train = np.concatenate([train1, train2], axis=0)

pandas_train = pd.DataFrame(train)

# Mean absolute one-step change of the unscaled target over the second
# training slice; used later as the denominator of the unscaled MASE.
target_tail = train2['30m-item71']
shiftet_real_unscaled = target_tail.shift()
diffs = target_tail - shiftet_real_unscaled
avg_diff_unscaled = torch.as_tensor(diffs.abs().mean()).to(device)
print(avg_diff_unscaled)

# Despite its name, std_scaler is a min-max scaler mapping every column to
# [1, 5]. It is fitted on the training rows only and applied to all rows.
std_scaler = MinMaxScaler(feature_range=(1, 5)).fit(train)
data_scaled = std_scaler.transform(shifted_df)

data_scaled

In [None]:
# Column 0 of data_scaled is the (scaled) target. Columns 1..N are its lags in
# the order t-1, t-2, ..., t-N, as produced by prepare_dataframe_for_lstm.
y = data_scaled[:, 0]

# Reverse the lag columns so each row reads oldest -> newest. Without this the
# LSTM consumes every window backwards in time, and x_batch[:, -1, :] — which
# the training/validation code passes to custom_loss as `real_minus1` — would
# be the value at t-N instead of t-1.
# .copy() makes the array contiguous; torch.tensor() rejects the
# negative-stride view returned by np.flip.
X = np.flip(data_scaled[:, 1:], axis=1).copy()

X.shape, y.shape

In [None]:
# Mean absolute change between consecutive rows of the first feature column of
# X, in scaled units. It is used as the step size in custom_loss and as the
# denominator of the scaled mase/rmsse functions.
first_elements = X[:, 0]

# np.roll pairs every element with its successor; the last entry wraps around
# to the first element, so it is excluded from the mean.
differences = np.abs(first_elements - np.roll(first_elements, -1))
avg_diff_scaled = torch.as_tensor(differences[:-1].mean()).to(device)
avg_diff_scaled

In [None]:
# Positional row ranges of the train and test sets.
# NOTE(review): test_rows starts at 16602 - 384 - 1 = 16217, which lies inside
# train_rows_b (15602..16601), so those rows appear in both the training and
# the test set — confirm this overlap is intended.
train_rows_a = slice(27, 15549)
train_rows_b = slice(15602, 16602)
test_rows = slice(16602 - 384 - 1, 17459)

X_train1, y_train1 = X[train_rows_a], y[train_rows_a]
X_train2, y_train2 = X[train_rows_b], y[train_rows_b]

X_train = np.concatenate((X_train1, X_train2))
y_train = np.concatenate((y_train1, y_train2))

X_test, y_test = X[test_rows], y[test_rows]

train_size = y_train.shape
test_size = y_test.shape

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Window length = number of lag features per sample. It is derived from the
# data so this cell does not depend on a `lookback` variable left over from
# some other cell. Re-running the cell is safe: axis 1 keeps the same length
# after the reshape.
lookback = X_train.shape[1]

# The LSTM expects (batch, sequence length, features per step).
X_train = X_train.reshape((-1, lookback, 1))
X_test = X_test.reshape((-1, lookback, 1))

# Targets become column vectors to match the model's (batch, 1) output.
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Convert all four arrays to float32 tensors in one pass.
X_train, y_train, X_test, y_test = (
    torch.tensor(array).float()
    for array in (X_train, y_train, X_test, y_test)
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [14]:
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Pairs each input window in ``X`` with the target at the same position in ``y``."""

    def __init__(self, X, y):
        # Both containers are stored as given; no copy and no validation.
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the inputs."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, i):
        """Return the ``(input, target)`` pair at position ``i``."""
        sample = (self.X[i], self.y[i])
        return sample

# Wrap the train and test tensors so DataLoader can index them.
train_dataset = TimeSeriesDataset(X=X_train, y=y_train)
test_dataset = TimeSeriesDataset(X=X_test, y=y_test)

In [15]:
from torch.utils.data import DataLoader

# Mini-batch size shared by both loaders.
batch_size = 128

# Only the training loader shuffles; the test loader keeps the original order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# Sanity check: move the first training batch to the device and show its shapes.
for batch in train_loader:
    x_batch = batch[0].to(device)
    y_batch = batch[1].to(device)
    print(x_batch.shape, y_batch.shape)
    break

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer producing one value per sequence."""

    def __init__(self, input_size, hidden_size, num_stacked_layers, dropout_rate):
        """Create the recurrent stack and the output head.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_stacked_layers: Number of stacked LSTM layers.
            dropout_rate: Dropout value handed to ``nn.LSTM``.
        """
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_stacked_layers,
            batch_first=True,
            dropout=dropout_rate,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Map a ``(batch, steps, features)`` input to a ``(batch, 1)`` output."""
        sequence_out, _state = self.lstm(x)
        # Only the top layer's output at the final time step feeds the head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# One input feature per step, three stacked layers of width 128, dropout 0.1.
model = LSTM(input_size=1, hidden_size=128, num_stacked_layers=3, dropout_rate=0.1)
model = model.to(device)
model

In [18]:
def train_one_epoch():
    """Run one optimisation pass over ``train_loader``.

    Relies on the notebook globals ``model``, ``optimizer``, ``train_loader``,
    ``device``, ``custom_loss`` and the loop variable ``epoch``.

    Returns:
        Tuple ``(loss_sum, mae_sum)``: the per-batch custom loss and the
        per-batch L1 loss, each summed (not averaged) over all batches.
    """
    model.train()
    print(f'Epoch: {epoch + 1}')

    loss_total, metric_total = 0.0, 0.0
    progress = tqdm.tqdm(train_loader)

    for inputs, targets in progress:
        inputs, targets = inputs.to(device), targets.to(device)

        prediction = model(inputs)

        # The last time step of each input window is passed as the reference
        # value of the custom loss.
        loss = custom_loss(prediction, targets, inputs[:, -1, :])
        mae = nn.functional.l1_loss(prediction, targets)

        batch_loss = loss.item()
        loss_total += batch_loss
        metric_total += mae.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        progress.set_postfix(loss=batch_loss)

    return loss_total, metric_total

In [19]:
def validate_one_epoch():
    """Evaluate the model on ``test_loader`` and print sample-averaged losses.

    Relies on the notebook globals ``model``, ``test_loader``, ``device``,
    ``custom_loss`` and ``wandb``. The per-batch custom loss and L1 loss are
    logged to W&B under the keys "loss" and "acc".

    Returns:
        Tuple ``(avg_loss, avg_mae)``: both averaged over samples (0.0 for an
        empty loader).
    """
    model.train(False)
    running_loss = 0.0
    running_metrics = 0.0
    n_samples = 0

    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)

            output = model(x_batch)

            loss = custom_loss(output, y_batch, x_batch[:, -1, :])
            metrics = nn.functional.l1_loss(output, y_batch)

            # Both losses are batch means, so weight them by batch size to get
            # a correct average even when the last batch is smaller.
            batch_len = len(x_batch)
            running_loss += loss.item() * batch_len
            running_metrics += metrics.item() * batch_len
            n_samples += batch_len
            wandb.log({"acc": metrics.item(), "loss": loss.item()})

    # The sums are sample-weighted, so divide by the number of samples.
    # Dividing by len(test_loader) (the number of batches) inflated the
    # reported values by roughly the batch size.
    denominator = max(n_samples, 1)
    avg_loss_across_batches = running_loss / denominator
    avg_metrics_across_batches = running_metrics / denominator

    print('Val Loss: {0:.3f}'.format(avg_loss_across_batches))
    print('Metrics val Loss: {0:.3f}'.format(avg_metrics_across_batches))
    print('-------------------------------------------------')
    print()

    return avg_loss_across_batches, avg_metrics_across_batches

In [20]:
def custom_loss(pred, real, real_minus1):
    """Mean angle between the predicted and the actual step direction.

    Two segments start at the reference value ``real_minus1``: one ends at
    ``pred`` and one at ``real``, both one horizontal step of length
    ``avg_diff_scaled`` (a notebook global) away. The angle between them is
    recovered with the law of cosines and averaged over all elements.

    Args:
        pred: Predicted values.
        real: Target values.
        real_minus1: Reference values the step is measured from.

    Returns:
        Scalar tensor: the mean angle in radians.
    """
    step_sq = avg_diff_scaled**2

    # The triangle's sides: reference->pred, reference->real, and pred->real.
    side_pred = torch.sqrt(step_sq + (pred - real_minus1)**2)
    side_real = torch.sqrt(step_sq + (real - real_minus1)**2)
    opposite = torch.abs(real - pred)

    cosine = (side_pred**2 + side_real**2 - opposite**2) / (2 * side_pred * side_real)
    # Stay strictly inside (-1, 1), where arccos has a finite derivative.
    cosine = torch.clamp(cosine, -0.999999, 0.999999)

    return torch.mean(torch.arccos(cosine))

In [20]:
def rmse(output, target):
    """Root mean squared error between ``output`` and ``target``.

    The square root is taken after averaging the squared errors. Taking it
    per element before the mean reduces to (approximately) the mean absolute
    error. ``eps`` keeps the gradient of the square root finite when the
    error is exactly zero.

    Returns:
        Scalar tensor.
    """
    eps = 1e-8
    mean_squared_error = torch.mean((output - target)**2)
    return torch.sqrt(mean_squared_error + eps)

def msle(output, target):
    """Mean squared error between ``log(1 + x)`` of both inputs.

    Both tensors are clamped to [0, 5] before the logarithm is taken.

    Returns:
        Scalar tensor.
    """
    log_output = torch.log(1 + torch.clamp(output, 0, 5))
    log_target = torch.log(1 + torch.clamp(target, 0, 5))
    return torch.mean((log_output - log_target)**2)

def mase(output, target):
    """Mean absolute error divided element-wise by the global ``avg_diff_scaled``.

    Returns:
        Scalar tensor.
    """
    scaled_error = torch.abs(output - target) / avg_diff_scaled
    return torch.mean(scaled_error)

def rmsse(output, target):
    """Root mean squared error scaled by the global ``avg_diff_scaled``.

    The squared errors are averaged first, divided by the squared scale, and
    the square root is taken last. Taking the root per element before the
    mean reduces to a scaled mean absolute error. ``eps`` keeps the gradient
    of the square root finite when the error is exactly zero.

    Returns:
        Scalar tensor.
    """
    eps = 1e-8
    mean_squared_error = torch.mean((output - target)**2)
    return torch.sqrt(mean_squared_error / avg_diff_scaled**2 + eps)

def poisson(output, target):
    """Mean Poisson negative log-likelihood of ``target`` given rate ``output``.

    Both tensors are clamped to [0, 5]. Per element the loss is
    ``output - target * log(output) + lgamma(target + 1)``.

    ``torch.xlogy`` evaluates ``target * log(output)`` with the convention
    ``0 * log(0) = 0``, so a zero target with a zero prediction no longer
    yields NaN. ``lgamma`` is used directly instead of the round trip
    ``log(exp(lgamma(...)))``. A zero prediction for a positive target still
    gives an infinite loss.

    Returns:
        Scalar tensor.
    """
    output = torch.clamp(output, 0, 5)
    target = torch.clamp(target, 0, 5)
    nll = output - torch.xlogy(target, output) + torch.lgamma(target + 1)
    return torch.mean(nll)


def logCosh(output, target):
    """Mean of ``log(cosh(output - target))`` after clamping both inputs to [0, 5].

    Returns:
        Scalar tensor.
    """
    error = torch.clamp(output, 0, 5) - torch.clamp(target, 0, 5)
    return torch.mean(torch.log(torch.cosh(error)))

def mape(output, target, eps=1e-8):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100).

    Args:
        output: Predicted values.
        target: Reference values.
        eps: Lower bound on ``|target|`` in the denominator, so zero targets
            cannot cause a division by zero (inf/NaN). Targets whose absolute
            value exceeds ``eps`` are unaffected.

    Returns:
        Scalar tensor.
    """
    denominator = torch.clamp(torch.abs(target), min=eps)
    return torch.mean(torch.abs(target - output) / denominator)

In [None]:
# Training hyper-parameters.
# NOTE(review): the same learning rate and epoch count are also written into
# the wandb.init config literal earlier in the notebook — keep them in sync.
num_epochs = 100
learning_rate = 0.001
# Placeholder label only: the train/validation functions call custom_loss directly.
loss_function = "function name"

optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# `epoch` must stay a module-level name: train_one_epoch reads it when
# printing the epoch header.
for epoch in range(num_epochs):
    train_one_epoch()
    validate_one_epoch()

In [None]:
# Predictions for the training inputs as a NumPy array.
# A CUDA tensor cannot be converted with .numpy(); it has to be copied to host
# memory first, hence .cpu() rather than .to('cuda:0').
model.eval()  # make sure dropout is disabled for inference
with torch.no_grad():
    predicted = model(X_train.to(device)).cpu().numpy()

In [22]:
def _unscale_first_column(values):
    """Map scaled target values back to original units.

    ``std_scaler`` was fitted on frames with the target in column 0 followed
    by one column per lag, so ``values`` is placed in column 0 of a
    zero-filled array of that width, inverse-transformed, and column 0 is
    returned as a fresh 1-D array.
    """
    # X_test has shape (samples, lags, 1): one column per lag plus the target.
    padded = np.zeros((len(values), X_test.shape[1] + 1))
    padded[:, 0] = values
    return std_scaler.inverse_transform(padded)[:, 0].copy()


# Inference only: disable dropout and skip building the autograd graph.
model.eval()
with torch.no_grad():
    test_predictions = model(X_test.to(device)).cpu().numpy().flatten()

test_predictions = _unscale_first_column(test_predictions)

new_y_test = _unscale_first_column(y_test.flatten().numpy())

In [None]:
def metric_mase(output, target):
    """Mean absolute error divided by the global ``avg_diff_unscaled``."""
    return (output - target).abs().mean() / avg_diff_unscaled


# Unscaled predictions and targets of the test set as tensors.
# NOTE(review): `train_unscaled` is built from new_y_test, i.e. it holds the
# test targets despite its name.
predicted_unscaled = torch.tensor(test_predictions)
train_unscaled = torch.tensor(new_y_test)

train_size, test_size = y_train.shape[0], y_test.shape[0]

mae_loss_metric = nn.functional.l1_loss(predicted_unscaled, train_unscaled)
print(mae_loss_metric.item())

mase_loss_metric = metric_mase(predicted_unscaled, train_unscaled)
print(mase_loss_metric.item())

# Log the final metrics once and close the W&B run.
log_dict = dict(MAE=mae_loss_metric, MASE=mase_loss_metric)
wandb.log(log_dict)
wandb.finish()


In [None]:
# Predictions and ground truth side by side, one row per test sample.
dataset_test = pd.DataFrame(
    dict(pred=test_predictions, real=new_y_test),
    columns=['pred', 'real'],
)
dataset_test.index

In [25]:
# Split the test frame into three consecutive pieces for plotting: two pieces
# of part_len rows each and a third with whatever remains.
part_len = 285
data_predikovane_3_parts = [
    dataset_test.iloc[:part_len],
    dataset_test.iloc[part_len:2 * part_len],
    dataset_test.iloc[2 * part_len:],
]

In [26]:
def draw(predictions, colors):
    """Plot every column of ``predictions`` as a line on one shared panel.

    Args:
        predictions: DataFrame; each column becomes one trace plotted against
            the frame's index and named after the column.
        colors: Sequence of line colours, indexed by column position.

    Returns:
        The plotly figure. The first column is drawn fully opaque, all later
        columns at 0.7 opacity.
    """
    fig = make_subplots(rows=1, cols=1)

    for position, column in enumerate(predictions.columns):
        trace = go.Scatter(
            x=predictions.index,
            y=predictions[column].values,
            name=column,
            line=dict(color=colors[position], width=1.5),
            opacity=1 if position == 0 else 0.7,
        )
        fig.append_trace(trace, row=1, col=1)

    fig.update_layout(
        font_family="Gyre Bonum",
        font_size=20,
        xaxis_title="Čas",
        yaxis_title="Počet útokov",
        margin=dict(l=20, r=20, b=20, t=20, pad=4),
        height=300,
    )
    return fig

In [None]:
# Show each part of the test predictions and export it as an EPS file.
for i, part in enumerate(data_predikovane_3_parts):
    nakres_more = draw(part[['real', 'pred']], ["blue", "red"])
    nakres_more.show(renderer='iframe')
    # Format with the plain index: passing the list [i] put its repr into the
    # file name ("Loss_naem[0].eps").
    pio.write_image(
        nakres_more,
        'path/Loss_naem{}.eps'.format(i),
        width=2000,
        height=600,
        format='eps',
        engine='kaleido',
    )
