# Own Train set

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [None]:
def LoadDataframe(path = 'reducedLstmData.csv', delimiter = ';'):
    """Load the price data and attach a one-step-ahead prediction target.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to ``'reducedLstmData.csv'`` in the
        current working directory.
    delimiter : str, optional
        Field separator used in the file. Defaults to ``';'``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents plus a trailing ``target`` column that holds the
        ``price_close`` of the following row. The last row of the file is
        dropped because it has no following row to take a target from.
    """
    df = pd.read_csv(path, delimiter = delimiter, index_col = None)
    # The target is the next row's closing price (an earlier experiment used
    # the one-step difference, price_close.diff(), instead).
    df['target'] = df['price_close'].shift(-1)
    # shift(-1) leaves NaN in the final row; drop that row.
    return df.iloc[:-1]

In [None]:
def TrainTestSplit(df, ratio):
    """Split ``df`` in row order into a leading train part and a trailing test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; row order is preserved (no shuffling).
    ratio : float
        Fraction of rows assigned to the train part, expected in ``[0, 1]``.
        The train part gets ``int(len(df) * ratio)`` rows.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_set, test_set)``; together they cover every row exactly once.
    """
    train_size = int(len(df)*ratio)
    # Slice from the boundary instead of using a negative offset: with an
    # empty test part, ``iloc[-0:]`` would return the whole frame.
    train_set = df.iloc[:train_size]
    test_set = df.iloc[train_size:]

    return train_set, test_set

def input_data(seq, ws, device = None):
    """Cut a frame into sliding ``(window, label)`` training pairs.

    Parameters
    ----------
    seq : pandas.DataFrame
        Numeric frame containing a ``target`` column. Every column,
        ``target`` included, is part of the input window.
    ws : int
        Window length in rows.
    device : torch.device or str, optional
        Device the tensors are created on. Defaults to CUDA when it is
        available and to the CPU otherwise.

    Returns
    -------
    list of tuple
        One ``(window, label)`` pair per start row ``i`` in
        ``range(len(seq) - ws)``: ``window`` is a float32 tensor of rows
        ``i .. i + ws - 1`` and ``label`` is a one-element float32 tensor
        with the ``target`` of row ``i + ws``. Empty when ``len(seq) <= ws``.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Pull the numpy arrays out once instead of slicing the frame per window.
    features = seq.values
    targets = seq['target'].values

    out = []
    for i in range(len(seq) - ws):
        # Use the ``ws`` argument for the window length (not a notebook global).
        window_values = torch.tensor(features[i:i+ws], dtype=torch.float32, device=device)
        label_value = torch.tensor(targets[i+ws:i+ws+1], dtype=torch.float32, device=device)
        out.append((window_values, label_value))

    return out

In [None]:
# Load the prepared frame and keep only its first 1000 rows.
df = LoadDataframe().head(1000)

In [None]:
# scikit-learn scaler instance. The cells shown here standardize the frame
# by hand and never call it.
scaler = StandardScaler()

In [None]:
window_size = 50
train_ratio = 0.8

# Standardize every column with the mean/std of the training rows only.
# Using statistics of the whole frame would let information about the
# held-out rows leak into the data the model is fitted on. The row count
# mirrors the boundary TrainTestSplit computes: int(len(df) * ratio).
n_train_rows = int(len(df)*train_ratio)
train_mean = df.iloc[:n_train_rows].mean()
train_std = df.iloc[:n_train_rows].std()
df = (df-train_mean)/train_std

train_df, test_df = TrainTestSplit(df, train_ratio)
# Sliding (window, label) pairs used by the training loop.
train_data = input_data(train_df, window_size)

In [None]:
# Show (rows, columns) of the training split.
train_df.shape

In [None]:
# Show (rows, columns) of the test split.
test_df.shape

In [None]:
# Number of (window, label) pairs built from the training split.
len(train_data)

In [None]:
# Peek at the first three rows of the training split.
train_df.head(3)

In [None]:
# Peek at the first row of the test split.
test_df.head(1)

In [None]:
# Line plot of the target column over every row of the frame.
plt.figure(figsize=(14, 4))
plt.grid(True)
plt.xlim(0, len(df))
plt.plot(df['target'])


In [None]:
# Overlay the target of the training split and of the test split on one
# axis whose width spans both splits.
plt.figure(figsize=(14, 4))
plt.grid(True)
plt.xlim(0, len(train_df) + len(test_df))
plt.plot(train_df['target'])
plt.plot(test_df['target'])

In [None]:
class MLSTM(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of the LSTM hidden state.
    out_size : int
        Number of values produced per sequence.

    Notes
    -----
    ``self.hidden`` stores the ``(h, c)`` pair returned by the most recent
    forward pass. ``forward`` never passes it back into the LSTM, so every
    call starts from the LSTM's default initial state regardless of what
    ``self.hidden`` holds.
    """

    def __init__(self, input_size = 8, hidden_size = 50, out_size = 1):
        super().__init__()
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, out_size)

        # Placeholder (h, c) pair.
        # TODO (from the original author): something has to change here
        # to support continued training.
        initial_h = torch.zeros(1, 1, hidden_size)
        initial_c = torch.zeros(1, 1, hidden_size)
        self.hidden = (initial_h, initial_c)

    def forward(self, seq):
        """Return the prediction for the last step of ``seq``.

        ``seq`` is viewed as ``(len(seq), 1, -1)``, i.e. one sequence with a
        batch dimension of one. The result has ``out_size`` elements.
        """
        n_steps = len(seq)
        as_batch_of_one = seq.view(n_steps, 1, -1)
        lstm_out, self.hidden = self.lstm(as_batch_of_one)
        # The linear layer is applied to every step; only the last is kept.
        per_step = self.linear(lstm_out.view(n_steps, -1))
        return per_step[-1]
        

In [None]:
# Shape check: the first training window reshaped with the same
# view(len, 1, -1) call that MLSTM.forward applies before the LSTM.
train_data[0][0].view(len(train_data[0][0]), 1,-1).shape

In [None]:
# Fix the RNG seed before the model is built so its initial weights are
# reproducible.
torch.manual_seed(42)
model = MLSTM()
# Prefer the GPU, but fall back to the CPU instead of failing on machines
# without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
criterion = nn.MSELoss()
# Adam with its default hyper-parameters.
optimizer = torch.optim.Adam(model.parameters())

In [None]:
epochs = 150
future = 200

# Create tensors on whatever device the model lives on instead of assuming CUDA.
device = next(model.parameters()).device
# Position of the target inside a row; forecasts are written to this column.
target_col = train_df.columns.get_loc('target')
feature_mask = test_df.columns != 'target'
# The forecast takes its non-target features from test_df, so it cannot run
# past the end of that frame.
horizon = min(future, len(test_df))
forecast_x = range(len(train_data), len(train_data) + horizon)

for i in range(epochs):

    # ---- training: one optimizer step per (window, label) pair ----
    # No hidden-state reset is needed: MLSTM.forward calls the LSTM without
    # an initial state, so each call starts from zeros anyway.
    for seq, y_train in train_data:
        optimizer.zero_grad()

        y_pred = model(seq)

        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()

    # This is the loss of the last window only, not an epoch average.
    print(f"Epoch {i} Loss: {loss.item()}")

    # ---- rolling forecast over the test range ----
    # Seed the rolling window with the last rows of the training split.
    preds = train_df[-window_size:].values
    n_seed_rows = preds.shape[0]
    with torch.no_grad():
        for f in range(horizon):
            seq = torch.tensor(preds[-window_size:], dtype=torch.float32, device=device)
            y_pred = model(seq).item()

            # New row = true features of test row f + the predicted target,
            # inserted at the target's column position.
            app = test_df.iloc[f:f+1].loc[:, feature_mask].values
            new = np.insert(app, target_col, y_pred)
            preds = np.vstack((preds, new))

    # Compare each forecast step with the test target of the same step.
    forecast = preds[n_seed_rows:, target_col]
    actual = test_df.target.values[:horizon]
    loss = criterion(torch.tensor(forecast), torch.tensor(actual))
    print(f"Performance on test range: {loss}")

    # ---- plot history, forecast and ground truth for this epoch ----
    plt.figure(figsize = (14,4))
    plt.xlim(0, len(train_data)+future)
    plt.grid(True)
    plt.plot(train_df.iloc[window_size:].target.values, label='historical')
    plt.plot(forecast_x, forecast, label='forecast')
    plt.plot(forecast_x, actual, label='true')
    plt.legend(loc="upper left")
    plt.show()
        
        

In [None]:
# Scratch LSTM for shape experiments: input size 8, hidden size 2, and
# batch-first input layout.
lstm = nn.LSTM(8, 2, batch_first=True)

In [None]:
# Shape of the first training window after adding a leading dimension of size 1.
train_data[0][0].unsqueeze(0).shape

In [None]:
# The first training window cast to float32.
train_data[0][0].float()

In [None]:
# Pass the first training window through the torch.Tensor constructor.
# NOTE(review): torch.Tensor is the CPU float tensor type; confirm this
# behaves as intended when the window lives on the GPU.
torch.Tensor(train_data[0][0])

In [None]:
# Run the scratch LSTM on the first training window with a leading batch
# dimension of one. The window is moved to the LSTM's own device first:
# the layer and the window may live on different devices (CPU vs. GPU),
# and mixing devices raises a RuntimeError.
sample_batch = train_data[0][0].float().unsqueeze(0)
lstm(sample_batch.to(next(lstm.parameters()).device))

In [None]:
# A zero tensor of shape (1, 1, 50): the shape MLSTM gives each tensor of
# its placeholder hidden state when hidden_size is the default 50.
torch.zeros(1,1,50)