In [22]:
import numpy as np
from matplotlib import pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [23]:
df = pd.read_csv('./daily_csv.csv')
df = df.dropna()
y = df['Price'].values
x = np.arange(1, len(y), 1)
minn, maxx = y.min(), y.max()
y = (y - minn) / (maxx - minn)
sequence_length = 10
X, Y = [], []
for i in range(5900):
    l = []
    for j in range(i, i + sequence_length):
        l.append(y[j])
    X.append(l)
    Y.append(y[i + sequence_length])
X = np.array(X)
Y = np.array(Y)

In [24]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.10, random_state=42, shuffle= False, stratify= None)

In [25]:
class Data(Dataset):
    def __init__(self, x, y):
        super().__init__()
        self.x = torch.tensor(x, dtype= torch.float32)
        self.y = torch.tensor(y, dtype= torch.float32)
    def __getitem__(self, ind):
        return self.x[ind], self.y[ind]
    def __len__(self, ):
        return self.x.shape[0]

In [26]:
train = Data(xtrain, ytrain)
train_loader = DataLoader(train, shuffle= True, batch_size= 256, drop_last= True)
test = Data(xtest, ytest)
test_loader = DataLoader(test, batch_size= 256, drop_last= True)

In [27]:
class RNNModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = torch.nn.RNN(input_size=1, hidden_size= 5, num_layers= 1, batch_first= True)
        self.fc1 = torch.nn.Linear(in_features= 5, out_features= 1)
    def forward(self, x):
        y, h = self.rnn(x)
        # trying to extract the last output of entire vector for all layers and directions
        y = y[: , -1, : ]
        y = self.fc1(torch.relu(y))
        return y

In [28]:
model = RNNModel()
model.to('cuda')
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

In [29]:
for epoch in range(1500):
    running_loss = 0.0
    for input, target in train_loader:
        optimizer.zero_grad()
        input, target = input.reshape((-1, sequence_length, 1)).to('cuda'), target.to('cuda')
        output = model(input).reshape(-1)
        loss = criterion(output, target)
        loss.backward()
        running_loss += loss.item()
        optimizer.step()
    if epoch % 50 == 0:
        print(f'epoch - {epoch}, loss - {running_loss}')

epoch - 0, loss - 0.3561137607321143
epoch - 50, loss - 0.1762724737636745
epoch - 100, loss - 0.015560534346150234
epoch - 150, loss - 0.009564765845425427
epoch - 200, loss - 0.008302963033202104
epoch - 250, loss - 0.007863947088480927
epoch - 300, loss - 0.0073069755744654685
epoch - 350, loss - 0.006735996423230972
epoch - 400, loss - 0.00671276832872536
epoch - 450, loss - 0.006378016514645424
epoch - 500, loss - 0.005106864307890646
epoch - 550, loss - 0.006063235276087653
epoch - 600, loss - 0.0059487084072316065
epoch - 650, loss - 0.005864939703315031
epoch - 700, loss - 0.005775450830697082
epoch - 750, loss - 0.005416631691332441
epoch - 800, loss - 0.005506447996594943
epoch - 850, loss - 0.0054218868317548186
epoch - 900, loss - 0.00537783422623761
epoch - 950, loss - 0.005239460268057883
epoch - 1000, loss - 0.005133400743943639
epoch - 1050, loss - 0.0050511166045907885
epoch - 1100, loss - 0.004924915869196411
epoch - 1150, loss - 0.0048647876174072735
epoch - 1200, lo

In [30]:
model.eval()
all_preds, all_labels = [], []
for input, target in test_loader:
    input, target = input.reshape((-1, sequence_length, 1)).to('cuda'), target.to('cuda')
    output = model(input).reshape(-1)
    all_preds.extend(output.to('cpu').detach().numpy())
    all_labels.extend(target.to('cpu').detach().numpy())

In [31]:
from sklearn.metrics import mean_squared_error
print(mean_squared_error(all_preds, all_labels))

4.51410705445626e-05
