In [1]:
import numpy as np
from matplotlib import pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [2]:
df = pd.read_csv('./daily_csv.csv')
df = df.dropna()
y = df['Price'].values
x = np.arange(1, len(y), 1)
minn, maxx = y.min(), y.max()
y = (y - minn) / (maxx - minn)
sequence_length = 10
X, Y = [], []
for i in range(5900):
    l = []
    for j in range(i, i + sequence_length):
        l.append(y[j])
    X.append(l)
    Y.append(y[j + 1])
X = np.array(X)
Y = np.array(Y)

In [3]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.10, random_state=42, shuffle= False, stratify= None)

In [4]:
class Data(Dataset):
    def __init__(self, x, y):
        super().__init__()
        self.x = torch.tensor(x, dtype= torch.float32)
        self.y = torch.tensor(y, dtype= torch.float32)
    def __getitem__(self, ind):
        return self.x[ind], self.y[ind]
    def __len__(self, ):
        return self.x.shape[0]

In [5]:
train = Data(xtrain, ytrain)
train_loader = DataLoader(train, shuffle= True, batch_size= 256, drop_last= True)
test = Data(xtest, ytest)
test_loader = DataLoader(test, batch_size= 256, drop_last= True)

In [6]:
class RNNModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = torch.nn.RNN(input_size=1, hidden_size= 5, num_layers= 1, batch_first= True)
        self.fc1 = torch.nn.Linear(in_features= 5, out_features= 1)
    def forward(self, x):
        y, h = self.rnn(x)
        # trying to extract the last output of entire vector for all layers and directions
        y = y[: , -1, : ]
        y = self.fc1(torch.relu(y))
        return y

In [7]:
model = RNNModel()
model.to('cuda')
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

In [None]:
for epoch in range(1500):
    running_loss = 0.0
    for input, target in train_loader:
        optimizer.zero_grad()
        input, target = input.reshape((-1, sequence_length, 1)).to('cuda'), target.to('cuda')
        output = model(input).reshape(-1)
        loss = criterion(output, target)
        loss.backward()
        running_loss += loss.item()
        optimizer.step()
    if epoch % 50 == 0:
        print(f'epoch - {epoch}, loss - {running_loss}')

Epoch - 0, loss - 1.884418249130249
Epoch - 50, loss - 0.2572912871837616
Epoch - 100, loss - 0.20053139980882406
Epoch - 150, loss - 0.06450021162163466
Epoch - 200, loss - 0.020761054707691073
Epoch - 250, loss - 0.014194080606102943
Epoch - 300, loss - 0.010789531806949526
Epoch - 350, loss - 0.008891828139894642
Epoch - 400, loss - 0.008095994344330393
Epoch - 450, loss - 0.007598797819809988
Epoch - 500, loss - 0.00723774015204981
Epoch - 550, loss - 0.0069351830024970695
Epoch - 600, loss - 0.006659524151473306
Epoch - 650, loss - 0.006566434181877412
Epoch - 700, loss - 0.006449092979892157
Epoch - 750, loss - 0.006231920735444874
Epoch - 800, loss - 0.006118021599831991
Epoch - 850, loss - 0.00571413486613892
Epoch - 900, loss - 0.005948175268713385
Epoch - 950, loss - 0.005787626338133123
Epoch - 1000, loss - 0.005815909702505451
Epoch - 1050, loss - 0.005630299361655489
Epoch - 1100, loss - 0.00559011688164901
Epoch - 1150, loss - 0.005475175072206184
Epoch - 1200, loss - 0.0

In [13]:
model.eval()
all_preds, all_labels = [], []
for input, target in test_loader:
    input, target = input.reshape((-1, sequence_length, 1)).to('cuda'), target.to('cuda')
    output = model(input).reshape(-1)
    all_preds.extend(output.to('cpu').detach().numpy())
    all_labels.extend(target.to('cpu').detach().numpy())

In [15]:
from sklearn.metrics import mean_squared_error
print(mean_squared_error(all_preds, all_labels))

4.5176323252103486e-05
