In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [None]:
# Hyperparameters and run configuration.
SEED = 42          # fixed seed so that Restart-and-Run-All reproduces the same run
HIDDEN_SIZE = 32   # size of the RNN hidden state
NUM_EPOCHS = 100   # number of passes over the training dataloader
LAG = 10           # number of consecutive prices in each input window
# The learning rate is not configured here: the optimizer is constructed with
# Adam's default step size (1e-3).

# Seed every random number generator this notebook imports, so that weight
# initialisation (and therefore the reported losses) is repeatable.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
class RNN(nn.Module):
    """Elman RNN (``nn.RNN``) followed by a linear read-out layer.

    The linear layer is applied to the RNN output at every time step, so the
    result keeps the leading (batch / sequence) dimensions of the input and
    only the last dimension changes to ``output_size``.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in the recurrent hidden state.
        output_size: number of values produced per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: batched inputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run ``x`` through the RNN and project every hidden state.

        NOTE(review): per the PyTorch docs a 2-D input is treated as a single
        unbatched sequence of shape (seq, feature). Feeding a (batch, LAG)
        tensor therefore lets the hidden state flow from one row to the next
        - confirm that this is the intended behaviour.
        """
        hidden_states, _final_state = self.rnn(x)
        return self.fc(hidden_states)

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ticker symbol; it is also the stem of the CSV file read from ./data_feed.
stock = 'AAPL'

In [None]:
# Load the price history for `stock`, indexed by its 'Date' column.
data = pd.read_csv(f'./data_feed/{stock}.csv').set_index('Date')
data.index = pd.to_datetime(data.index)
# Sort chronologically so that data.index[0] really is the earliest date,
# whatever row order the CSV uses.
data = data.sort_index()

# Chronological split: rows up to eight years after the first date are used
# for training, everything later for testing.
split_date = data.index[0] + pd.offsets.DateOffset(years=8)
train_data = data[data.index <= split_date]
test_data = data[data.index > split_date]
if test_data.empty:
    raise ValueError(
        f"No rows after the split date {split_date.date()}: "
        f"'{stock}' needs more than eight years of history to leave a test set."
    )

train_close = train_data['Adj Close']
# Fix: this was previously read from train_data, which made the "test" series
# an exact copy of the training series.
test_close = test_data['Adj Close']

In [None]:
# Fit the min-max scaling on the training prices only, then apply that same
# fitted scaling to the test prices.
scaler = MinMaxScaler()

# The scaler works on 2-D input, so each price series becomes a single column.
train_column = train_close.values.reshape(-1, 1)
scaled_train = scaler.fit_transform(train_column)

test_column = test_close.values.reshape(-1, 1)
scaled_test = scaler.transform(test_column)

In [None]:
class TimeSeriesdataset(Dataset):
    """Sliding-window dataset over a univariate series.

    Sample ``i`` is the pair ``(series[i : i + lag], series[i + lag])``:
    ``lag`` consecutive values as the input and the value that follows them
    as the target.

    Args:
        lag: number of consecutive values in each input window.
        data: the series; an array of any shape, flattened to 1-D.
        device: device on which the returned tensors are created.
    """

    def __init__(self, lag: int, data: np.ndarray, device: torch.device):
        self.lag = lag
        self.data = np.asarray(data).reshape(-1)
        self.device = device

    def __len__(self):
        """Number of (window, target) pairs; 0 if the series has at most ``lag`` values."""
        # Clamp at zero: len() raises ValueError for a negative result.
        return max(len(self.data) - self.lag, 0)

    def __getitem__(self, idx):
        """Return ``(X, Y)`` as float32 tensors of shape ``(lag,)`` and ``(1,)``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        position = idx + size if idx < 0 else idx
        if not 0 <= position < size:
            raise IndexError(f"index {idx} is out of range for {size} samples")

        target_pos = position + self.lag
        X = self.data[position:target_pos]
        Y = self.data[target_pos:target_pos + 1]
        # torch.tensor (not the legacy torch.Tensor constructor, which rejects
        # a non-CPU `device`) so that the samples can be created on the GPU.
        return (
            torch.tensor(X, dtype=torch.float32, device=self.device),
            torch.tensor(Y, dtype=torch.float32, device=self.device),
        )

In [None]:
# Wrap the scaled train and test series in sliding-window datasets of LAG values.
train_dataset, test_dataset = (
    TimeSeriesdataset(lag=LAG, data=series, device=device)
    for series in (scaled_train, scaled_test)
)

# Both loaders serve batches of 64 windows in dataset order (no shuffling).
loader_options = {"batch_size": 64, "shuffle": False}
train_dataloader = DataLoader(train_dataset, **loader_options)
test_dataloader = DataLoader(test_dataset, **loader_options)

In [None]:
# Build the network: LAG input features, HIDDEN_SIZE hidden units, one output
# value, placed on the selected device.
model = RNN(input_size=LAG, hidden_size=HIDDEN_SIZE, output_size=1)
model = model.to(device)

# Mean-squared-error objective, minimised with Adam at its default settings.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters())

In [None]:
# Training loop
LOG_EVERY = 10  # report the average loss once every this many epochs

running_loss = 0.   # sum of batch losses since the last report
batches_seen = 0    # number of batches since the last report
last_loss = 0.      # most recently reported mean batch loss

# Make sure the model is in training mode even if an evaluation cell
# (model.eval()) was executed before this cell is re-run.
model.train()
for epoch in range(NUM_EPOCHS):
    for X, y in train_dataloader:
        # No-op when the dataset already yields tensors on `device`.
        X, y = X.to(device), y.to(device)

        predict = model(X)
        loss = criterion(predict, y)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_seen += 1

    if (epoch + 1) % LOG_EVERY == 0:
        # Mean loss per batch over the reporting window. The previous
        # `running_loss / 10` was a per-epoch *sum* of batch losses, so its
        # magnitude depended on how many batches the loader produced.
        last_loss = running_loss / max(batches_seen, 1)
        print(f'Epoch: {epoch + 1}/{NUM_EPOCHS}, Loss: {last_loss}')
        running_loss = 0.
        batches_seen = 0

Epoch: 10/100, Loss: 0.14416552316470188
Epoch: 20/100, Loss: 0.03478086851719127
Epoch: 30/100, Loss: 0.0409609358470334
Epoch: 40/100, Loss: 0.014045450348567102
Epoch: 50/100, Loss: 0.07830436727817869
Epoch: 60/100, Loss: 0.035995376784012476
Epoch: 70/100, Loss: 0.009319417622646143
Epoch: 80/100, Loss: 0.009087552375422091
Epoch: 90/100, Loss: 0.02294000678939483
Epoch: 100/100, Loss: 0.10518183754720667


In [None]:
# Generate predictions
# Switch the model to evaluation mode and disable gradient tracking while
# running it over the test batches.
model.eval()
with torch.no_grad():
    for i, batch in enumerate(test_dataloader):
        X, y = batch
        # NOTE(review): within the lines visible here, `predict` is overwritten
        # on every iteration and `i` / `y` are unused. If the predictions are
        # needed after the loop they must be collected (e.g. appended to a
        # list) - confirm against the rest of this cell.
        predict = model(X)