## Loading Data

In [2]:
from pandas_datareader import data as pdr

import yfinance as yf
yf.pdr_override()

df = pdr.get_data_yahoo("IBM", start="2019-01-01", end="2024-01-01")

[*********************100%%**********************]  1 of 1 completed


## Reverse the data

In [4]:
reversed_df = df.iloc[::-1]
reversed_df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-12-29,163.750000,164.179993,162.830002,163.550003,162.072403,2525600
2023-12-28,163.960007,163.960007,163.399994,163.750000,162.270599,2071300
2023-12-27,163.139999,163.639999,162.679993,163.460007,161.983231,3234600
2023-12-26,162.229996,163.309998,162.050003,163.210007,161.735489,1772400
2023-12-22,161.100006,162.410004,161.000000,162.139999,160.675140,2439800
...,...,...,...,...,...,...
2019-01-08,114.397705,115.267685,113.747612,114.560226,88.952118,4982726
2019-01-07,112.332695,113.604210,111.539200,112.954109,87.705025,3923755
2019-01-04,109.856598,112.323135,109.407265,112.160614,87.088921,4683779
2019-01-03,109.493309,109.827919,107.734222,107.944550,83.815269,4546648


## Normalize the Data

In [5]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_features = scaler.fit_transform(reversed_df) 

## Set up Sequences for RNN 
The way I think it makes sense right now is to have the features of the network be all available feature columns (Open, High, Low, Close, Adj Close, and Volume) and then as the label we are targeting be the next day's Abj Close. 

Once this is working the way that it needs to we can then set up a label for predicitng the previous day's. I think that it should be the same work flow. For a feature datapoint we are just trying to predict Adj Close -1 vs Adj Close +1

In [8]:
import numpy as np

def create_sequences(data, seq_length):
    xs = []
    ys = []

    for i in range(len(data)-seq_length-1):
        x = data[i:(i+seq_length)]
        y = data[i+seq_length][4] 
        xs.append(x)
        ys.append(y)

    return np.array(xs), np.array(ys)

seq_length = 5
X, y = create_sequences(scaled_features, seq_length)

In [9]:
print(X.shape,y.shape)

(1252, 5, 6) (1252,)


## Split the Data

In [10]:
split_fraction = 0.8
split = int(split_fraction * len(X))

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [11]:
print(f'X Training set: {X_train.shape}\ny Training set: {y_train.shape}')

X Training set: (1001, 5, 6)
y Training set: (1001,)


## Convert to Torch Tensors and set up Dataloader

In [12]:
import torch
from torch.utils.data import TensorDataset, DataLoader

batch_size = 64  

train_data = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

test_data = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [13]:
train_data[1]

(tensor([[ 3.1027,  3.0329,  3.1098,  3.0698,  3.0825, -0.9514],
         [ 3.0272,  3.0031,  3.0445,  3.0432,  3.0638, -0.5675],
         [ 2.9435,  2.9724,  2.9873,  3.0203,  3.0476, -1.0500],
         [ 2.8395,  2.8886,  2.8920,  2.9221,  2.9785, -0.8298],
         [ 2.7925,  2.7647,  2.7586,  2.7973,  2.8907, -0.6506]]),
 tensor(2.8436))

## Setting up the RNN Class

In [14]:
import torch
import torch.nn as nn

class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size)
        # Forward propagate the RNN
        out, _ = self.rnn(x, h0)
        # Pass the output of the last time step to the classifier
        out = self.fc(out[:, -1, :])
        return out

## Define instance of model/training parameters

In [15]:
import torch.optim as optim

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device == "cuda":
    print("Running on GPU")
else: print("Running on CPU")

#Model Define
model = SimpleRNN(input_size=6, hidden_size=20, output_size=1).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
model.train()

Running on CPU


SimpleRNN(
  (rnn): RNN(6, 20, batch_first=True)
  (fc): Linear(in_features=20, out_features=1, bias=True)
)

## Training

In [16]:
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Calculate loss
        loss = criterion(outputs, labels.unsqueeze(-1))
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/100], Loss: 0.3137
Epoch [2/100], Loss: 0.2532
Epoch [3/100], Loss: 0.2782
Epoch [4/100], Loss: 0.1838
Epoch [5/100], Loss: 0.0619
Epoch [6/100], Loss: 0.0451
Epoch [7/100], Loss: 0.0295
Epoch [8/100], Loss: 0.0513
Epoch [9/100], Loss: 0.0489
Epoch [10/100], Loss: 0.0244
Epoch [11/100], Loss: 0.0148
Epoch [12/100], Loss: 0.0267
Epoch [13/100], Loss: 0.0154
Epoch [14/100], Loss: 0.0284
Epoch [15/100], Loss: 0.0175
Epoch [16/100], Loss: 0.0078
Epoch [17/100], Loss: 0.0179
Epoch [18/100], Loss: 0.0169
Epoch [19/100], Loss: 0.0119
Epoch [20/100], Loss: 0.0177
Epoch [21/100], Loss: 0.0109
Epoch [22/100], Loss: 0.0145
Epoch [23/100], Loss: 0.0194
Epoch [24/100], Loss: 0.0145
Epoch [25/100], Loss: 0.0179
Epoch [26/100], Loss: 0.0120
Epoch [27/100], Loss: 0.0137
Epoch [28/100], Loss: 0.0118
Epoch [29/100], Loss: 0.0100
Epoch [30/100], Loss: 0.0105
Epoch [31/100], Loss: 0.0089
Epoch [32/100], Loss: 0.0115
Epoch [33/100], Loss: 0.0065
Epoch [34/100], Loss: 0.0124
Epoch [35/100], Loss: 0

## Evaluation

In [17]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

model.eval()

actuals = []
predictions = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        actuals.extend(labels.cpu().numpy())
        predictions.extend(outputs.cpu().numpy())

actuals = np.array(actuals)
predictions = np.array(predictions)

mae = mean_absolute_error(actuals, predictions)
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)

print(f'Mean Absolute Error: {mae:.4f}')
print(f'Mean Squared Error: {mse:.4f}')
print(f'Root Mean Squared Error: {rmse:.4f}')

Mean Absolute Error: 0.0753
Mean Squared Error: 0.0094
Root Mean Squared Error: 0.0968


### Metrics? 
Not sure what we want to use for evaluating the model.

In [18]:
actuals

array([-0.6965378 , -0.75888395, -0.6341917 , -0.6524394 , -0.6488905 ,
       -0.61999846, -0.6179707 , -0.6701801 , -0.677784  , -0.68741417,
       -0.6919758 , -0.68792176, -0.63165736, -0.71073055, -0.7031276 ,
       -0.70262057, -0.7381024 , -0.80450404, -0.80044854, -0.7938587 ,
       -0.7538153 , -0.67575574, -0.7102225 , -0.6433153 , -0.59871036,
       -0.681331  , -0.70667505, -0.739116  , -0.6722068 , -0.6828526 ,
       -0.67828906, -0.69856507, -0.6742351 , -0.6179707 , -0.62405396,
       -0.51558167, -0.51152664, -0.53839093, -0.5829765 , -0.59399813,
       -0.70120424, -0.7913781 , -0.71523094, -0.7868692 , -0.67916304,
       -0.7057126 , -0.7743448 , -0.7588149 , -0.7798564 , -0.8489892 ,
       -0.7733436 , -0.7648275 , -0.37157038, -0.3269833 , -0.37507656,
       -0.33900613, -0.4206643 , -0.49380538, -0.5584303 , -0.41314888,
       -0.3274849 , -0.37607774, -0.39260992, -0.29391894, -0.20575148,
       -0.314961  , -0.299431  , -0.31896925, -0.3931125 , -0.37

In [19]:
predictions

array([[-0.58056784],
       [-0.7286117 ],
       [-0.60630035],
       [-0.59714234],
       [-0.61857355],
       [-0.5639907 ],
       [-0.5369048 ],
       [-0.576174  ],
       [-0.6669287 ],
       [-0.6495438 ],
       [-0.6728127 ],
       [-0.6065775 ],
       [-0.6200795 ],
       [-0.6780084 ],
       [-0.6862066 ],
       [-0.6772333 ],
       [-0.7089276 ],
       [-0.7429358 ],
       [-0.7444322 ],
       [-0.73398966],
       [-0.8021116 ],
       [-0.63151777],
       [-0.6528645 ],
       [-0.56816906],
       [-0.5412107 ],
       [-0.6182744 ],
       [-0.6640591 ],
       [-0.69886684],
       [-0.66517186],
       [-0.61094874],
       [-0.6578084 ],
       [-0.6330788 ],
       [-0.67265594],
       [-0.5827613 ],
       [-0.5403139 ],
       [-0.49558994],
       [-0.4500351 ],
       [-0.42878187],
       [-0.48165432],
       [-0.50793916],
       [-0.6269009 ],
       [-0.7327473 ],
       [-0.6940999 ],
       [-0.72503006],
       [-0.6559052 ],
       [-0