## Loading Data

In [2]:
from pandas_datareader import data as pdr

import yfinance as yf
yf.pdr_override()

df = pdr.get_data_yahoo("IBM", start="2019-01-01", end="2024-01-01")

[*********************100%%**********************]  1 of 1 completed


In [6]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,107.084129,110.879539,106.778206,110.143402,85.522598,4434935
2019-01-03,109.493309,109.827919,107.734222,107.944550,83.815285,4546648
2019-01-04,109.856598,112.323135,109.407265,112.160614,87.088913,4683779
2019-01-07,112.332695,113.604210,111.539200,112.954109,87.705017,3923755
2019-01-08,114.397705,115.267685,113.747612,114.560226,88.952126,4982726
...,...,...,...,...,...,...
2023-12-22,161.100006,162.410004,161.000000,162.139999,160.675140,2439800
2023-12-26,162.229996,163.309998,162.050003,163.210007,161.735489,1772400
2023-12-27,163.139999,163.639999,162.679993,163.460007,161.983231,3234600
2023-12-28,163.960007,163.960007,163.399994,163.750000,162.270599,2071300


## Normalize the Data

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_features = scaler.fit_transform(df) 

In [24]:
scaled_features.shape

(1258, 6)

## Sequences/Batching

In [37]:
import numpy as np

def create_sequences(data, seq_length):
    xs = []
    ys = []

    for i in range(len(data)-seq_length-1):
        x = data[i:(i+seq_length)]
        y = data[i+seq_length][4] 
        xs.append(x)
        ys.append(y)

    return np.array(xs), np.array(ys)

seq_length = 5
X, y = create_sequences(scaled_features, seq_length)

In [38]:
X.shape

(1252, 5, 6)

## Split the Data

In [10]:
split_fraction = 0.8
split = int(split_fraction * len(X))

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [22]:
X_train.shape

(1001, 5, 6)

In [21]:
y_train.shape

(1001,)

## Convert to Torch Tensor

In [11]:
import torch
from torch.utils.data import TensorDataset, DataLoader

batch_size = 64  # Example batch size

train_data = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)

test_data = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [17]:
train_data[:10]

(tensor([[[-2.1319, -1.9114, -2.0294, -1.8493, -1.9183, -0.1714],
          [-1.9102, -2.0093, -1.9426, -2.0510, -2.0295, -0.1346],
          [-1.8767, -1.7769, -1.7908, -1.6642, -1.8162, -0.0893],
          [-1.6489, -1.6576, -1.5973, -1.5913, -1.7761, -0.3401],
          [-1.4588, -1.5026, -1.3968, -1.4440, -1.6948,  0.0093]],
 
         [[-1.9102, -2.0093, -1.9426, -2.0510, -2.0295, -0.1346],
          [-1.8767, -1.7769, -1.7908, -1.6642, -1.8162, -0.0893],
          [-1.6489, -1.6576, -1.5973, -1.5913, -1.7761, -0.3401],
          [-1.4588, -1.5026, -1.3968, -1.4440, -1.6948,  0.0093],
          [-1.3488, -1.4287, -1.3196, -1.3685, -1.6532, -0.3807]],
 
         [[-1.8767, -1.7769, -1.7908, -1.6642, -1.8162, -0.0893],
          [-1.6489, -1.6576, -1.5973, -1.5913, -1.7761, -0.3401],
          [-1.4588, -1.5026, -1.3968, -1.4440, -1.6948,  0.0093],
          [-1.3488, -1.4287, -1.3196, -1.3685, -1.6532, -0.3807],
          [-1.4218, -1.3877, -1.3127, -1.2720, -1.6000, -0.2853]],
 
 

In [13]:
import torch
import torch.nn as nn

class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size)
        # Forward propagate the RNN
        out, _ = self.rnn(x, h0)
        # Pass the output of the last time step to the classifier
        out = self.fc(out[:, -1, :])
        return out

## Training

In [14]:
import torch.optim as optim

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device == "cuda":
    print("Running on GPU")
else: print("Running on CPU")

#Model Define
model = SimpleRNN(input_size=6, hidden_size=20, output_size=1).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training 
num_epochs = 100
model.train()

for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Calculate loss
        loss = criterion(outputs, labels.unsqueeze(-1))
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluation on test set (this is a very basic form of evaluation)
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        
        # Here you would implement your logic to evaluate the model's performance
        # For example, calculating the mean squared error for regression tasks

print('Evaluation done.')


Running on CPU
Epoch [1/100], Loss: 0.4485
Epoch [2/100], Loss: 0.1292
Epoch [3/100], Loss: 0.1803
Epoch [4/100], Loss: 0.0941
Epoch [5/100], Loss: 0.0469
Epoch [6/100], Loss: 0.0705
Epoch [7/100], Loss: 0.0232
Epoch [8/100], Loss: 0.0344
Epoch [9/100], Loss: 0.0354
Epoch [10/100], Loss: 0.0331
Epoch [11/100], Loss: 0.0105
Epoch [12/100], Loss: 0.0148
Epoch [13/100], Loss: 0.0105
Epoch [14/100], Loss: 0.0129
Epoch [15/100], Loss: 0.0313
Epoch [16/100], Loss: 0.0170
Epoch [17/100], Loss: 0.0222
Epoch [18/100], Loss: 0.0149
Epoch [19/100], Loss: 0.0168
Epoch [20/100], Loss: 0.0162
Epoch [21/100], Loss: 0.0153
Epoch [22/100], Loss: 0.0098
Epoch [23/100], Loss: 0.0145
Epoch [24/100], Loss: 0.0081
Epoch [25/100], Loss: 0.0269
Epoch [26/100], Loss: 0.0178
Epoch [27/100], Loss: 0.0181
Epoch [28/100], Loss: 0.0147
Epoch [29/100], Loss: 0.0176
Epoch [30/100], Loss: 0.0159
Epoch [31/100], Loss: 0.0147
Epoch [32/100], Loss: 0.0134
Epoch [33/100], Loss: 0.0107
Epoch [34/100], Loss: 0.0090
Epoch [3

## Regression Eval

In [52]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

model.eval()  # Set the model to evaluation mode

actuals = []
predictions = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        actuals.extend(labels.cpu().numpy())
        predictions.extend(outputs.cpu().numpy())

# Convert lists to numpy arrays for evaluation
actuals = np.array(actuals)
predictions = np.array(predictions)

mae = mean_absolute_error(actuals, predictions)
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)

print(f'Mean Absolute Error: {mae:.4f}')
print(f'Mean Squared Error: {mse:.4f}')
print(f'Root Mean Squared Error: {rmse:.4f}')


Mean Absolute Error: 0.1354
Mean Squared Error: 0.0460
Root Mean Squared Error: 0.2145
