In [None]:
from Utils.Data_Processing import *

import os

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Window geometry: each sample window has T_PAST input steps followed by
# T_FUT target steps (windows are T_PAST + T_FUT columns wide).
T_PAST, T_FUT = 60, 20

In [None]:
# Root folder for on-disk inputs. The windowed folders are keyed by the
# window geometry, so changing T_PAST / T_FUT selects a different dataset.
DATA_DIR = './Data'
STOCK_DATA_DIR = os.path.join(DATA_DIR, 'stocks')
WINDOW_DATA_DIR = os.path.join(
  DATA_DIR,
  f'windowed_data_{T_PAST}_{T_FUT}',
)
WINDOW_DT_DATA_DIR = os.path.join(
  DATA_DIR,
  f'windowed_dt_data_{T_PAST}_{T_FUT}',
)

# Tickers held out of training; windows from these stocks form the test set.
TEST_STOCKS = {
  'NVDA', 'GM', 'LMT',
  'HPQ', 'FWONK', 'MSI',
  'ARM', 'MSFT', 'JNJ',
}

## Read Data
  See [PCA_windowed_data_analysis.ipynb](./PCA_windowed_data_analysis.ipynb) for more comments, checks, and visualizations.

In [None]:
# Load every per-stock window file and route it to the train or test split.
# Per-stock arrays are gathered in lists and concatenated once at the end,
# instead of re-copying the growing matrix on every iteration.
window_len = T_PAST + T_FUT
window_suffix = '_windows.npy'

# Each list is seeded with an empty (0, window_len) array so the resulting
# matrix keeps a well-defined 2-D shape even if no stock lands in that split.
train_parts = [np.empty((0, window_len), dtype=np.float32)]
train_dt_parts = [np.empty((0, window_len), dtype=np.float16)]
test_parts = [np.empty((0, window_len), dtype=np.float32)]
test_dt_parts = [np.empty((0, window_len), dtype=np.float16)]

for f_name in sorted(os.listdir(WINDOW_DATA_DIR)):
  # Ignore anything that is not a window file (e.g. hidden/system files),
  # which would otherwise be turned into a bogus ticker and fail to load.
  if not f_name.endswith(window_suffix):
    continue

  # Strip only the known suffix so tickers that contain '.' stay intact.
  stock_ticker = f_name[:-len(window_suffix)]

  stock_windows = np.load(os.path.join(WINDOW_DATA_DIR, f_name))
  stock_windows_dt = np.load(os.path.join(WINDOW_DT_DATA_DIR, f'{stock_ticker}_windows_dt.npy'))

  if stock_ticker in TEST_STOCKS:
    test_parts.append(stock_windows)
    test_dt_parts.append(stock_windows_dt)
  else:
    train_parts.append(stock_windows)
    train_dt_parts.append(stock_windows_dt)

train_mat = np.concatenate(train_parts, axis=0)
train_mat_dt = np.concatenate(train_dt_parts, axis=0)
test_mat = np.concatenate(test_parts, axis=0)
test_mat_dt = np.concatenate(test_dt_parts, axis=0)

In [None]:
# Split the window matrices (values and their *_dt companions) into past and
# future parts with the project helper; T_PAST is passed as the split point.
(
  train_past, train_fut,
  train_dt_past, train_dt_fut,
) = split_past_fut(train_mat, train_mat_dt, T_PAST)
(
  test_past, test_fut,
  test_dt_past, test_dt_fut,
) = split_past_fut(test_mat, test_mat_dt, T_PAST)

In [None]:
# Scale every split with the project helper. It returns the scaled windows
# plus a second value stored as S0_* (presumably the first-column values used
# as the scale -- confirm in Utils.Data_Processing).
# NOTE(review): past and future windows are scaled independently here, so the
# targets are expressed relative to the future window itself -- confirm this
# is intended rather than scaling the future by the past window's S0.
(scaled_tr_past, S0_tr_past), (scaled_tr_fut, S0_tr_fut) = (
  scale_by_1st_col(train_past),
  scale_by_1st_col(train_fut),
)

(scaled_te_past, S0_te_past), (scaled_te_fut, S0_te_fut) = (
  scale_by_1st_col(test_past),
  scale_by_1st_col(test_fut),
)

# PyTorch Analysis
## MLP Model

In [None]:
# Fix the RNG so weight initialisation -- and therefore every reported
# metric -- is identical on each Restart & Run All.
torch.manual_seed(0)

# Scaled past windows are the inputs, scaled future windows the targets.
X_train_tensor = torch.tensor(scaled_tr_past, dtype=torch.float32)
y_train_tensor = torch.tensor(scaled_tr_fut, dtype=torch.float32)
X_test_tensor = torch.tensor(scaled_te_past, dtype=torch.float32)
y_test_tensor = torch.tensor(scaled_te_fut, dtype=torch.float32)

# Fully connected network mapping T_PAST inputs to T_FUT outputs.
model = nn.Sequential(
  nn.Linear(T_PAST, 100),
  nn.ReLU(),
  nn.Linear(100, 100),
  nn.ReLU(),
  nn.Linear(100, 40),
  nn.ReLU(),
  nn.Linear(40, T_FUT)
)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.Adam(model.parameters())  # Adam optimizer (default settings)

num_epochs = 1000
epochs = list(range(num_epochs))
# Per-epoch histories; entries are 0-d tensors (call .item() for a float).
train_losses, test_losses, train_maes, test_maes = [], [], [], []

# Full-batch training: one optimizer step per epoch over the whole train set.
for epoch in epochs:
    optimizer.zero_grad()  # Zero the gradients
    train_outputs = model(X_train_tensor)  # Forward pass
    loss = criterion(train_outputs, y_train_tensor)  # Calculate the loss
    loss.backward()  # Backward pass
    optimizer.step()  # Update weights

    # Record metrics without building autograd graphs.
    with torch.no_grad():
        # Train metrics reuse this step's forward pass (weights before the
        # update); the loss is detached rather than computed a second time.
        train_loss = loss.detach()
        train_losses.append(train_loss)
        train_mae = torch.mean(torch.abs(train_outputs - y_train_tensor))
        train_maes.append(train_mae)

        # Test metrics use the freshly updated weights.
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        test_losses.append(test_loss)
        test_mae = torch.mean(torch.abs(test_outputs - y_test_tensor))
        test_maes.append(test_mae)


print("Train Loss:", train_loss.item())
print("Test Loss:", test_loss.item())
print("Train MAE:", train_mae.item())
print("Test MAE:", test_mae.item())

In [None]:
# Plot all metrics on one plot
plt.figure(figsize=(10, 5))

# Plot training and testing losses
plt.plot(epochs, train_losses, label='Train Loss', color='blue')
plt.plot(epochs, test_losses, label='Test Loss', linestyle='dashed', color='blue')

# # Plot training and testing MAEs
# plt.plot(epochs, train_maes, label='Train MAE', color='red')
# plt.plot(epochs, test_maes, label='Test MAE', linestyle='dashed', color='red')

plt.xlabel('Epochs')
plt.title('Training and Testing Metrics')
plt.legend()
plt.grid(True)
plt.show()

print('final train loss', round(train_losses[-1].item(), 4))
print('final test loss', round(test_losses[-1].item(), 4))

In [None]:
# Visual check on the first 10 training windows: predicted vs. actual future,
# each curve rescaled by its own first value so both start at 1.
n_examples = 10
fig, ax = plt.subplots(2, 5, figsize=(20, 10))

with torch.no_grad():
  for i in range(n_examples):
    panel = ax[i // 5, i % 5]

    # Input: one past window, normalised by its first column.
    past_row = train_past[i:i+1, :T_PAST]
    x_in = torch.tensor(past_row / past_row[:, 0:1], dtype=torch.float32)

    # Target: the matching future window (T_FUT columns), normalised per row
    # the same way as the input.
    fut_row = train_fut[i:i+1, :T_FUT]
    actual = fut_row / fut_row[:, 0:1]

    # Local name on purpose: do not clobber the training-loop outputs.
    pred = model(x_in)
    pred = pred / pred[:, 0:1]

    panel.plot(pred.numpy()[0], label='pred. fut.')
    panel.plot(actual[0], label='act. fut.')
    panel.set_title(f'train window {i}')
    panel.legend()

In [None]:
# Visual check on the first 10 test windows: predicted vs. actual future,
# each curve rescaled by its own first value so both start at 1.
n_examples = 10
fig, ax = plt.subplots(2, 5, figsize=(20, 10))

with torch.no_grad():
  for i in range(n_examples):
    panel = ax[i // 5, i % 5]

    # Input: one past window (T_PAST columns rather than a hard-coded width),
    # normalised by its first column.
    past_row = test_past[i:i+1, :T_PAST]
    x_in = torch.tensor(past_row / past_row[:, 0:1], dtype=torch.float32)

    # Target: the matching future window (T_FUT columns), normalised per row.
    fut_row = test_fut[i:i+1, :T_FUT]
    actual = fut_row / fut_row[:, 0:1]

    # Local name on purpose: do not clobber the training-loop outputs.
    pred = model(x_in)
    pred = pred / pred[:, 0:1]

    panel.plot(pred.numpy()[0], label='pred. fut.')
    panel.plot(actual[0], label='act. fut.')
    panel.set_title(f'test window {i}')
    panel.legend()

## CNN
1D convolutional model: a single `Conv1d` layer followed by fully connected layers.

In [None]:
# Fix the RNG so weight initialisation -- and therefore every reported
# metric -- is identical on each Restart & Run All.
torch.manual_seed(0)

# Conv1d expects a channel axis, and with stride 1 / no padding it shortens
# the sequence to T_PAST - kernel_size + 1. Layer sizes are derived from the
# notebook constants instead of being hard-coded.
conv_kernel_size = 10
conv_out_len = T_PAST - conv_kernel_size + 1

# NOTE: this rebinds `model` and the metric histories used by earlier cells.
model = nn.Sequential(
  nn.Unflatten(1, (1, T_PAST)),  # (N, T_PAST) -> (N, 1, T_PAST)
  nn.Conv1d(in_channels=1, out_channels=1, kernel_size=conv_kernel_size),
  nn.Flatten(),                  # (N, 1, conv_out_len) -> (N, conv_out_len)
  nn.Linear(conv_out_len, 80),
  nn.ReLU(),
  nn.Linear(80, 40),
  nn.ReLU(),
  nn.Linear(40, T_FUT)
)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.Adam(model.parameters())  # Adam optimizer (default settings)

num_epochs = 1000
epochs = list(range(num_epochs))
# Per-epoch histories; entries are 0-d tensors (call .item() for a float).
train_losses, test_losses, train_maes, test_maes = [], [], [], []

# Full-batch training: one optimizer step per epoch over the whole train set.
for epoch in epochs:
    optimizer.zero_grad()  # Zero the gradients
    train_outputs = model(X_train_tensor)  # Forward pass
    loss = criterion(train_outputs, y_train_tensor)  # Calculate the loss
    loss.backward()  # Backward pass
    optimizer.step()  # Update weights

    # Record metrics without building autograd graphs.
    with torch.no_grad():
        # Train metrics reuse this step's forward pass (weights before the
        # update); the loss is detached rather than computed a second time.
        train_loss = loss.detach()
        train_losses.append(train_loss)
        train_mae = torch.mean(torch.abs(train_outputs - y_train_tensor))
        train_maes.append(train_mae)

        # Test metrics use the freshly updated weights.
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor)
        test_losses.append(test_loss)
        test_mae = torch.mean(torch.abs(test_outputs - y_test_tensor))
        test_maes.append(test_mae)


print("Train Loss:", train_loss.item())
print("Test Loss:", test_loss.item())
print("Train MAE:", train_mae.item())
print("Test MAE:", test_mae.item())