In [None]:
%%writefile /content/drive/MyDrive/NSDE/functions/get_data.py
import yfinance as yf
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
import datetime
import math
from sklearn.preprocessing import StandardScaler


def load_data(stock, start_date='2005-01-01', end_date='2023-07-01'):
    """Download adjusted closing prices and derive price/log-price differences.

    Args:
        stock: Ticker symbol(s), forwarded unchanged to ``yf.download``.
        start_date: Value passed as ``start`` to ``yf.download``.
        end_date: Value passed as ``end`` to ``yf.download``.

    Returns:
        A DataFrame with four columns, in this order:
        ``Adj_close`` (the downloaded 'Adj Close' column),
        ``ret`` (first difference of ``Adj_close``),
        ``log_close`` (natural log of ``Adj_close``) and
        ``log_ret`` (first difference of ``log_close``).
    """
    downloaded = yf.download(stock, start=start_date, end=end_date)
    prices = pd.DataFrame({'Adj_close': downloaded['Adj Close']})

    # Keyword order fixes column order; later lambdas may read earlier columns.
    return prices.assign(
        ret=lambda frame: frame['Adj_close'].diff(),
        log_close=lambda frame: np.log(frame['Adj_close']),
        log_ret=lambda frame: frame['log_close'].diff(),
    )



Overwriting /content/drive/MyDrive/NSDE/functions/get_data.py


In [4]:
%%writefile /content/drive/MyDrive/NSDE/functions/data_setup.py
import torch
from torch import nn
import pandas as pd
from torch.utils.data import DataLoader
import os
# Default number of DataLoader worker processes. os.cpu_count() may return
# None when the CPU count cannot be determined; fall back to 0 (load data in
# the main process) so DataLoader always receives a valid integer.
NUM_WORKERS = os.cpu_count() or 0

def create_xy(dataset, look_back=1):
    """Build sliding-window inputs and next-step targets from a sequence.

    For every position ``i`` such that ``i + look_back`` is a valid index, the
    window ``dataset[i:i + look_back]`` becomes one input row and
    ``dataset[i + look_back]`` becomes its target.

    Args:
        dataset: A sequence supporting ``len()``, slicing and integer indexing.
        look_back: Number of consecutive elements in each input window.

    Returns:
        A tuple ``(X, Y)`` of float32 tensors: the stacked windows and the
        corresponding targets. When ``dataset`` has ``look_back`` or fewer
        elements both tensors are empty.
    """
    # The last usable window starts at len(dataset) - look_back - 1, so there
    # are exactly len(dataset) - look_back samples. (The previous bound of
    # len(dataset) - look_back - 1 silently dropped the final sample.)
    n_samples = len(dataset) - look_back
    dataX = [dataset[i:i + look_back] for i in range(n_samples)]
    dataY = [dataset[i + look_back] for i in range(n_samples)]
    return torch.tensor(dataX).to(torch.float32), torch.tensor(dataY).to(torch.float32)

def create_dataloaders(
    train_data,
    test_data,
    batch_size: int,
    num_workers: int=NUM_WORKERS):
    """Wrap the train and test datasets in DataLoaders with identical settings.

    Args:
        train_data: Dataset handed to the training DataLoader.
        test_data: Dataset handed to the test DataLoader.
        batch_size: Number of samples per batch for both loaders.
        num_workers: Worker-process count for both loaders
            (defaults to the module-level ``NUM_WORKERS``).

    Returns:
        A tuple ``(train_dataloader, test_dataloader)``.
    """
    # Both loaders share the same options; neither one shuffles its data.
    shared_options = dict(batch_size=batch_size, num_workers=num_workers)
    return (
        DataLoader(train_data, **shared_options),
        DataLoader(test_data, **shared_options),
    )


Overwriting /content/drive/MyDrive/NSDE/functions/data_setup.py


In [3]:
%%writefile /content/drive/MyDrive/NSDE/functions/engine.py
import torch
from tqdm.auto import tqdm
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Module to optimise; it is switched to train mode.
        dataloader: Iterable of ``(inputs, targets)`` batches that also
            supports ``len()``.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        The sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()

    running_loss = 0
    for inputs, targets in dataloader:
        # Forward pass and loss for this batch.
        batch_loss = loss_fn(model(inputs), targets)
        running_loss += batch_loss.item()

        # Clear stale gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    # Average over the number of batches.
    return running_loss / len(dataloader)
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module):
    # Put model in eval mode
    model.eval()

    # Setup test loss and test accuracy values
    test_loss = 0

    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):

            # 1. Forward pass
            test_pred = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred, y)
            test_loss += loss.item()

    # Adjust metrics to get average loss  per batch
    test_loss = test_loss / len(dataloader)
    return test_loss


def train(model: torch.nn.Module,
          train_loader: torch.utils.data.DataLoader,
          test_loader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module = torch.nn.MSELoss(),
          epochs: int = 100):
    """Train and evaluate ``model`` for a number of epochs.

    Each epoch runs ``train_step`` on ``train_loader`` followed by
    ``test_step`` on ``test_loader``. A progress line is printed every
    10th epoch.

    Args:
        model: Module to train.
        train_loader: Batches used for optimisation.
        test_loader: Batches used for evaluation.
        optimizer: Optimizer that updates the model's parameters.
        loss_fn: Loss callable; defaults to mean squared error. The default
            is spelled ``torch.nn.MSELoss()`` because this module only
            imports ``torch`` (a bare ``nn`` is not in scope).
        epochs: Number of train/test passes to run.

    Returns:
        A dict with keys ``"train_loss"`` and ``"test_loss"``, each a list
        holding one averaged loss value per epoch.
    """
    # Per-epoch loss history.
    results = {"train_loss": [],
               "test_loss": [],
    }

    for epoch in tqdm(range(epochs)):
        train_loss = train_step(model=model,
                                dataloader=train_loader,
                                loss_fn=loss_fn,
                                optimizer=optimizer)
        test_loss = test_step(model=model,
                              dataloader=test_loader,
                              loss_fn=loss_fn)

        # Report progress every 10th epoch only, to keep the output short.
        if (epoch+1) % 10 == 0:
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.4f} | "
                f"test_loss: {test_loss:.4f} | "
            )

        results["train_loss"].append(train_loss)
        results["test_loss"].append(test_loss)

    return results

Writing /content/drive/MyDrive/NSDE/functions/engine.py


In [5]:
%%writefile /content/drive/MyDrive/NSDE/functions/plot.py
import matplotlib.pyplot as plt
import torch

def plot_loss_curves(results: dict[str, list[float]]):
    """Plot the training and test loss histories on a single figure.

    Args:
        results: Mapping with the keys ``'train_loss'`` and ``'test_loss'``,
            each holding one loss value per epoch.
    """
    # Look up both series first so a missing key fails before a figure opens.
    curves = {key: results[key] for key in ('train_loss', 'test_loss')}

    # The x-axis is the epoch index, taken from the training history length.
    epoch_axis = range(len(results['train_loss']))

    plt.figure(figsize=(15, 7))
    for label, values in curves.items():
        plt.plot(epoch_axis, values, label=label)

    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

def make_pred(model, test_loader, ytest):
    """Predict over ``test_loader``, plot predictions against ``ytest``.

    Args:
        model: A ``torch.nn.Module``; it is evaluated in eval mode and its
            previous train/eval mode is restored afterwards.
        test_loader: Iterable of ``(inputs, targets)`` batches; only the
            inputs are used.
        ytest: Target tensor compared against the concatenated predictions.
            NOTE(review): assumed to have the same shape as the concatenated
            model output — confirm against the caller.

    Returns:
        The predictions from all batches concatenated into one tensor.
    """
    # Evaluate in eval mode, as test_step does, and restore the caller's
    # mode afterwards so this helper has no lasting effect on the model.
    was_training = model.training
    model.eval()
    try:
        with torch.inference_mode():
            batch_predictions = [model(X) for X, _ in test_loader]
    finally:
        model.train(was_training)

    predictions = torch.cat(batch_predictions)  # Concatenate predictions from all batches
    mse = torch.nn.functional.mse_loss(predictions, ytest)

    plt.plot(predictions, label='predicted')
    plt.plot(ytest, label='test')
    plt.legend()
    plt.title(f'MSE: {mse.item():.4f}')  # Display the calculated MSE in the plot title
    plt.show()
    return predictions

Writing /content/drive/MyDrive/NSDE/functions/plot.py
