In [1]:
import yfinance as yf
from pandas_datareader import data as pdr
import numpy as np
import inspect
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [28]:
yf.pdr_override()

# Define parameters
ticker = 'AAPL'  # Example ticker symbol for Apple Inc.
start_date = '2020-01-01'
end_date = '2024-01-01'
sequence_length = 5  # Length of each sequence

# Fetch data
data = pdr.get_data_yahoo(ticker, start=start_date, end=end_date)
data['Adj Close Percent Change'] = data['Adj Close'].pct_change()
data = data[1:] # Remove the NAN row for pct_change
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Adj Close Percent Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-03,74.287498,75.144997,74.125000,74.357498,72.349121,146322800,-0.009722
2020-01-06,73.447502,74.989998,73.187500,74.949997,72.925636,118387200,0.007969
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.582649,108872000,-0.004703
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.750267,132079200,0.016087
2020-01-09,76.809998,77.607498,76.550003,77.407501,75.316750,170108400,0.021240
...,...,...,...,...,...,...,...
2023-12-22,195.179993,195.410004,192.970001,193.600006,193.353287,37122800,-0.005547
2023-12-26,193.610001,193.889999,192.830002,193.050003,192.803986,28919300,-0.002841
2023-12-27,192.490005,193.500000,191.089996,193.149994,192.903839,48087700,0.000518
2023-12-28,194.139999,194.660004,193.169998,193.580002,193.333298,34049900,0.002226


In [40]:
# Preprocess data
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data)

In [52]:
sequences = []
labels_first = []
labels_last = []

for i in range(len(scaled_features) - sequence_length):
    sequences.append(scaled_features[i:i+sequence_length])
    labels_first.append(scaled_features[i][-1])  # First element of the sequence
    labels_last.append(scaled_features[i+sequence_length-1][-1])  # Last element of the sequence

In [53]:
print(labels_first)

[-0.5161409736231496, 0.32088099421917676, -0.2786703844011945, 0.7049861290784489, 0.9488248494257169, 0.05082415721350406, 0.9547059288960035, -0.6950395456211726, -0.25889650529080205, 0.5365250723748384, 0.4676708223688612, -0.37679156289438187, 0.11275246776317954, 0.1717121681901661, -0.19249554239112163, -1.4474114162777665, 1.2823265115067164, 0.9342582183884489, -0.12471294387207556, -2.1539707580461527, -0.18607600622622014, 1.505850733835667, 0.3296775563346965, 0.4972814783457459, -0.6992522404744969, 0.1685739783536659, -0.34160999247646845, 1.0674710289679454, -0.39306264163823545, -0.04449774012334884, -0.9224806465537165, 0.6291011020762811, -0.5415481188648967, -1.1270808053904724, -2.303575204618877, -1.6587721787973138, 0.6944316411520692, -3.148978123896322, -0.0838216660695302, 4.348839785495395, -1.5588038334773213, 2.138491199916014, -1.590865240437344, -0.6844692711263097, -3.7983042429899467, 3.3514902331171355, -1.6993742147057225, -4.7286276616093135, 5.61247

In [51]:
print(labels_last)

[0.9488248494257169, 0.05082415721350406, 0.9547059288960035, -0.6950395456211726, -0.25889650529080205, 0.5365250723748384, 0.4676708223688612, -0.37679156289438187, 0.11275246776317954, 0.1717121681901661, -0.19249554239112163, -1.4474114162777665, 1.2823265115067164, 0.9342582183884489, -0.12471294387207556, -2.1539707580461527, -0.18607600622622014, 1.505850733835667, 0.3296775563346965, 0.4972814783457459, -0.6992522404744969, 0.1685739783536659, -0.34160999247646845, 1.0674710289679454, -0.39306264163823545, -0.04449774012334884, -0.9224806465537165, 0.6291011020762811, -0.5415481188648967, -1.1270808053904724, -2.303575204618877, -1.6587721787973138, 0.6944316411520692, -3.148978123896322, -0.0838216660695302, 4.348839785495395, -1.5588038334773213, 2.138491199916014, -1.590865240437344, -0.6844692711263097, -3.7983042429899467, 3.3514902331171355, -1.6993742147057225, -4.7286276616093135, 5.61247028828957, -6.142950060776422, 2.0242670753830447, -1.214385780733045, -0.418664928

In [44]:
# Convert to tensors
X_train, X_test, y_train_first, y_test_first, y_train_last, y_test_last = train_test_split(sequences, labels_first, labels_last, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_first_tensor = torch.FloatTensor(y_train_first)
y_train_last_tensor = torch.FloatTensor(y_train_last)

X_test_tensor = torch.FloatTensor(X_test)
y_test_first_tensor = torch.FloatTensor(y_test_first)
y_test_last_tensor = torch.FloatTensor(y_test_last)

# Create DataLoaders
batch_size = 32

train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_first_tensor, y_train_last_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_first_tensor, y_test_last_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [45]:
# Example usage of DataLoader for training set
for sequences, first_labels, last_labels in train_loader:
    print("Batch Sequences:")
    print(sequences)
    print("Batch First Labels:")
    print(first_labels)
    print("Batch Last Labels:")
    print(last_labels)
    break  # Print only the first batch for demonstration



Batch Sequences:
tensor([[[-1.4148, -1.4232, -1.3809,  ..., -1.3859,  0.4100,  1.0652],
         [-1.3950, -1.4339, -1.3879,  ..., -1.4312,  0.0866, -0.8334],
         [-1.4136, -1.4135, -1.3739,  ..., -1.3792, -0.1579,  0.8508],
         [-1.4004, -1.3815, -1.3722,  ..., -1.3457,  1.0888,  0.5164],
         [-1.1388, -1.0782, -1.1591,  ..., -1.0526,  5.0643,  4.8971]],

        [[-0.9789, -0.9217, -0.9457,  ..., -0.9747,  3.8473,  1.1361],
         [-0.9508, -0.9471, -0.9307,  ..., -0.9535,  1.3636,  0.2598],
         [-0.9435, -0.9590, -0.9153,  ..., -0.9419,  0.4204,  0.1154],
         [-0.9126, -0.8385, -0.8881,  ..., -0.8301,  1.9034,  1.5946],
         [-0.8283, -0.8606, -0.8732,  ..., -0.9055,  1.8228, -1.1319]],

        [[ 0.0589,  0.0585,  0.0928,  ...,  0.1198, -0.6076,  0.3707],
         [ 0.0971,  0.1265,  0.0638,  ...,  0.1535, -0.3908,  0.3177],
         [ 0.2454,  0.2635,  0.2706,  ...,  0.3129,  0.3571,  1.6974],
         [ 0.2192,  0.4493,  0.2603,  ...,  0.4217,  1.0

In [46]:
class BiRNN(nn.Module):

    def save_hyperparameters(self, ignore=[]):
        """Save function arguments into class attributes."""
        frame = inspect.currentframe().f_back
        _, _, _, local_vars = inspect.getargvalues(frame)
        self.hparams = {k:v for k, v in local_vars.items()
                        if k not in set(ignore+['self']) and not k.startswith('_')}
        for k, v in self.hparams.items():
            setattr(self, k, v)

    def __init__(self, num_inputs, num_hiddens, num_layers=1, sigma=0.01):
        super().__init__()
        self.save_hyperparameters()
        self.num_inputs = int(num_inputs)
        self.num_hiddens = int(num_hiddens)
        self.f_rnn = nn.RNN(num_inputs, num_hiddens, num_layers=num_layers)
        self.b_rnn = nn.RNN(num_inputs, num_hiddens, num_layers=num_layers)
        self.num_hiddens *= 2  # The output dimension will be doubled

    def forward(self, inputs, Hs=None):
      f_H, b_H = Hs if Hs is not None else (None, None)
      f_outputs, f_H = self.f_rnn(inputs, f_H)
      b_outputs, b_H = self.b_rnn(reversed(inputs), b_H)
      outputs = torch.cat((f_outputs, reversed(b_outputs)), dim=2)
      return outputs[:, [0, -1], :], (f_H, b_H)

In [47]:
print(type(X_train_tensor.size(2)))

<class 'int'>


In [77]:
def train(model, train_loader, criterion, optimizer, epochs=10, device='cpu'):
    model.train()  # Set the model to training mode
    model.to(device)  # Move model to the specified device (e.g., GPU)

    for epoch in range(epochs):
        total_loss = 0.0
        total_L1 = 0.0
        total_L2 = 0.0

        for sequences, first_labels, last_labels in train_loader:
            sequences, first_labels, last_labels = sequences.to(device), first_labels.to(device), last_labels.to(device)

            optimizer.zero_grad()  # Zero the gradients

            # Forward pass
            outputs, _ = model(sequences)

            # Separate outputs into first and last elements
            batch_size = outputs.size(0)
            predicted_first = outputs[:, 0, 0]
            predicted_last = outputs[:, -1, -1]

            # Calculate L1 and L2 losses
            loss_1 = criterion(predicted_first, first_labels)
            loss_2 = criterion(predicted_last, last_labels)

            # Total loss as the sum of L1 and L2 losses
            loss = loss_1 + loss_2
            total_L1 += loss_1.item()
            total_L2 += loss_2.item()
            total_loss += loss.item()

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        # Print average loss for the epoch
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}, L1: {total_L1 / len(train_loader):.4f}, L2: {total_L2 / len(train_loader):.4f}")

# Example usage:
model = BiRNN(num_inputs=7, num_hiddens=64)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

train(model, train_loader, criterion, optimizer, epochs=100, device='cpu')


Epoch [1/100], Loss: 2.1228, L1: 1.0635, L2: 1.0593
Epoch [2/100], Loss: 1.6485, L1: 0.8477, L2: 0.8008
Epoch [3/100], Loss: 1.3131, L1: 0.6564, L2: 0.6567
Epoch [4/100], Loss: 1.1072, L1: 0.5517, L2: 0.5555
Epoch [5/100], Loss: 0.9703, L1: 0.4844, L2: 0.4859
Epoch [6/100], Loss: 0.8801, L1: 0.4403, L2: 0.4398
Epoch [7/100], Loss: 0.8125, L1: 0.4053, L2: 0.4072
Epoch [8/100], Loss: 0.7607, L1: 0.3797, L2: 0.3810
Epoch [9/100], Loss: 0.7228, L1: 0.3600, L2: 0.3628
Epoch [10/100], Loss: 0.6906, L1: 0.3432, L2: 0.3474
Epoch [11/100], Loss: 0.6645, L1: 0.3303, L2: 0.3342
Epoch [12/100], Loss: 0.6441, L1: 0.3200, L2: 0.3241
Epoch [13/100], Loss: 0.6279, L1: 0.3118, L2: 0.3161
Epoch [14/100], Loss: 0.6132, L1: 0.3044, L2: 0.3089
Epoch [15/100], Loss: 0.6013, L1: 0.2983, L2: 0.3030
Epoch [16/100], Loss: 0.5914, L1: 0.2935, L2: 0.2979
Epoch [17/100], Loss: 0.5830, L1: 0.2890, L2: 0.2940
Epoch [18/100], Loss: 0.5754, L1: 0.2853, L2: 0.2901
Epoch [19/100], Loss: 0.5688, L1: 0.2820, L2: 0.2868
Ep