In [13]:
import yfinance as yf
from pandas_datareader import data as pdr
import numpy as np
import inspect
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [14]:
yf.pdr_override()

# Define parameters
ticker = 'AAPL'  # Example ticker symbol for Apple Inc.
start_date = '2020-01-01'
end_date = '2024-01-01'
sequence_length = 32  # Length of each sequence

# Fetch data
data = pdr.get_data_yahoo(ticker, start=start_date, end=end_date)
data['Adj Close Percent Change'] = data['Adj Close'].pct_change()
data = data[1:] # Remove the NAN row for pct_change
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Adj Close Percent Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-03,74.287498,75.144997,74.125000,74.357498,72.349144,146322800,-0.009722
2020-01-06,73.447502,74.989998,73.187500,74.949997,72.925629,118387200,0.007968
2020-01-07,74.959999,75.224998,74.370003,74.597504,72.582657,108872000,-0.004703
2020-01-08,74.290001,76.110001,74.290001,75.797501,73.750252,132079200,0.016086
2020-01-09,76.809998,77.607498,76.550003,77.407501,75.316750,170108400,0.021241
...,...,...,...,...,...,...,...
2023-12-22,195.179993,195.410004,192.970001,193.600006,193.353287,37122800,-0.005547
2023-12-26,193.610001,193.889999,192.830002,193.050003,192.803986,28919300,-0.002841
2023-12-27,192.490005,193.500000,191.089996,193.149994,192.903839,48087700,0.000518
2023-12-28,194.139999,194.660004,193.169998,193.580002,193.333298,34049900,0.002226


In [15]:
# Preprocess data
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data)

In [16]:
# Check the mean and standard deviation of each feature
mean_values = np.mean(scaled_features, axis=0)
std_dev_values = np.std(scaled_features, axis=0)

# Print the mean and standard deviation for each feature
for i, (mean, std_dev) in enumerate(zip(mean_values, std_dev_values)):
    print(f"Feature {i+1}: Mean={mean}, Std Dev={std_dev}")

Feature 1: Mean=2.828030789094926e-16, Std Dev=1.0000000000000002
Feature 2: Mean=2.2624246312759407e-16, Std Dev=0.9999999999999999
Feature 3: Mean=-4.5248492625518815e-16, Std Dev=1.0
Feature 4: Mean=1.1312123156379704e-16, Std Dev=0.9999999999999998
Feature 5: Mean=4.5248492625518815e-16, Std Dev=1.0000000000000002
Feature 6: Mean=0.0, Std Dev=1.0
Feature 7: Mean=-3.5350384863686574e-18, Std Dev=1.0


In [17]:
sequences = []
labels_first = []
labels_last = []

for i in range(len(scaled_features) - sequence_length):
    sequences.append(scaled_features[i:i+sequence_length])
    labels_first.append(scaled_features[i][-1])  # First element of the sequence
    labels_last.append(scaled_features[i+sequence_length-1][-1])  # Last element of the sequence

In [18]:
print(labels_first)

[-0.5161214167286139, 0.32086101823190877, -0.2786605927848929, 0.704971348902037, 0.9488351418786568, 0.050833759903772174, 0.9546821118662749, -0.6950259034202327, -0.25890607620582895, 0.5365348923152081, 0.4676662611475334, -0.37678239792599266, 0.11274311190288876, 0.17172158734392398, -0.19250023592135618, -1.4474258496433683, 1.2823513345104727, 0.934248971288914, -0.12470840572724112, -2.1539895491168095, -0.18607610201071936, 1.5058564857916712, 0.3296870441499903, 0.4972908668936842, -0.6992660212350575, 0.1685694278698091, -0.34159633994169397, 1.0674619868053208, -0.3930673189577348, -0.04447952175361832, -0.9224944321958991, 0.6291060065731892, -0.5415437105492555, -1.1270856707669277, -2.3035853753842526, -1.6587680626866765, 0.6944370783251884, -3.1489689804677052, -0.08382709701831517, 4.348829798375527, -1.5588094270118842, 2.13849233857708, -1.5908464881888846, -0.6844744017914186, -3.7983152577959185, 3.351480549203788, -1.6993752804570985, -4.728609217219302, 5.6124

In [19]:
print(labels_last)

[0.6291060065731892, -0.5415437105492555, -1.1270856707669277, -2.3035853753842526, -1.6587680626866765, 0.6944370783251884, -3.1489689804677052, -0.08382709701831517, 4.348829798375527, -1.5588094270118842, 2.13849233857708, -1.5908464881888846, -0.6844744017914186, -3.7983152577959185, 3.351480549203788, -1.6993752804570985, -4.728609217219302, 5.612459385793563, -6.142941995837607, 2.0242552104476284, -1.2143804440260315, -0.418662056114602, -3.0598944978511544, -1.0612914060771588, 4.69065502143964, -0.31678379739136253, 2.4336669566549802, -2.015048780142669, 1.294101967272526, -0.1526996181627658, -2.5456611472919657, 0.7333766624624874, -0.736109925194074, 4.071411453374003, -0.6041550138901538, 1.1548589228689992, 0.2852747236103921, 0.8725223290782042, 2.3333504347404834, -0.4879835323937954, 0.3197997979633807, -0.6981235379114663, -1.0382238465900657, -1.5186337352383583, 1.3066618223838875, -0.23950372411393403, 1.3098076331461441, -0.022709263739497765, -0.8230592103234843

In [20]:
# Convert to tensors
X_train, X_test, y_train_first, y_test_first, y_train_last, y_test_last = train_test_split(sequences, labels_first, labels_last, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train)
y_train_first_tensor = torch.FloatTensor(y_train_first)
y_train_last_tensor = torch.FloatTensor(y_train_last)

X_test_tensor = torch.FloatTensor(X_test)
y_test_first_tensor = torch.FloatTensor(y_test_first)
y_test_last_tensor = torch.FloatTensor(y_test_last)

# Create DataLoaders
batch_size = 5

train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_first_tensor, y_train_last_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_first_tensor, y_test_last_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [21]:
# Example usage of DataLoader for training set
for sequences, first_labels, last_labels in train_loader:
    print("Batch Sequences:")
    print(sequences)
    print("Batch First Labels:")
    print(first_labels)
    print("Batch Last Labels:")
    print(last_labels)
    break  # Print only the first batch for demonstration



Batch Sequences:
tensor([[[-0.3866, -0.3788, -0.3647,  ..., -0.3501, -0.3822,  0.5496],
         [-0.2976, -0.3335, -0.2938,  ..., -0.3299, -0.3667,  0.1959],
         [-0.3409, -0.3851, -0.3744,  ..., -0.4284, -0.1994, -1.2771],
         ...,
         [-0.3291, -0.2949, -0.2886,  ..., -0.2836, -0.0403,  0.5401],
         [-0.3018, -0.3261, -0.2708,  ..., -0.3226,  0.1846, -0.5336],
         [-0.3141, -0.2991, -0.3019,  ..., -0.2687, -0.3540,  0.6112]],

        [[ 0.1777,  0.1406,  0.1942,  ...,  0.1636, -0.6266,  0.2618],
         [ 0.1479,  0.1523,  0.1918,  ...,  0.1662, -0.8355, -0.0271],
         [ 0.1422,  0.1071,  0.1393,  ...,  0.1207, -0.4554, -0.5553],
         ...,
         [ 0.2902,  0.2872,  0.3204,  ...,  0.3003, -0.8165,  0.4075],
         [ 0.3260,  0.3010,  0.3452,  ...,  0.2848, -0.7083, -0.2217],
         [ 0.3353,  0.2941,  0.3277,  ...,  0.3007, -0.6151,  0.1134]],

        [[-1.8020, -1.7819, -1.7636,  ..., -1.7333,  0.7049,  1.2914],
         [-1.7501, -1.7681, 

In [22]:
class BiRNN(nn.Module):

    def save_hyperparameters(self, ignore=[]):
        """Save function arguments into class attributes."""
        frame = inspect.currentframe().f_back
        _, _, _, local_vars = inspect.getargvalues(frame)
        self.hparams = {k:v for k, v in local_vars.items()
                        if k not in set(ignore+['self']) and not k.startswith('_')}
        for k, v in self.hparams.items():
            setattr(self, k, v)

    def __init__(self, num_inputs, num_hiddens, num_layers=1, sigma=0.01):
        super().__init__()
        self.save_hyperparameters()
        self.num_inputs = int(num_inputs)
        self.num_hiddens = int(num_hiddens)
        self.f_rnn = nn.LSTM(num_inputs, num_hiddens, num_layers=num_layers)
        self.b_rnn = nn.LSTM(num_inputs, num_hiddens, num_layers=num_layers)
        self.num_hiddens *= 2  # The output dimension will be doubled

        # Additional LSTM layers
        self.f_rnn_2 = nn.LSTM(self.num_hiddens, self.num_hiddens, num_layers=num_layers, batch_first=True)
        self.b_rnn_2 = nn.LSTM(self.num_hiddens, self.num_hiddens, num_layers=num_layers, batch_first=True)

        # Final linear layer
        self.linear = nn.Linear(self.num_hiddens, 1)

    def forward(self, inputs, Hs=None):
      f_H, b_H = Hs if Hs is not None else (None, None)
      f_outputs, f_H = self.f_rnn(inputs, f_H)
      b_outputs, b_H = self.b_rnn(reversed(inputs), b_H)
      outputs = torch.cat((f_outputs, reversed(b_outputs)), dim=2)

      # Additional LSTM layers
      outputs, _ = self.f_rnn_2(outputs)
      outputs, _ = self.b_rnn_2(outputs)

      # Apply linear layer
      outputs = self.linear(outputs)

      return outputs[:, [0, -1], :], (f_H, b_H)

In [23]:
print(type(X_train_tensor.size(2)))

<class 'int'>


In [24]:
def train(model, train_loader, criterion, optimizer, epochs=10, device='cpu'):
    model.train()  # Set the model to training mode
    model.to(device)  # Move model to the specified device (e.g., GPU)

    for epoch in range(epochs):
        total_loss = 0.0
        total_L1 = 0.0
        total_L2 = 0.0

        for sequences, first_labels, last_labels in train_loader:
            sequences, first_labels, last_labels = sequences.to(device), first_labels.to(device), last_labels.to(device)

            optimizer.zero_grad()  # Zero the gradients

            # Forward pass
            outputs, _ = model(sequences)

            # Separate outputs into first and last elements
            batch_size = outputs.size(0)
            predicted_first = outputs[:, 0, 0]
            predicted_last = outputs[:, -1, -1]

            # Calculate L1 and L2 losses
            loss_1 = criterion(predicted_first, first_labels)
            loss_2 = criterion(predicted_last, last_labels)

            # Total loss as the sum of L1 and L2 losses
            loss = loss_1 + loss_2
            total_L1 += loss_1.item()
            total_L2 += loss_2.item()
            total_loss += loss.item()

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        # Print average loss for the epoch
        if (epoch+1) % 5 == 0:
          print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}, L1: {total_L1 / len(train_loader):.4f}, L2: {total_L2 / len(train_loader):.4f}")

# Example usage:
model = BiRNN(num_inputs=7, num_hiddens=64)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)

train(model, train_loader, criterion, optimizer, epochs=100, device='cpu')


Epoch [5/100], Loss: 0.0390, L1: 0.0109, L2: 0.0281
Epoch [10/100], Loss: 0.0043, L1: 0.0012, L2: 0.0031
Epoch [15/100], Loss: 0.0088, L1: 0.0020, L2: 0.0068
Epoch [20/100], Loss: 0.0153, L1: 0.0043, L2: 0.0110
Epoch [25/100], Loss: 0.0044, L1: 0.0016, L2: 0.0028
Epoch [30/100], Loss: 0.0327, L1: 0.0119, L2: 0.0208
Epoch [35/100], Loss: 0.0125, L1: 0.0047, L2: 0.0078
Epoch [40/100], Loss: 0.0131, L1: 0.0038, L2: 0.0093
Epoch [45/100], Loss: 0.0080, L1: 0.0032, L2: 0.0049
Epoch [50/100], Loss: 0.0051, L1: 0.0016, L2: 0.0034
Epoch [55/100], Loss: 0.0064, L1: 0.0032, L2: 0.0033
Epoch [60/100], Loss: 0.0074, L1: 0.0019, L2: 0.0055
Epoch [65/100], Loss: 0.0023, L1: 0.0008, L2: 0.0016
Epoch [70/100], Loss: 0.0018, L1: 0.0008, L2: 0.0010
Epoch [75/100], Loss: 0.0141, L1: 0.0044, L2: 0.0097
Epoch [80/100], Loss: 0.0026, L1: 0.0010, L2: 0.0017
Epoch [85/100], Loss: 0.0140, L1: 0.0059, L2: 0.0081
Epoch [90/100], Loss: 0.0061, L1: 0.0023, L2: 0.0038
Epoch [95/100], Loss: 0.0090, L1: 0.0030, L2: 0

In [26]:
def evaluate(model, test_loader, criterion, device='cpu'):
    model.eval()  # Set the model to evaluation mode
    model.to(device)  # Move model to the specified device (e.g., GPU)
    total_loss = 0.0
    total_L1 = 0.0
    total_L2 = 0.0

    with torch.no_grad():
        for sequences, first_labels, last_labels in test_loader:
            sequences, first_labels, last_labels = sequences.to(device), first_labels.to(device), last_labels.to(device)

            # Forward pass
            outputs, _ = model(sequences)

            # Separate outputs into first and last elements
            predicted_first = outputs[:, 0, 0]
            predicted_last = outputs[:, -1, -1]

            # Calculate loss
            loss_1 = criterion(predicted_first, first_labels)
            loss_2 = criterion(predicted_last, last_labels)
            loss = loss_1 + loss_2
            total_L1 += loss_1.item()
            total_L2 += loss_2.item()
            total_loss += loss.item()

    # Calculate average loss
    avg_loss = total_loss / len(test_loader)
    avg_L1 = total_L1 / len(test_loader)
    avg_L2 = total_L2 / len(test_loader)
    return avg_loss, avg_L1, avg_L2

# Evaluate the model on the test set
test_loss, test_L1, test_L2 = evaluate(model, test_loader, criterion, device='cpu')
print(f"Test Loss: {test_loss:.4f}, L1: {test_L1:.4f}, L2: {test_L2:.4f}")


Test Loss: 0.0072, L1: 0.0058, L2: 0.0014
