In [1]:
from re import S
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

import math
import matplotlib.pyplot as plt

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(backend)
print(device)

cuda


# Carga de datos

In [3]:
def _parse_target(text):
    """Rebuild a 1-D NumPy array from its printed form, e.g. "[0.1 0.2 0.3]"."""
    return np.fromstring(text.strip("[]"), sep=' ')


# The 'Y' column is stored as text in the CSV; turn every row back into an array.
df = pd.read_csv('close_normalized_prices_index.csv')
df['Y'] = df['Y'].apply(_parse_target)

In [4]:
# Shape every 'X' entry is reshaped to (the model is fed batches of this shape).
WINDOW_SHAPE = (14, 28)


def _parse_window(cell):
    """Convert one 'X' cell into a float array of shape WINDOW_SHAPE.

    The CSV stores each window as the printed form of a 2-D NumPy array
    (brackets and line breaks included). Cells that are already arrays are
    passed through, so re-running this notebook cell does not fail with
    ``AttributeError`` on ``ndarray.replace``.
    """
    if isinstance(cell, np.ndarray):
        # Already parsed by an earlier run of this cell.
        return cell.reshape(WINDOW_SHAPE)
    flat_text = cell.replace('[', '').replace(']', '').replace('\n', ' ')
    return np.fromstring(flat_text, sep=' ').reshape(WINDOW_SHAPE)


df['X'] = df['X'].apply(_parse_window)

# Modelos

In [5]:
class CNN_LSTM(nn.Module):
    """Three Conv1d/BatchNorm/ReLU/MaxPool stages, a 3-layer LSTM and an MLP head.

    Input is a float tensor of shape ``(batch, in_channels, length)``; output
    has shape ``(batch, out_features)``.

    Every convolution uses ``kernel_size=5`` with ``padding=1`` and therefore
    shortens the sequence by 2; every pooling halves it (rounding down). For a
    length-28 input the lengths are 28 -> 26 -> 13 -> 11 -> 5 -> 3 -> 1, so the
    LSTM receives a single time step. Inputs shorter than 22 steps collapse to
    length 0 and cannot be processed.

    Args:
        in_channels: Number of channels of the input (default 14).
        out_features: Size of the final output vector (default 5).
        dropout: Dropout probability used in the fully connected head
            (default 0.15).

    Attribute names and registration order are kept stable so that previously
    saved ``state_dict`` files still load.
    """

    def __init__(self, in_channels=14, out_features=5, dropout=0.15):
        super().__init__()
        # Convolutional feature extractor: in_channels -> 64 -> 128 -> 256 channels.
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=64, kernel_size=5, padding=1)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, padding=1)
        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=5, padding=1)

        # Batch normalisation, one per convolution.
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(256)

        # Stateless layers shared by all stages.
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.dropout_layer = nn.Dropout(p=dropout)
        self.relu = nn.ReLU()

        # Recurrent layer over the convolutional features.
        self.lstm = nn.LSTM(input_size=256, hidden_size=64, num_layers=3, batch_first=True)

        # Fully connected head: 64 -> 32 -> 16 -> out_features.
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, out_features)

        # Weight initialisation.
        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for every Conv1d and Linear layer."""
        for m in self.modules():
            if isinstance(m, (nn.Conv1d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        """Map ``(batch, in_channels, length)`` to ``(batch, out_features)``."""
        # First convolutional stage.
        x = self.pool(self.relu(self.bn1(self.conv1(x))))

        # Second convolutional stage.
        x = self.pool(self.relu(self.bn2(self.conv2(x))))

        # Third convolutional stage.
        x = self.pool(self.relu(self.bn3(self.conv3(x))))

        # (batch, channels, seq_length) -> (batch, seq_length, features) for the
        # batch_first LSTM.
        x = x.permute(0, 2, 1)

        x, _ = self.lstm(x)

        # Keep only the output of the last time step.
        x = x[:, -1, :]

        # Fully connected head, with dropout before every linear layer.
        x = self.dropout_layer(x)
        x = self.relu(self.fc1(x))
        x = self.dropout_layer(x)
        x = self.relu(self.fc2(x))
        x = self.dropout_layer(x)
        x = self.fc3(x)

        return x

# Carga de los pesos entrenados

In [6]:
# Resolve the target device first so the checkpoint can be mapped straight onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = CNN_LSTM()

# Load the saved model state. map_location remaps tensors that were saved from a
# CUDA device, so the cell also works on the CPU fallback selected above.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source (recent PyTorch versions offer weights_only=True; confirm the
# installed version before enabling it).
state_dict = torch.load("../weights/cnn_lstm_index_v1_weights_3.pth", map_location=device)
model.load_state_dict(state_dict)

# The model is only used for inference below: switch dropout off and make
# BatchNorm use its stored running statistics.
model.eval()

# Move the model to the selected device (CPU or GPU); as the last expression
# this also displays the architecture.
model.to(device)

CNN_LSTM(
  (conv1): Conv1d(14, 64, kernel_size=(5,), stride=(1,), padding=(1,))
  (conv2): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(1,))
  (conv3): Conv1d(128, 256, kernel_size=(5,), stride=(1,), padding=(1,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout_layer): Dropout(p=0.15, inplace=False)
  (relu): ReLU()
  (lstm): LSTM(256, 64, num_layers=3, batch_first=True)
  (fc1): Linear(in_features=64, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=5, bias=True)
)

# Prueba: error absoluto medio por ticker

In [15]:
# Mean absolute error of the model's predictions, computed separately per ticker.
mae_by_tick = {}

# Inference settings do not change between tickers, so set them once.
model.eval()
with torch.no_grad():
    # groupby(sort=False) splits the frame a single time and visits tickers in
    # order of first appearance, instead of masking the full frame per ticker.
    for tick, df_t in df.groupby('TICK', sort=False):
        X = torch.tensor(np.stack(df_t['X'].to_numpy()), dtype=torch.float32).reshape(-1, 14, 28)
        # Targets are only compared on the host, so they never need to visit the device.
        actual = np.stack(df_t['Y'].to_numpy()).astype(np.float32).reshape(-1, 5)

        loader = DataLoader(TensorDataset(X), batch_size=100, shuffle=False)
        pred = np.concatenate(
            [model(x.to(device)).cpu().numpy() for (x,) in loader]
        ).reshape(-1, 5)

        error = np.mean(np.abs(pred - actual))
        print(f'Tick: {tick} - Error: {error}')
        mae_by_tick[tick] = error

errors = pd.Series(mae_by_tick)
errors
            


Tick: AAPL - Error: 0.16950801014900208
Tick: MSFT - Error: 0.16101089119911194
Tick: NVDA - Error: 0.16251996159553528
Tick: AMZN - Error: 0.16838279366493225
Tick: META - Error: 0.1781526505947113
Tick: GOOGL - Error: 0.15883389115333557
Tick: BRKB - Error: 0.17854610085487366
Tick: AVGO - Error: 0.15928716957569122
Tick: GOOG - Error: 0.1738014817237854
Tick: TSLA - Error: 0.15180645883083344
Tick: JPM - Error: 0.1803836226463318
Tick: LLY - Error: 0.1688796579837799
Tick: V - Error: 0.1692415475845337
Tick: XOM - Error: 0.16247108578681946
Tick: MA - Error: 0.16239149868488312
Tick: COST - Error: 0.17593365907669067
Tick: UNH - Error: 0.1787312924861908
Tick: WMT - Error: 0.17698633670806885
Tick: NFLX - Error: 0.17533308267593384
Tick: PG - Error: 0.1734035611152649
Tick: JNJ - Error: 0.16724242269992828
Tick: HD - Error: 0.18357127904891968
Tick: ABBV - Error: 0.14074642956256866
Tick: BAC - Error: 0.1694839745759964
Tick: KO - Error: 0.16107536852359772
Tick: CRM - Error: 0.1689

AAPL    0.169508
MSFT    0.161011
NVDA    0.162520
AMZN    0.168383
META    0.178153
          ...   
WTM     0.197012
AGO     0.172339
FMC     0.169344
SON     0.190531
DLB     0.251843
Length: 850, dtype: float32

In [18]:
# Ten tickers with the largest error, worst first.
worst_first = errors.sort_values(ascending=False)
worst_first.iloc[:10]


DLB      0.251843
PATH     0.207119
NXST     0.204657
SNDK     0.203673
BFAM     0.202984
PRI      0.202220
DOV      0.200403
UHALB    0.197512
WTM      0.197012
ACM      0.196446
dtype: float32