In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
dataset_path = '../datasets/consolidated/consolidated.csv'

data = pd.read_csv(
    filepath_or_buffer=dataset_path,
)

data['date'] = pd.to_datetime(data['date']).dt.date

data

Unnamed: 0,date,positive,neutral,negative,open,high,low,volume,close
0,2013-01-21,0.224461,0.501282,0.274257,15.7,17.0,15.6,61502,16.9
1,2013-01-22,0.288634,0.496198,0.215168,16.8,17.6,16.6,60975,17.4
2,2013-01-23,0.257223,0.437274,0.305503,17.3,17.6,16.8,49439,17.9
3,2013-01-24,0.235050,0.551573,0.213377,17.5,19.2,15.6,172009,17.8
4,2013-01-25,0.231190,0.506552,0.262257,16.9,17.8,15.4,80767,18.7
...,...,...,...,...,...,...,...,...,...
2531,2019-12-27,0.226849,0.446411,0.326740,7210.8,7293.8,7128.5,718074,7261.8
2532,2019-12-28,0.169157,0.471205,0.359638,7261.9,7375.9,7256.5,610964,7196.4
2533,2019-12-29,0.197365,0.523340,0.279295,7321.6,7518.9,7303.0,611687,7199.8
2534,2019-12-30,0.170356,0.481577,0.348067,7397.5,7420.9,7244.1,606110,6967.0


In [None]:
target = 'close'

x = data.drop(columns=[target, 'date']).values
y = data[target].values

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

x = scaler.fit_transform(x)
y = scaler.fit_transform(y.reshape(-1, 1))

In [None]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

In [434]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import KFold

# Definir el modelo de Transformer
class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, num_heads, num_layers, hidden_dim):
        super(TransformerModel, self).__init__()
        self.transformer = nn.Transformer(
            d_model=input_dim,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=hidden_dim,
            dropout=0.1,
            activation='relu',
        )
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.transformer(x, x)
        x = self.fc(x)
        return x

# Definir conjunto de datos personalizado
class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

# Crear el modelo de Transformer
input_dim = x.shape[1]
output_dim = y.shape[1]
num_heads = input_dim
num_layers = 3
hidden_dim = 64

# Definir número de divisiones para k-fold cross-validation
num_splits = 5
kf = KFold(n_splits=num_splits, shuffle=False)

# Listas para almacenar los resultados de cada fold
rmse_list = []
mape_list = []

# Loop para realizar k-fold cross-validation
for fold, (train_indices, test_indices) in enumerate(kf.split(x)):
    print(f"Fold {fold+1}: ", end="")

    # Dividir los datos en conjuntos de entrenamiento y prueba para este fold
    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # Crear DataLoader para los datos de entrenamiento
    x_train_tensor = torch.from_numpy(x_train).float()
    y_train_tensor = torch.from_numpy(y_train).float()
    train_dataset = CustomDataset(x_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # Crear modelo para este fold
    model = TransformerModel(
        input_dim=input_dim,
        output_dim=output_dim,
        num_heads=num_heads,
        num_layers=num_layers,
        hidden_dim=hidden_dim,
    )

    # Definir función de pérdida RMSE
    criterion = nn.MSELoss()

    # Definir el optimizador
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Entrenamiento del modelo
    num_epochs = 200
    loss_list = []

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0

        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += np.sqrt(loss.item())

    # Evaluar el modelo con los datos de prueba
    model.eval()
    with torch.no_grad():
        x_test_tensor = torch.from_numpy(x_test).float()
        y_test_tensor = torch.from_numpy(y_test).float()
        output = model(x_test_tensor)
        y_pred = output.detach().numpy()

    # Calcular el RMSE
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    rmse_list.append(rmse)

    # Calcular el MAPE
    y_test_original = scaler.inverse_transform(y_test)
    y_pred_original = scaler.inverse_transform(y_pred)
    mape = mean_absolute_percentage_error(y_test_original, y_pred_original)
    mape_list.append(mape)

    print(f"RMSE: {rmse:.6f}, MAPE: {mape:.6f}")

# Calcular los promedios de RMSE y MAPE para todos los folds
avg_rmse = np.mean(rmse_list)
avg_mape = np.mean(mape_list)

print(f"\nAverage RMSE: {avg_rmse:.6f}")
print(f"Average MAPE: {avg_mape:.6f}")

Fold 1: 

RMSE: 0.005927, MAPE: 0.107872
Fold 2: RMSE: 0.004217, MAPE: 0.071275
Fold 3: RMSE: 0.017937, MAPE: 0.057524
Fold 4: RMSE: 0.042225, MAPE: 0.020288
Fold 5: RMSE: 0.052780, MAPE: 0.029633

Average RMSE: 0.024617
Average MAPE: 0.057319
