In [9]:
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
dataset_path = '../datasets/consolidated/consolidated.csv'

data = pd.read_csv(
    filepath_or_buffer=dataset_path,
)

data['date'] = pd.to_datetime(data['date']).dt.date

data

Unnamed: 0,date,positive,neutral,negative,open,high,low,volume,close
0,2013-01-21,0.224461,0.501282,0.274257,15.7,17.0,15.6,61502,16.9
1,2013-01-22,0.288634,0.496198,0.215168,16.8,17.6,16.6,60975,17.4
2,2013-01-23,0.257223,0.437274,0.305503,17.3,17.6,16.8,49439,17.9
3,2013-01-24,0.235050,0.551573,0.213377,17.5,19.2,15.6,172009,17.8
4,2013-01-25,0.231190,0.506552,0.262257,16.9,17.8,15.4,80767,18.7
...,...,...,...,...,...,...,...,...,...
2531,2019-12-27,0.226849,0.446411,0.326740,7210.8,7293.8,7128.5,718074,7261.8
2532,2019-12-28,0.169157,0.471205,0.359638,7261.9,7375.9,7256.5,610964,7196.4
2533,2019-12-29,0.197365,0.523340,0.279295,7321.6,7518.9,7303.0,611687,7199.8
2534,2019-12-30,0.170356,0.481577,0.348067,7397.5,7420.9,7244.1,606110,6967.0


In [11]:
target = 'close'

x = data.drop(columns=[target, 'date']).values
y = data[target].values

In [12]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

x = scaler.fit_transform(x)
y = scaler.fit_transform(y.reshape(-1, 1))

In [13]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
import numpy as np

# Definir el número de folds para la validación cruzada
num_folds = 5

# Convertir los datos a tensores de PyTorch
x = torch.Tensor(x)
y = torch.Tensor(y)

# Crear conjuntos de datos para el entrenamiento y la prueba
dataset = TensorDataset(x, y)

# Inicializar arrays para almacenar los resultados de cada fold
rmse_scores = []
mape_scores = []

# Realizar la validación cruzada
kfold = KFold(n_splits=num_folds, shuffle=False)

for fold, (train_indices, test_indices) in enumerate(kfold.split(x)):
    print(f"Fold {fold+1}: ", end="")

    # Dividir los datos en conjuntos de entrenamiento y prueba para el fold actual
    x_train, x_test = x[train_indices], x[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # Crear dataloaders para el entrenamiento y la prueba del fold actual
    train_dataset = TensorDataset(x_train, y_train)
    test_dataset = TensorDataset(x_test, y_test)

    # Entrenamiento del modelo
    num_epochs = 200
    batch_size = 64

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    # Definir los parámetros del modelo
    input_size = x_train.shape[1]
    hidden_size = 32
    output_size = 1
    num_layers = 1

    # Crear el modelo GRU
    model = nn.GRU(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        bidirectional=False,
        batch_first=True
    )

    # Definir la función de pérdida y el optimizador
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    # Entrenamiento del modelo
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        for i, (inputs, labels) in enumerate(train_dataloader):
            optimizer.zero_grad()

            outputs, _ = model(inputs.unsqueeze(1))
            loss = criterion(outputs[:, -1, :], labels.unsqueeze(1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

    # Evaluar el modelo en los datos de prueba del fold actual
    model.eval()
    with torch.no_grad():
        test_outputs, _ = model(x_test.unsqueeze(1))
        test_loss = criterion(test_outputs[:, -1, :], y_test.unsqueeze(1))

    # Calcular el error RMSE y MAPE para el fold actual
    rmse = np.sqrt(test_loss.item())
    mape = np.mean(np.abs(scaler.inverse_transform(test_outputs[:, -1, :]) - scaler.inverse_transform(y_test) ) / scaler.inverse_transform(y_test))

    print(f"RMSE: {rmse:.6f}, MAPE: {mape:.6f}", end="\n\n")

    rmse_scores.append(rmse)
    mape_scores.append(mape)

# Calcular los promedios de los errores RMSE y MAPE para todos los folds
avg_rmse = np.mean(rmse_scores)
avg_mape = np.mean(mape_scores)

print(f"Average RMSE: {avg_rmse:.6f}")
print(f"Average MAPE: {avg_mape:.6f}")

Fold 1: RMSE: 0.024235, MAPE: 0.163996

Fold 2: RMSE: 0.010124, MAPE: 0.044858

Fold 3: RMSE: 0.018131, MAPE: 0.034060

Fold 4: RMSE: 0.269992, MAPE: 0.025575

Fold 5: RMSE: 0.172274, MAPE: 0.062061

Average RMSE: 0.098951
Average MAPE: 0.066110
