In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
dataset_path = '../datasets/consolidated/consolidated.csv'

data = pd.read_csv(
    filepath_or_buffer=dataset_path,
)

data['date'] = pd.to_datetime(data['date']).dt.date

data

Unnamed: 0,date,positive,neutral,negative,open,high,low,volume,close
0,2013-01-21,0.224461,0.501282,0.274257,15.7,17.0,15.6,61502,16.9
1,2013-01-22,0.288634,0.496198,0.215168,16.8,17.6,16.6,60975,17.4
2,2013-01-23,0.257223,0.437274,0.305503,17.3,17.6,16.8,49439,17.9
3,2013-01-24,0.235050,0.551573,0.213377,17.5,19.2,15.6,172009,17.8
4,2013-01-25,0.231190,0.506552,0.262257,16.9,17.8,15.4,80767,18.7
...,...,...,...,...,...,...,...,...,...
2531,2019-12-27,0.226849,0.446411,0.326740,7210.8,7293.8,7128.5,718074,7261.8
2532,2019-12-28,0.169157,0.471205,0.359638,7261.9,7375.9,7256.5,610964,7196.4
2533,2019-12-29,0.197365,0.523340,0.279295,7321.6,7518.9,7303.0,611687,7199.8
2534,2019-12-30,0.170356,0.481577,0.348067,7397.5,7420.9,7244.1,606110,6967.0


In [3]:
target = 'close'

x = data.drop(columns=[target, 'date']).values
y = data[target].values

In [4]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

x = scaler.fit_transform(x)
y = scaler.fit_transform(y.reshape(-1, 1))

In [5]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

In [6]:
# import torch
# import torch.nn as nn
# import numpy as np

# # Convertir los datos a tensores de PyTorch
# x_train = torch.tensor(x_train, dtype=torch.float32)
# y_train = torch.tensor(y_train, dtype=torch.float32)
# x_test = torch.tensor(x_test, dtype=torch.float32)
# y_test = torch.tensor(y_test, dtype=torch.float32)

# # Definir la arquitectura del modelo LSTM
# input_size = x_train.shape[1]
# hidden_size = 64
# num_layers = 8

# model = nn.LSTM(
#     input_size=input_size,
#     hidden_size=hidden_size,
#     num_layers=num_layers,
#     bidirectional=True,
#     batch_first=False,
#     bias=False
# )

# # Definir la función de pérdida y el optimizador
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# # Entrenamiento del modelo
# num_epochs = 200

# for epoch in range(num_epochs):
#     model.train()
#     outputs, _ = model(x_train.unsqueeze(1))
#     loss = torch.sqrt(criterion(outputs.squeeze(), y_train))

#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()

# # Evaluación del modelo con datos de prueba
# model.eval()
# with torch.no_grad():
#     test_outputs, _ = model(x_test.unsqueeze(1))
#     test_loss = torch.sqrt(criterion(test_outputs.squeeze(), y_test))

# # Calcular RMSE
# rmse = test_loss.item()

# # Calcular MAPE
# y_test = scaler.inverse_transform(y_test.numpy())
# test_outputs = scaler.inverse_transform(test_outputs.squeeze().numpy())
# mape = np.mean(np.abs((y_test - test_outputs) / y_test))

# print(f'RMSE: {rmse:.6f}')
# print(f'MAPE: {mape:.6f}')

In [7]:
import warnings

warnings.filterwarnings("ignore")

In [8]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold

# Definir el número de folds
num_folds = 5

# Convertir los datos a tensores de PyTorch
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

# Crear el objeto KFold
kf = KFold(n_splits=num_folds, shuffle=False)

# Definir la arquitectura del modelo LSTM
input_size = x.shape[1]
hidden_size = 32
num_layers = 8

# Variables para almacenar los resultados de cada fold
rmse_scores = []
mape_scores = []

# Iterar sobre cada fold
for fold, (train_index, test_index) in enumerate(kf.split(x)):
    print(f"Fold {fold+1}: ", end="")

    # Obtener los conjuntos de entrenamiento y prueba para este fold
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Normalizar los datos
    scaler = MinMaxScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    y_train = scaler.fit_transform(y_train.reshape(-1, 1))

    # Convertir los datos a tensores de PyTorch
    x_train = torch.tensor(x_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32)
    x_test = torch.tensor(x_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32)

    # Crear una nueva instancia del modelo para cada fold
    model = nn.LSTM(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        bidirectional=True,
        batch_first=False,
        bias=False
    )

    # Definir la función de pérdida y el optimizador
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Entrenamiento del modelo
    num_epochs = 200

    for epoch in range(num_epochs):
        model.train()
        outputs, _ = model(x_train.unsqueeze(1))
        loss = torch.sqrt(criterion(outputs.squeeze(), y_train))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluación del modelo con datos de prueba
    model.eval()
    with torch.no_grad():
        test_outputs, _ = model(x_test.unsqueeze(1))
        test_loss = torch.sqrt(criterion(test_outputs.squeeze(), y_test))

    # Calcular RMSE
    rmse = test_loss.item()
    rmse_scores.append(rmse)

    # Calcular MAPE
    y_test = scaler.inverse_transform(y_test.numpy())
    test_outputs = scaler.inverse_transform(test_outputs.squeeze().numpy())
    mape = np.mean(np.abs((y_test - test_outputs) / y_test))
    mape_scores.append(mape)

    print(f'RMSE: {rmse:.6f}, MAPE: {mape:.6f}', end="\n\n")

# Calcular promedio de RMSE y MAPE de todos los folds
avg_rmse = np.mean(rmse_scores)
avg_mape = np.mean(mape_scores)

print(f'Average RMSE: {avg_rmse:.6f}')
print(f'Average MAPE: {avg_mape:.6f}')

Fold 1: RMSE: 0.008857, MAPE: 0.099008

Fold 2: RMSE: 0.003395, MAPE: 0.041344

Fold 3: RMSE: 0.003394, MAPE: 0.024398

Fold 4: RMSE: 0.156953, MAPE: 0.138263

Fold 5: RMSE: 0.072873, MAPE: 0.045839

Average RMSE: 0.049095
Average MAPE: 0.069770
