# Importación de librerías

In [None]:
import pandas as pd
import json
import lightning as L
from pytorch_lightning.loggers import CSVLogger
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import optuna

# Opt in to faster float32 matrix multiplications on Tensor Core GPUs, trading
# some precision for performance (the setting Lightning's startup message asks for).
torch.set_float32_matmul_precision('high')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dataset import LitPriceData
from trainer import LitTrainer

In [3]:
def priceDataframe(name, data_dir="../data"):
    """Load the market history of one asset and build the model's feature table.

    Reads ``{data_dir}/info_{name}.json``. The file must contain the keys
    ``"prices"``, ``"market_caps"`` and ``"total_volumes"``, each a list of
    ``[timestamp_ms, value]`` pairs. Only the timestamps of ``"prices"`` are
    used for the calendar features.

    Args:
        name: Asset identifier used to build the file name (e.g. ``"bitcoin"``).
        data_dir: Directory holding the JSON files. Defaults to ``"../data"``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``price``, ``market_caps``,
        ``volume``, ``hour_cos``, ``hour_sin``, ``dayweek_cos`` and
        ``dayweek_sin`` (in that order), one row per entry of ``"prices"``.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        KeyError: If one of the three expected keys is missing.
        ValueError: If the three series do not have the same length.
    """
    jsonPath = f"{data_dir}/info_{name}.json"
    with open(jsonPath, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Timestamps are epoch milliseconds; they only feed the cyclical features
    # below and are never stored as columns.
    timestamps = pd.to_datetime(
        pd.Series([item[0] for item in data["prices"]]), unit="ms"
    )

    # Map hour-of-day (period 24) and day-of-week (period 7) onto the unit
    # circle so that 23h/0h and Sunday/Monday end up next to each other.
    hour_angle = 2 * np.pi * timestamps.dt.hour / 24
    # The day-of-week angle must be derived from the weekday, not from the hour.
    dayweek_angle = 2 * np.pi * timestamps.dt.dayofweek / 7

    return pd.DataFrame({
        "price": [item[1] for item in data["prices"]],
        "market_caps": [item[1] for item in data["market_caps"]],
        "volume": [item[1] for item in data["total_volumes"]],
        "hour_cos": np.cos(hour_angle),
        "hour_sin": np.sin(hour_angle),
        "dayweek_cos": np.cos(dayweek_angle),
        "dayweek_sin": np.sin(dayweek_angle),
    })

In [4]:
def splitData(name, sequence_length=72, prediction_offset=120):
    """Turn an asset's feature table into sliding-window samples.

    Sample ``i`` uses rows ``i .. i + sequence_length - 1`` as input and the
    row ``prediction_offset`` steps after the last input row as target.

    Args:
        name: Asset identifier forwarded to ``priceDataframe``.
        sequence_length: Number of consecutive rows in each input window.
        prediction_offset: Distance, in rows, between the last input row and
            the target row.

    Returns:
        ``(X, y)`` as ``torch.float32`` tensors of shape
        ``(n_samples, sequence_length, 7)`` and ``(n_samples, 7)``.
        ``n_samples`` is 0 (with the trailing dimensions intact) when the
        series is too short to build a single window.
    """
    df = priceDataframe(name)

    feature_cols = ["price", "market_caps", "volume",
                    "hour_cos", "hour_sin",
                    "dayweek_cos", "dayweek_sin"]

    data = df[feature_cols].values

    # Row distance from the start of a window to its target.
    target_shift = sequence_length + prediction_offset - 1
    # The last usable window is the one whose target is the final row, hence
    # len(data) - target_shift samples (not one fewer).
    n_samples = max(len(data) - target_shift, 0)

    # Build all windows in one fancy-indexing step: row k of `window_idx`
    # holds the row numbers k, k+1, ..., k+sequence_length-1.
    starts = np.arange(n_samples)
    window_idx = starts[:, None] + np.arange(sequence_length)[None, :]

    X = data[window_idx]
    # NOTE(review): the target is the full feature row (all 7 columns), not
    # only `price` — confirm this matches the model's output width.
    y = data[starts + target_shift]

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    return X_tensor, y_tensor

In [5]:
# Asset whose history is used for the hyper-parameter search.
name = "bitcoin"

def objective(trial):
    """Optuna objective: train one model configuration and return its validation loss.

    Samples ``hidden_size``, ``num_layers``, ``lr``, ``dropout`` and the input
    window length from ``trial``, trains for 20 epochs and reports the
    ``val_loss`` metric logged during the run.

    Args:
        trial: The ``optuna.trial.Trial`` providing the hyper-parameter values.

    Returns:
        The final ``val_loss`` as a Python float (lower is better).

    Raises:
        KeyError: If the training run did not log a ``val_loss`` metric.
    """
    # Take the logger from the same `lightning` distribution as `L.Trainer`;
    # Lightning advises against mixing `lightning.pytorch` and
    # `pytorch_lightning` objects in one program.
    from lightning.pytorch.loggers import CSVLogger

    hidden_size = trial.suggest_int("hidden_size", 32, 128)
    num_layers = trial.suggest_int("num_layers", 2, 5)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)

    # NOTE(review): the parameter key is misspelled ("sequence_legth"). It is
    # kept as-is because it is the key exposed in the study's parameters;
    # rename it together with any code that reads it.
    sequence_length = trial.suggest_int("sequence_legth", 12, 72, step=12)

    logger = CSVLogger("lightning_logs", name="optuna")

    model = LitTrainer(hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)
    X, y = splitData(name, sequence_length=sequence_length)

    # Chronological split: consecutive sliding windows overlap almost entirely,
    # so a shuffled split would place near-duplicates of every validation
    # window in the training set and make `val_loss` overly optimistic.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)
    dataModule = LitPriceData(X_train, y_train, X_val, y_val)

    trainer = L.Trainer(
        max_epochs=20,
        accelerator="auto",
        logger=logger,
        enable_progress_bar=False,
        enable_model_summary=False,
        enable_checkpointing=False,
    )
    trainer.fit(model=model, datamodule=dataModule)

    return trainer.callback_metrics["val_loss"].item()

In [6]:
# Run the hyper-parameter search; `objective` returns a validation loss, so
# the study minimizes it.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30)

print("Best trial:")
# `study.best_trials` (plural) is a list and has no `.params`; the best
# trial of a single-objective study is `study.best_trial`.
print(study.best_trial.params)

[I 2025-07-07 15:52:38,129] A new study created in memory with name: no-name-0ed32a09-db04-4add-bbb6-6ba457c782f9
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/exodia/miniconda3/envs/ML-TF-G/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
  loss = F.mse_loss(out, y)
/home/exod

UnsupportedOperation: File or stream is not seekable.