# 4. Modelado

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np
import joblib
import os
from tensorflow.keras.models import Sequential, save_model, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from collections import Counter


In [2]:
df = pd.read_csv("data_final.csv", sep=";")

Como se ha explicado anteriormente, entrenaremos los modelos diferenciando las criptomonedas a evaluar

## 4.2. Ethereum (Paso por paso)

### 4.2.1. Selección del conjunto base para modelado

In [3]:
df_eth = df[df["token"] == "Ethereum"].copy()
df_eth["close"] = pd.to_numeric(df_eth["close"], errors='coerce')
df_eth["volume"] = pd.to_numeric(df_eth["volume"], errors='coerce')
df_eth["marketCap"] = pd.to_numeric(df_eth["marketCap"], errors='coerce')
df_eth["timestamp"] = pd.to_datetime(df_eth["timestamp"], errors='coerce')
df_eth = df_eth.sort_values("timestamp")

### 4.2.2. Selección de variables predictoras y target

In [4]:
# Selección de variables para Ethereum
X_eth = df_eth[["close", "volume", "marketCap"]].shift(1)
y_eth = df_eth["close"]

# Eliminando filas con NaN
X_eth = X_eth.iloc[1:]
y_eth = y_eth.iloc[1:]

### 4.2.3. División del conjunto en entrenamiento y prueba

In [5]:
X_train_eth, X_test_eth, y_train_eth, y_test_eth = train_test_split(X_eth, y_eth, shuffle=False, test_size=0.2)

### 4.2.4. Escalamiento de variables numéricas

In [6]:
scaler_eth = StandardScaler()
X_train_eth_scaled = scaler_eth.fit_transform(X_train_eth)
X_test_eth_scaled = scaler_eth.transform(X_test_eth)

### 4.2.5. Aplicando Regresión Lineal

In [7]:
# Regresión Lineal Base para Ethereum
lr_model_eth = LinearRegression()
lr_model_eth.fit(X_train_eth_scaled, y_train_eth)
y_pred_lr_eth = lr_model_eth.predict(X_test_eth_scaled)

# Métricas
mae_lr_eth = mean_absolute_error(y_test_eth, y_pred_lr_eth)
mse_lr_eth = mean_squared_error(y_test_eth, y_pred_lr_eth)
rmse_lr_eth = np.sqrt(mse_lr_eth)
r2_lr_eth = r2_score(y_test_eth, y_pred_lr_eth)

metricas_lr_eth = {
    "MAE": mae_lr_eth,
    "MSE": mse_lr_eth,
    "RMSE": rmse_lr_eth,
    "R^2": r2_lr_eth
}
print("Regresión Lineal Base - Ethereum:")
print(metricas_lr_eth)

Regresión Lineal Base - Ethereum:
{'MAE': 74.31143445494561, 'MSE': 10908.545793630792, 'RMSE': np.float64(104.44398399922703), 'R^2': 0.9575978579606493}


In [8]:
# Regresión Lineal con Validación Cruzada - Ethereum
from sklearn.model_selection import TimeSeriesSplit
lr_cv_eth = LinearRegression()
tscv = TimeSeriesSplit(n_splits=5)
cv_scores_eth = []

for train_idx, val_idx in tscv.split(X_train_eth_scaled):
    X_train_cv, X_val_cv = X_train_eth_scaled[train_idx], X_train_eth_scaled[val_idx]
    y_train_cv, y_val_cv = y_train_eth.iloc[train_idx], y_train_eth.iloc[val_idx]

    lr_cv_eth.fit(X_train_cv, y_train_cv)
    y_pred_cv = lr_cv_eth.predict(X_val_cv)
    cv_scores_eth.append(mean_absolute_error(y_val_cv, y_pred_cv))

lr_cv_eth.fit(X_train_eth_scaled, y_train_eth)
y_pred_lr_cv_eth = lr_cv_eth.predict(X_test_eth_scaled)

mae_lr_cv_eth = mean_absolute_error(y_test_eth, y_pred_lr_cv_eth)
rmse_lr_cv_eth = np.sqrt(mean_squared_error(y_test_eth, y_pred_lr_cv_eth))
r2_lr_cv_eth = r2_score(y_test_eth, y_pred_lr_cv_eth)

metricas_lr_cv_eth = {
    "MAE": mae_lr_cv_eth,
    "RMSE": rmse_lr_cv_eth,
    "R^2": r2_lr_cv_eth
}
print("Regresión Lineal CV - Ethereum:")
print(metricas_lr_cv_eth)

Regresión Lineal CV - Ethereum:
{'MAE': 74.31143445494561, 'RMSE': np.float64(104.44398399922703), 'R^2': 0.9575978579606493}


In [9]:
# Ridge y Lasso para Ethereum
from sklearn.linear_model import Ridge, Lasso

# Ridge
ridge_eth = Ridge(alpha=1.0)
ridge_eth.fit(X_train_eth_scaled, y_train_eth)
y_pred_ridge_eth = ridge_eth.predict(X_test_eth_scaled)

mae_ridge_eth = mean_absolute_error(y_test_eth, y_pred_ridge_eth)
rmse_ridge_eth = np.sqrt(mean_squared_error(y_test_eth, y_pred_ridge_eth))
r2_ridge_eth = r2_score(y_test_eth, y_pred_ridge_eth)

# Lasso
lasso_eth = Lasso(alpha=0.1)
lasso_eth.fit(X_train_eth_scaled, y_train_eth)
y_pred_lasso_eth = lasso_eth.predict(X_test_eth_scaled)

mae_lasso_eth = mean_absolute_error(y_test_eth, y_pred_lasso_eth)
rmse_lasso_eth = np.sqrt(mean_squared_error(y_test_eth, y_pred_lasso_eth))
r2_lasso_eth = r2_score(y_test_eth, y_pred_lasso_eth)

# Comparación de resultados de Regresión Lineal
print("Comparación Regresión Lineal - Ethereum:")
print(f"Base - MAE: {mae_lr_eth:.2f}, RMSE: {rmse_lr_eth:.2f}, R²: {r2_lr_eth:.4f}")
print(f"CV - MAE: {mae_lr_cv_eth:.2f}, RMSE: {rmse_lr_cv_eth:.2f}, R²: {r2_lr_cv_eth:.4f}")
print(f"Ridge - MAE: {mae_ridge_eth:.2f}, RMSE: {rmse_ridge_eth:.2f}, R²: {r2_ridge_eth:.4f}")
print(f"Lasso - MAE: {mae_lasso_eth:.2f}, RMSE: {rmse_lasso_eth:.2f}, R²: {r2_lasso_eth:.4f}")

Comparación Regresión Lineal - Ethereum:
Base - MAE: 74.31, RMSE: 104.44, R²: 0.9576
CV - MAE: 74.31, RMSE: 104.44, R²: 0.9576
Ridge - MAE: 73.97, RMSE: 103.87, R²: 0.9581
Lasso - MAE: 73.76, RMSE: 103.68, R²: 0.9582


### 4.2.6. Aplicando Random Forest

In [10]:
# Random Forest para Ethereum
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

# Random Forest Base
rf_base_eth = RandomForestRegressor(n_estimators=100, random_state=42)
rf_base_eth.fit(X_train_eth, y_train_eth)
y_pred_rf_base_eth = rf_base_eth.predict(X_test_eth)

mae_rf_base_eth = mean_absolute_error(y_test_eth, y_pred_rf_base_eth)
rmse_rf_base_eth = np.sqrt(mean_squared_error(y_test_eth, y_pred_rf_base_eth))
r2_rf_base_eth = r2_score(y_test_eth, y_pred_rf_base_eth)

# Random Forest con CV
rf_cv_eth = RandomForestRegressor(n_estimators=100, random_state=42)
rf_cv_scores_eth = []

for train_idx, val_idx in tscv.split(X_train_eth):
    X_train_rf_cv, X_val_rf_cv = X_train_eth.iloc[train_idx], X_train_eth.iloc[val_idx]
    y_train_rf_cv, y_val_rf_cv = y_train_eth.iloc[train_idx], y_train_eth.iloc[val_idx]

    rf_cv_eth.fit(X_train_rf_cv, y_train_rf_cv)
    y_pred_rf_cv = rf_cv_eth.predict(X_val_rf_cv)
    rf_cv_scores_eth.append(mean_absolute_error(y_val_rf_cv, y_pred_rf_cv))

rf_cv_eth.fit(X_train_eth, y_train_eth)
y_pred_rf_cv_eth = rf_cv_eth.predict(X_test_eth)

mae_rf_cv_eth = mean_absolute_error(y_test_eth, y_pred_rf_cv_eth)
rmse_rf_cv_eth = np.sqrt(mean_squared_error(y_test_eth, y_pred_rf_cv_eth))
r2_rf_cv_eth = r2_score(y_test_eth, y_pred_rf_cv_eth)

# Random Forest Optimizado
param_dist_eth = {
    'n_estimators': [100, 150, 200],
    'max_depth': [5, 10, 15, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt']
}

rf_opt_eth = RandomForestRegressor(random_state=42)
random_search_eth = RandomizedSearchCV(
    estimator=rf_opt_eth,
    param_distributions=param_dist_eth,
    n_iter=25,
    cv=TimeSeriesSplit(n_splits=5),
    scoring='neg_mean_absolute_error',
    random_state=42,
    n_jobs=1
)

random_search_eth.fit(X_train_eth, y_train_eth)
rf_optimized_eth = random_search_eth.best_estimator_
y_pred_rf_opt_eth = rf_optimized_eth.predict(X_test_eth)

mae_rf_opt_eth = mean_absolute_error(y_test_eth, y_pred_rf_opt_eth)
rmse_rf_opt_eth = np.sqrt(mean_squared_error(y_test_eth, y_pred_rf_opt_eth))
r2_rf_opt_eth = r2_score(y_test_eth, y_pred_rf_opt_eth)

# Comparación Random Forest
print("Comparación Random Forest - Ethereum:")
print(f"Base - MAE: {mae_rf_base_eth:.2f}, RMSE: {rmse_rf_base_eth:.2f}, R²: {r2_rf_base_eth:.4f}")
print(f"CV - MAE: {mae_rf_cv_eth:.2f}, RMSE: {rmse_rf_cv_eth:.2f}, R²: {r2_rf_cv_eth:.4f}")
print(f"Optimizado - MAE: {mae_rf_opt_eth:.2f}, RMSE: {rmse_rf_opt_eth:.2f}, R²: {r2_rf_opt_eth:.4f}")

Comparación Random Forest - Ethereum:
Base - MAE: 106.42, RMSE: 142.92, R²: 0.9206
CV - MAE: 106.42, RMSE: 142.92, R²: 0.9206
Optimizado - MAE: 99.64, RMSE: 134.70, R²: 0.9295


### 4.2.7. Aplicando LSTM

In [11]:
# LSTM para Ethereum
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Preparación de datos para LSTM
X_full_eth = df_eth[["close", "volume", "marketCap"]].values
y_full_eth = df_eth["close"].values

scaler_lstm_eth = StandardScaler()
X_scaled_eth = scaler_lstm_eth.fit_transform(X_full_eth)

lookback = 30
X_seq_eth = []
y_seq_eth = []

for i in range(lookback, len(X_scaled_eth)):
    X_seq_eth.append(X_scaled_eth[i - lookback:i])
    y_seq_eth.append(y_full_eth[i])

X_seq_eth = np.array(X_seq_eth)
y_seq_eth = np.array(y_seq_eth)

split_eth = int(len(X_seq_eth) * 0.8)
X_train_lstm_eth, X_test_lstm_eth = X_seq_eth[:split_eth], X_seq_eth[split_eth:]
y_train_lstm_eth, y_test_lstm_eth = y_seq_eth[:split_eth], y_seq_eth[split_eth:]

# Modelo LSTM Base
model_lstm_base_eth = Sequential()
model_lstm_base_eth.add(LSTM(50, activation='relu', input_shape=(lookback, 3)))
model_lstm_base_eth.add(Dense(1))
model_lstm_base_eth.compile(optimizer='adam', loss='mean_absolute_error')

model_lstm_base_eth.fit(X_train_lstm_eth, y_train_lstm_eth, epochs=50, batch_size=32, verbose=0)
y_pred_lstm_base_eth = model_lstm_base_eth.predict(X_test_lstm_eth, verbose=0)

mae_lstm_base_eth = mean_absolute_error(y_test_lstm_eth, y_pred_lstm_base_eth)
rmse_lstm_base_eth = np.sqrt(mean_squared_error(y_test_lstm_eth, y_pred_lstm_base_eth))
r2_lstm_base_eth = r2_score(y_test_lstm_eth, y_pred_lstm_base_eth)

# Modelo LSTM Optimización 1 (más capas)
model_lstm_opt1_eth = Sequential()
model_lstm_opt1_eth.add(LSTM(50, return_sequences=True, input_shape=(lookback, 3)))
model_lstm_opt1_eth.add(LSTM(50))
model_lstm_opt1_eth.add(Dense(1))
model_lstm_opt1_eth.compile(optimizer='adam', loss='mean_absolute_error')

model_lstm_opt1_eth.fit(X_train_lstm_eth, y_train_lstm_eth, epochs=50, batch_size=32, verbose=0)
y_pred_lstm_opt1_eth = model_lstm_opt1_eth.predict(X_test_lstm_eth, verbose=0)

mae_lstm_opt1_eth = mean_absolute_error(y_test_lstm_eth, y_pred_lstm_opt1_eth)
rmse_lstm_opt1_eth = np.sqrt(mean_squared_error(y_test_lstm_eth, y_pred_lstm_opt1_eth))
r2_lstm_opt1_eth = r2_score(y_test_lstm_eth, y_pred_lstm_opt1_eth)

# Modelo LSTM Optimización 2 (con Dropout)
model_lstm_opt2_eth = Sequential()
model_lstm_opt2_eth.add(LSTM(100, return_sequences=True, input_shape=(lookback, 3)))
model_lstm_opt2_eth.add(Dropout(0.2))
model_lstm_opt2_eth.add(LSTM(50))
model_lstm_opt2_eth.add(Dropout(0.2))
model_lstm_opt2_eth.add(Dense(1))
model_lstm_opt2_eth.compile(optimizer='adam', loss='mean_absolute_error')

model_lstm_opt2_eth.fit(X_train_lstm_eth, y_train_lstm_eth, epochs=50, batch_size=32, verbose=0)
y_pred_lstm_opt2_eth = model_lstm_opt2_eth.predict(X_test_lstm_eth, verbose=0)

mae_lstm_opt2_eth = mean_absolute_error(y_test_lstm_eth, y_pred_lstm_opt2_eth)
rmse_lstm_opt2_eth = np.sqrt(mean_squared_error(y_test_lstm_eth, y_pred_lstm_opt2_eth))
r2_lstm_opt2_eth = r2_score(y_test_lstm_eth, y_pred_lstm_opt2_eth)

# Modelo LSTM Optimización 3 (lookback 60)
lookback_60 = 60
X_seq_eth_60 = []
y_seq_eth_60 = []

for i in range(lookback_60, len(X_scaled_eth)):
    X_seq_eth_60.append(X_scaled_eth[i - lookback_60:i])
    y_seq_eth_60.append(y_full_eth[i])

X_seq_eth_60 = np.array(X_seq_eth_60)
y_seq_eth_60 = np.array(y_seq_eth_60)

split_eth_60 = int(len(X_seq_eth_60) * 0.8)
X_train_lstm_eth_60, X_test_lstm_eth_60 = X_seq_eth_60[:split_eth_60], X_seq_eth_60[split_eth_60:]
y_train_lstm_eth_60, y_test_lstm_eth_60 = y_seq_eth_60[:split_eth_60], y_seq_eth_60[split_eth_60:]

model_lstm_opt3_eth = Sequential()
model_lstm_opt3_eth.add(LSTM(100, return_sequences=True, input_shape=(lookback_60, 3)))
model_lstm_opt3_eth.add(Dropout(0.2))
model_lstm_opt3_eth.add(LSTM(50))
model_lstm_opt3_eth.add(Dropout(0.2))
model_lstm_opt3_eth.add(Dense(1))
model_lstm_opt3_eth.compile(optimizer='adam', loss='mean_absolute_error')

model_lstm_opt3_eth.fit(X_train_lstm_eth_60, y_train_lstm_eth_60, epochs=50, batch_size=32, verbose=0)
y_pred_lstm_opt3_eth = model_lstm_opt3_eth.predict(X_test_lstm_eth_60, verbose=0)

mae_lstm_opt3_eth = mean_absolute_error(y_test_lstm_eth_60, y_pred_lstm_opt3_eth)
rmse_lstm_opt3_eth = np.sqrt(mean_squared_error(y_test_lstm_eth_60, y_pred_lstm_opt3_eth))
r2_lstm_opt3_eth = r2_score(y_test_lstm_eth_60, y_pred_lstm_opt3_eth)

# Comparación LSTM
print("Comparación LSTM - Ethereum:")
print(f"Base - MAE: {mae_lstm_base_eth:.2f}, RMSE: {rmse_lstm_base_eth:.2f}, R²: {r2_lstm_base_eth:.4f}")
print(f"Opt1 - MAE: {mae_lstm_opt1_eth:.2f}, RMSE: {rmse_lstm_opt1_eth:.2f}, R²: {r2_lstm_opt1_eth:.4f}")
print(f"Opt2 - MAE: {mae_lstm_opt2_eth:.2f}, RMSE: {rmse_lstm_opt2_eth:.2f}, R²: {r2_lstm_opt2_eth:.4f}")
print(f"Opt3 - MAE: {mae_lstm_opt3_eth:.2f}, RMSE: {rmse_lstm_opt3_eth:.2f}, R²: {r2_lstm_opt3_eth:.4f}")

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


Comparación LSTM - Ethereum:
Base - MAE: 551.16, RMSE: 708.38, R²: -1.0003
Opt1 - MAE: 2940.15, RMSE: 2982.51, R²: -34.4581
Opt2 - MAE: 2943.96, RMSE: 2986.27, R²: -34.5475
Opt3 - MAE: 2948.85, RMSE: 2990.92, R²: -34.7994


In [23]:
import joblib

joblib.dump(lasso_eth, "modelo_ethereum_lasso.joblib")

['modelo_ethereum_lasso.joblib']

## Selección del conjunto base para modelado automatizado

In [15]:
# Función para modelar cualquier criptomoneda
def modelo_completo_crypto(df, token_name, print_results=True):
    """
    Aplica todo el pipeline de modelado a una criptomoneda específica
    """
    results = {}

    # 1. Selección del conjunto base
    df_token = df[df["token"] == token_name].copy()
    df_token["close"] = pd.to_numeric(df_token["close"], errors='coerce')
    df_token["volume"] = pd.to_numeric(df_token["volume"], errors='coerce')
    df_token["marketCap"] = pd.to_numeric(df_token["marketCap"], errors='coerce')
    df_token["timestamp"] = pd.to_datetime(df_token["timestamp"], errors='coerce')
    df_token = df_token.sort_values("timestamp")

    # 2. Variables predictoras y target
    X = df_token[["close", "volume", "marketCap"]].shift(1)
    y = df_token["close"]
    X = X.iloc[1:]
    y = y.iloc[1:]

    # 3. División del conjunto
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

    # 4. Escalamiento
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 5. REGRESIÓN LINEAL
    # Base
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train)
    y_pred_lr = lr_model.predict(X_test_scaled)

    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    rmse_lr = np.sqrt(mean_squared_error(y_test, y_pred_lr))
    r2_lr = r2_score(y_test, y_pred_lr)

    # CV
    tscv = TimeSeriesSplit(n_splits=5)
    lr_cv = LinearRegression()
    cv_scores = []

    for train_idx, val_idx in tscv.split(X_train_scaled):
        X_train_cv, X_val_cv = X_train_scaled[train_idx], X_train_scaled[val_idx]
        y_train_cv, y_val_cv = y_train.iloc[train_idx], y_train.iloc[val_idx]

        lr_cv.fit(X_train_cv, y_train_cv)
        y_pred_cv = lr_cv.predict(X_val_cv)
        cv_scores.append(mean_absolute_error(y_val_cv, y_pred_cv))

    lr_cv.fit(X_train_scaled, y_train)
    y_pred_lr_cv = lr_cv.predict(X_test_scaled)

    mae_lr_cv = mean_absolute_error(y_test, y_pred_lr_cv)
    rmse_lr_cv = np.sqrt(mean_squared_error(y_test, y_pred_lr_cv))
    r2_lr_cv = r2_score(y_test, y_pred_lr_cv)

    # Ridge
    ridge = Ridge(alpha=1.0)
    ridge.fit(X_train_scaled, y_train)
    y_pred_ridge = ridge.predict(X_test_scaled)

    mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
    rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))
    r2_ridge = r2_score(y_test, y_pred_ridge)

    # Lasso
    lasso = Lasso(alpha=0.1)
    lasso.fit(X_train_scaled, y_train)
    y_pred_lasso = lasso.predict(X_test_scaled)

    mae_lasso = mean_absolute_error(y_test, y_pred_lasso)
    rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))
    r2_lasso = r2_score(y_test, y_pred_lasso)

    # 6. RANDOM FOREST
    # Base
    rf_base = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_base.fit(X_train, y_train)
    y_pred_rf_base = rf_base.predict(X_test)

    mae_rf_base = mean_absolute_error(y_test, y_pred_rf_base)
    rmse_rf_base = np.sqrt(mean_squared_error(y_test, y_pred_rf_base))
    r2_rf_base = r2_score(y_test, y_pred_rf_base)

    # CV
    rf_cv = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_cv_scores = []

    for train_idx, val_idx in tscv.split(X_train):
        X_train_rf_cv, X_val_rf_cv = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train_rf_cv, y_val_rf_cv = y_train.iloc[train_idx], y_train.iloc[val_idx]

        rf_cv.fit(X_train_rf_cv, y_train_rf_cv)
        y_pred_rf_cv = rf_cv.predict(X_val_rf_cv)
        rf_cv_scores.append(mean_absolute_error(y_val_rf_cv, y_pred_rf_cv))

    rf_cv.fit(X_train, y_train)
    y_pred_rf_cv = rf_cv.predict(X_test)

    mae_rf_cv = mean_absolute_error(y_test, y_pred_rf_cv)
    rmse_rf_cv = np.sqrt(mean_squared_error(y_test, y_pred_rf_cv))
    r2_rf_cv = r2_score(y_test, y_pred_rf_cv)

    # Optimizado
    param_dist = {
        'n_estimators': [100, 150, 200],
        'max_depth': [5, 10, 15, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt']
    }

    rf_opt = RandomForestRegressor(random_state=42)
    random_search = RandomizedSearchCV(
        estimator=rf_opt,
        param_distributions=param_dist,
        n_iter=25,
        cv=TimeSeriesSplit(n_splits=5),
        scoring='neg_mean_absolute_error',
        random_state=42,
        n_jobs=1
    )

    random_search.fit(X_train, y_train)
    rf_optimized = random_search.best_estimator_
    y_pred_rf_opt = rf_optimized.predict(X_test)

    mae_rf_opt = mean_absolute_error(y_test, y_pred_rf_opt)
    rmse_rf_opt = np.sqrt(mean_squared_error(y_test, y_pred_rf_opt))
    r2_rf_opt = r2_score(y_test, y_pred_rf_opt)

    # 7. LSTM
    # Preparación de datos
    X_full = df_token[["close", "volume", "marketCap"]].values
    y_full = df_token["close"].values

    scaler_lstm = StandardScaler()
    X_scaled_lstm = scaler_lstm.fit_transform(X_full)

    lookback = 30
    X_seq = []
    y_seq = []

    for i in range(lookback, len(X_scaled_lstm)):
        X_seq.append(X_scaled_lstm[i - lookback:i])
        y_seq.append(y_full[i])

    X_seq = np.array(X_seq)
    y_seq = np.array(y_seq)

    split = int(len(X_seq) * 0.8)
    X_train_lstm, X_test_lstm = X_seq[:split], X_seq[split:]
    y_train_lstm, y_test_lstm = y_seq[:split], y_seq[split:]

    # Base LSTM
    model_lstm_base = Sequential()
    model_lstm_base.add(LSTM(50, activation='relu', input_shape=(lookback, 3)))
    model_lstm_base.add(Dense(1))
    model_lstm_base.compile(optimizer='adam', loss='mean_absolute_error')

    model_lstm_base.fit(X_train_lstm, y_train_lstm, epochs=50, batch_size=32, verbose=0)
    y_pred_lstm_base = model_lstm_base.predict(X_test_lstm, verbose=0)

    mae_lstm_base = mean_absolute_error(y_test_lstm, y_pred_lstm_base)
    rmse_lstm_base = np.sqrt(mean_squared_error(y_test_lstm, y_pred_lstm_base))
    r2_lstm_base = r2_score(y_test_lstm, y_pred_lstm_base)

    # LSTM Opt1 (más capas)
    model_lstm_opt1 = Sequential()
    model_lstm_opt1.add(LSTM(50, return_sequences=True, input_shape=(lookback, 3)))
    model_lstm_opt1.add(LSTM(50))
    model_lstm_opt1.add(Dense(1))
    model_lstm_opt1.compile(optimizer='adam', loss='mean_absolute_error')

    model_lstm_opt1.fit(X_train_lstm, y_train_lstm, epochs=50, batch_size=32, verbose=0)
    y_pred_lstm_opt1 = model_lstm_opt1.predict(X_test_lstm, verbose=0)

    mae_lstm_opt1 = mean_absolute_error(y_test_lstm, y_pred_lstm_opt1)
    rmse_lstm_opt1 = np.sqrt(mean_squared_error(y_test_lstm, y_pred_lstm_opt1))
    r2_lstm_opt1 = r2_score(y_test_lstm, y_pred_lstm_opt1)

    # LSTM Opt2 (con Dropout)
    model_lstm_opt2 = Sequential()
    model_lstm_opt2.add(LSTM(100, return_sequences=True, input_shape=(lookback, 3)))
    model_lstm_opt2.add(Dropout(0.2))
    model_lstm_opt2.add(LSTM(50))
    model_lstm_opt2.add(Dropout(0.2))
    model_lstm_opt2.add(Dense(1))
    model_lstm_opt2.compile(optimizer='adam', loss='mean_absolute_error')

    model_lstm_opt2.fit(X_train_lstm, y_train_lstm, epochs=50, batch_size=32, verbose=0)
    y_pred_lstm_opt2 = model_lstm_opt2.predict(X_test_lstm, verbose=0)

    mae_lstm_opt2 = mean_absolute_error(y_test_lstm, y_pred_lstm_opt2)
    rmse_lstm_opt2 = np.sqrt(mean_squared_error(y_test_lstm, y_pred_lstm_opt2))
    r2_lstm_opt2 = r2_score(y_test_lstm, y_pred_lstm_opt2)

    # LSTM Opt3 (lookback 60)
    lookback_60 = 60
    X_seq_60 = []
    y_seq_60 = []

    for i in range(lookback_60, len(X_scaled_lstm)):
        X_seq_60.append(X_scaled_lstm[i - lookback_60:i])
        y_seq_60.append(y_full[i])

    X_seq_60 = np.array(X_seq_60)
    y_seq_60 = np.array(y_seq_60)

    split_60 = int(len(X_seq_60) * 0.8)
    X_train_lstm_60, X_test_lstm_60 = X_seq_60[:split_60], X_seq_60[split_60:]
    y_train_lstm_60, y_test_lstm_60 = y_seq_60[:split_60], y_seq_60[split_60:]

    model_lstm_opt3 = Sequential()
    model_lstm_opt3.add(LSTM(100, return_sequences=True, input_shape=(lookback_60, 3)))
    model_lstm_opt3.add(Dropout(0.2))
    model_lstm_opt3.add(LSTM(50))
    model_lstm_opt3.add(Dropout(0.2))
    model_lstm_opt3.add(Dense(1))
    model_lstm_opt3.compile(optimizer='adam', loss='mean_absolute_error')

    model_lstm_opt3.fit(X_train_lstm_60, y_train_lstm_60, epochs=50, batch_size=32, verbose=0)
    y_pred_lstm_opt3 = model_lstm_opt3.predict(X_test_lstm_60, verbose=0)

    mae_lstm_opt3 = mean_absolute_error(y_test_lstm_60, y_pred_lstm_opt3)
    rmse_lstm_opt3 = np.sqrt(mean_squared_error(y_test_lstm_60, y_pred_lstm_opt3))
    r2_lstm_opt3 = r2_score(y_test_lstm_60, y_pred_lstm_opt3)

    # 8. Compilar resultados
    results = {
        'token': token_name,
        'regresion_lineal': {
            'base': {'MAE': mae_lr, 'RMSE': rmse_lr, 'R2': r2_lr},
            'cv': {'MAE': mae_lr_cv, 'RMSE': rmse_lr_cv, 'R2': r2_lr_cv},
            'ridge': {'MAE': mae_ridge, 'RMSE': rmse_ridge, 'R2': r2_ridge},
            'lasso': {'MAE': mae_lasso, 'RMSE': rmse_lasso, 'R2': r2_lasso}
        },
        'random_forest': {
            'base': {'MAE': mae_rf_base, 'RMSE': rmse_rf_base, 'R2': r2_rf_base},
            'cv': {'MAE': mae_rf_cv, 'RMSE': rmse_rf_cv, 'R2': r2_rf_cv},
            'optimizado': {'MAE': mae_rf_opt, 'RMSE': rmse_rf_opt, 'R2': r2_rf_opt}
        },
        'lstm': {
            'base': {'MAE': mae_lstm_base, 'RMSE': rmse_lstm_base, 'R2': r2_lstm_base},
            'opt1': {'MAE': mae_lstm_opt1, 'RMSE': rmse_lstm_opt1, 'R2': r2_lstm_opt1},
            'opt2': {'MAE': mae_lstm_opt2, 'RMSE': rmse_lstm_opt2, 'R2': r2_lstm_opt2},
            'opt3': {'MAE': mae_lstm_opt3, 'RMSE': rmse_lstm_opt3, 'R2': r2_lstm_opt3}
        }
    }

    if print_results:
        print(f"\\n{'='*50}")
        print(f"RESULTADOS PARA {token_name.upper()}")
        print(f"{'='*50}")

        print(f"\\nREGRESIÓN LINEAL:")
        print(f"Base - MAE: {mae_lr:.2f}, RMSE: {rmse_lr:.2f}, R²: {r2_lr:.4f}")
        print(f"CV - MAE: {mae_lr_cv:.2f}, RMSE: {rmse_lr_cv:.2f}, R²: {r2_lr_cv:.4f}")
        print(f"Ridge - MAE: {mae_ridge:.2f}, RMSE: {rmse_ridge:.2f}, R²: {r2_ridge:.4f}")
        print(f"Lasso - MAE: {mae_lasso:.2f}, RMSE: {rmse_lasso:.2f}, R²: {r2_lasso:.4f}")

        print(f"\\nRANDOM FOREST:")
        print(f"Base - MAE: {mae_rf_base:.2f}, RMSE: {rmse_rf_base:.2f}, R²: {r2_rf_base:.4f}")
        print(f"CV - MAE: {mae_rf_cv:.2f}, RMSE: {rmse_rf_cv:.2f}, R²: {r2_rf_cv:.4f}")
        print(f"Optimizado - MAE: {mae_rf_opt:.2f}, RMSE: {rmse_rf_opt:.2f}, R²: {r2_rf_opt:.4f}")

        print(f"\\nLSTM:")
        print(f"Base - MAE: {mae_lstm_base:.2f}, RMSE: {rmse_lstm_base:.2f}, R²: {r2_lstm_base:.4f}")
        print(f"Opt1 - MAE: {mae_lstm_opt1:.2f}, RMSE: {rmse_lstm_opt1:.2f}, R²: {r2_lstm_opt1:.4f}")
        print(f"Opt2 - MAE: {mae_lstm_opt2:.2f}, RMSE: {rmse_lstm_opt2:.2f}, R²: {r2_lstm_opt2:.4f}")
        print(f"Opt3 - MAE: {mae_lstm_opt3:.2f}, RMSE: {rmse_lstm_opt3:.2f}, R²: {r2_lstm_opt3:.4f}")

    modelos_mae = {
        'lr_base'      : (lr_model,        mae_lr),
        'lr_cv'        : (lr_cv,           mae_lr_cv),
        'ridge'        : (ridge,           mae_ridge),
        'lasso'        : (lasso,           mae_lasso),
        'rf_base'      : (rf_base,         mae_rf_base),
        'rf_cv'        : (rf_cv,           mae_rf_cv),
        'rf_opt'       : (rf_optimized,    mae_rf_opt),
        'lstm_base'    : (model_lstm_base, mae_lstm_base),
        'lstm_opt1'    : (model_lstm_opt1, mae_lstm_opt1),
        'lstm_opt2'    : (model_lstm_opt2, mae_lstm_opt2),
        'lstm_opt3'    : (model_lstm_opt3, mae_lstm_opt3),
    }

    # 9. Seleccionar el nombre con el MAE mínimo
    best_name, (best_model, best_mae) = min(modelos_mae.items(), key=lambda kv: kv[1][1])

    if print_results:
        print(f"\n>> El mejor modelo es '{best_name}' con MAE = {best_mae:.4f}")

    # 10. Guardar el modelo
    filename = f"{token_name}_{best_name}.h5"
    # Si es un modelo Keras (hereda de tf.keras.Model), usa save()
    from tensorflow.keras import Model as KerasModel
    if isinstance(best_model, KerasModel):
        best_model.save(filename)
    else:
        joblib.dump(best_model, filename)

    if print_results:
        print(f">> Modelo guardado en: {filename}")

    # 11. Devolver el nombre y ruta del mejor modelo
    results['best_model'] = {'name': best_name, 'mae': best_mae, 'path': filename}
    return results

## 4.2. Ethereum

In [24]:
results_eth = modelo_completo_crypto(df, "Ethereum", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA ETHEREUM
\nREGRESIÓN LINEAL:
Base - MAE: 74.31, RMSE: 104.44, R²: 0.9576
CV - MAE: 74.31, RMSE: 104.44, R²: 0.9576
Ridge - MAE: 73.97, RMSE: 103.87, R²: 0.9581
Lasso - MAE: 73.76, RMSE: 103.68, R²: 0.9582
\nRANDOM FOREST:
Base - MAE: 106.42, RMSE: 142.92, R²: 0.9206
CV - MAE: 106.42, RMSE: 142.92, R²: 0.9206
Optimizado - MAE: 99.64, RMSE: 134.70, R²: 0.9295
\nLSTM:
Base - MAE: 401.59, RMSE: 524.26, R²: -0.0956
Opt1 - MAE: 2942.95, RMSE: 2985.27, R²: -34.5237
Opt2 - MAE: 2943.00, RMSE: 2985.32, R²: -34.5250
Opt3 - MAE: 2949.84, RMSE: 2991.89, R²: -34.8227

>> El mejor modelo es 'lasso' con MAE = 73.7587
>> Modelo guardado en: Ethereum_lasso.h5


## 4.3. Polkadot

In [16]:
results_polkadot = modelo_completo_crypto(df, "Polkadot", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA POLKADOT
\nREGRESIÓN LINEAL:
Base - MAE: 0.77, RMSE: 0.92, R²: 0.7724
CV - MAE: 0.77, RMSE: 0.92, R²: 0.7724
Ridge - MAE: 0.78, RMSE: 0.93, R²: 0.7630
Lasso - MAE: 0.82, RMSE: 0.95, R²: 0.7561
\nRANDOM FOREST:
Base - MAE: 0.28, RMSE: 0.45, R²: 0.9460
CV - MAE: 0.28, RMSE: 0.45, R²: 0.9460
Optimizado - MAE: 0.73, RMSE: 1.03, R²: 0.7121
\nLSTM:
Base - MAE: 0.84, RMSE: 1.09, R²: 0.6789
Opt1 - MAE: 0.70, RMSE: 0.96, R²: 0.7527
Opt2 - MAE: 0.88, RMSE: 1.07, R²: 0.6935
Opt3 - MAE: 0.89, RMSE: 1.08, R²: 0.6853

>> El mejor modelo es 'rf_base' con MAE = 0.2762
>> Modelo guardado en: Polkadot_rf_base.h5


## 4.4. Oasis

In [17]:
results_oasis = modelo_completo_crypto(df, "Oasis", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA OASIS
\nREGRESIÓN LINEAL:
Base - MAE: 0.01, RMSE: 0.01, R²: 0.9343
CV - MAE: 0.01, RMSE: 0.01, R²: 0.9343
Ridge - MAE: 0.01, RMSE: 0.01, R²: 0.9340
Lasso - MAE: 0.03, RMSE: 0.03, R²: -0.2526
\nRANDOM FOREST:
Base - MAE: 0.00, RMSE: 0.01, R²: 0.9457
CV - MAE: 0.00, RMSE: 0.01, R²: 0.9457
Optimizado - MAE: 0.01, RMSE: 0.01, R²: 0.8899
\nLSTM:
Base - MAE: 0.01, RMSE: 0.01, R²: 0.8906
Opt1 - MAE: 0.00, RMSE: 0.01, R²: 0.9509
Opt2 - MAE: 0.00, RMSE: 0.01, R²: 0.9463
Opt3 - MAE: 0.00, RMSE: 0.01, R²: 0.9244

>> El mejor modelo es 'lstm_opt1' con MAE = 0.0040
>> Modelo guardado en: Oasis_lstm_opt1.h5


## 4.5. Nervos Network

In [18]:
results_nervos = modelo_completo_crypto(df, "Nervos Network", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA NERVOS NETWORK
\nREGRESIÓN LINEAL:
Base - MAE: 0.00, RMSE: 0.00, R²: 0.9568
CV - MAE: 0.00, RMSE: 0.00, R²: 0.9568
Ridge - MAE: 0.00, RMSE: 0.00, R²: 0.9565
Lasso - MAE: 0.01, RMSE: 0.01, R²: -0.9259
\nRANDOM FOREST:
Base - MAE: 0.00, RMSE: 0.00, R²: 0.9306
CV - MAE: 0.00, RMSE: 0.00, R²: 0.9306
Optimizado - MAE: 0.00, RMSE: 0.00, R²: 0.3759
\nLSTM:
Base - MAE: 0.00, RMSE: 0.00, R²: 0.2394
Opt1 - MAE: 0.00, RMSE: 0.00, R²: 0.8506
Opt2 - MAE: 0.00, RMSE: 0.00, R²: 0.9074
Opt3 - MAE: 0.00, RMSE: 0.00, R²: 0.8304

>> El mejor modelo es 'lr_base' con MAE = 0.0007
>> Modelo guardado en: Nervos Network_lr_base.h5


## 4.6. Terra Classic

In [19]:
results_terra = modelo_completo_crypto(df, "Terra Classic", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA TERRA CLASSIC
\nREGRESIÓN LINEAL:
Base - MAE: 0.06, RMSE: 0.07, R²: -5852313.4375
CV - MAE: 0.06, RMSE: 0.07, R²: -5852313.4375
Ridge - MAE: 0.07, RMSE: 0.07, R²: -6002558.6653
Lasso - MAE: 0.13, RMSE: 0.13, R²: -22976706.9583
\nRANDOM FOREST:
Base - MAE: 0.00, RMSE: 0.00, R²: 0.9135
CV - MAE: 0.00, RMSE: 0.00, R²: 0.9135
Optimizado - MAE: 0.01, RMSE: 0.02, R²: -739692.1476
\nLSTM:
Base - MAE: 0.00, RMSE: 0.00, R²: -5451.1945
Opt1 - MAE: 0.01, RMSE: 0.01, R²: -72812.7162
Opt2 - MAE: 0.01, RMSE: 0.01, R²: -87542.7872
Opt3 - MAE: 0.01, RMSE: 0.01, R²: -71739.4023

>> El mejor modelo es 'rf_base' con MAE = 0.0000
>> Modelo guardado en: Terra Classic_rf_base.h5


## 4.7. The Graph

In [20]:
results_graph = modelo_completo_crypto(df, "The Graph", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA THE GRAPH
\nREGRESIÓN LINEAL:
Base - MAE: 0.02, RMSE: 0.02, R²: 0.9092
CV - MAE: 0.02, RMSE: 0.02, R²: 0.9092
Ridge - MAE: 0.02, RMSE: 0.02, R²: 0.9068
Lasso - MAE: 0.05, RMSE: 0.05, R²: 0.5158
\nRANDOM FOREST:
Base - MAE: 0.02, RMSE: 0.02, R²: 0.9133
CV - MAE: 0.02, RMSE: 0.02, R²: 0.9133
Optimizado - MAE: 0.04, RMSE: 0.06, R²: 0.4160
\nLSTM:
Base - MAE: 0.02, RMSE: 0.03, R²: 0.8350
Opt1 - MAE: 0.01, RMSE: 0.01, R²: 0.9611
Opt2 - MAE: 0.01, RMSE: 0.02, R²: 0.9450
Opt3 - MAE: 0.01, RMSE: 0.02, R²: 0.9293

>> El mejor modelo es 'lstm_opt1' con MAE = 0.0105
>> Modelo guardado en: The Graph_lstm_opt1.h5


## 4.8. Algorand

In [21]:
results_algorand = modelo_completo_crypto(df, "Algorand", print_results=True)

  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


RESULTADOS PARA ALGORAND
\nREGRESIÓN LINEAL:
Base - MAE: 0.01, RMSE: 0.02, R²: 0.9534
CV - MAE: 0.01, RMSE: 0.02, R²: 0.9534
Ridge - MAE: 0.01, RMSE: 0.02, R²: 0.9515
Lasso - MAE: 0.08, RMSE: 0.08, R²: 0.0709
\nRANDOM FOREST:
Base - MAE: 0.01, RMSE: 0.02, R²: 0.9660
CV - MAE: 0.01, RMSE: 0.02, R²: 0.9660
Optimizado - MAE: 0.03, RMSE: 0.05, R²: 0.6419
\nLSTM:
Base - MAE: 0.01, RMSE: 0.02, R²: 0.9526
Opt1 - MAE: 0.01, RMSE: 0.02, R²: 0.9689
Opt2 - MAE: 0.01, RMSE: 0.02, R²: 0.9470
Opt3 - MAE: 0.01, RMSE: 0.02, R²: 0.9463

>> El mejor modelo es 'lstm_opt1' con MAE = 0.0088
>> Modelo guardado en: Algorand_lstm_opt1.h5
