In [None]:
# Custom callback to print metrics
class MetricsLogger(Callback):
    """Callback that prints the training and validation metrics after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print loss, MSE and MAE (training and validation) for one epoch.

        Args:
            epoch: Epoch index as passed by the caller; printed as ``epoch + 1``.
            logs: Mapping of metric name to value. When it is missing or
                empty, every metric is printed as ``None``.
        """
        metrics = logs if logs else {}
        metric_names = ('loss', 'val_loss', 'mse', 'val_mse', 'mae', 'val_mae')
        report = ", ".join(f"{name} = {metrics.get(name)}" for name in metric_names)
        print(f"Epoch {epoch + 1}: {report}")

# Function to build the model based on the parameters
def build_model(
    input_shape,
    units=(256, 128, 64),
    dropout_rate=0.2,
    recurrent_dropout_rate=0.2,
    optimizer='rmsprop',
    model_type='GRU',
):
    """Build and compile a stacked recurrent regression model.

    One recurrent layer is added per entry of ``units``, each followed by a
    ``Dropout`` layer. Every recurrent layer except the last returns full
    sequences so that the next recurrent layer can consume them. The model
    ends in a single-unit ``Dense`` layer and is compiled with mean squared
    error as the loss and ``mse``/``mae`` as metrics.

    Args:
        input_shape: Shape passed to the ``Input`` layer.
        units: Sequence with the number of units of each recurrent layer.
        dropout_rate: Rate used for the recurrent layers' ``dropout`` argument
            and for the ``Dropout`` layer that follows each of them.
        recurrent_dropout_rate: Rate used for ``recurrent_dropout``.
        optimizer: Optimizer handed to ``model.compile``.
        model_type: One of ``'GRU'``, ``'LSTM'`` or ``'Bidirectional_LSTM'``.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Validate first: an unknown type used to be ignored silently, which
    # produced a model made only of Dropout layers and the output Dense layer.
    supported_types = ('GRU', 'LSTM', 'Bidirectional_LSTM')
    if model_type not in supported_types:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_types}"
        )

    model = Sequential()
    model.add(Input(shape=input_shape))

    last_index = len(units) - 1
    for i, unit in enumerate(units):
        layer_cls = GRU if model_type == 'GRU' else LSTM
        recurrent_layer = layer_cls(
            unit,
            return_sequences=(i < last_index),
            dropout=dropout_rate,
            recurrent_dropout=recurrent_dropout_rate,
        )
        if model_type == 'Bidirectional_LSTM':
            recurrent_layer = Bidirectional(recurrent_layer)
        model.add(recurrent_layer)
        model.add(Dropout(dropout_rate))

    model.add(Dense(1))  # Output layer with one unit for the predicted target value
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mse', 'mae'])
    return model


# Function to save metrics and hyperparameters
def save_metrics_and_params(symbol, model_dir, best_score, best_params):
    """Write the best score and hyperparameters of ``symbol`` to a text file.

    The file is ``<model_dir>/<symbol>_best_metrics.txt`` and is overwritten
    if it already exists. It contains the score line, a header line and one
    ``param: value`` line per hyperparameter.

    Args:
        symbol: Name used as the file-name prefix.
        model_dir: Directory in which the file is written.
        best_score: Score written on the first line.
        best_params: Mapping of hyperparameter name to value. ``None`` is
            accepted and produces a file without parameter lines.
    """
    metrics_path = os.path.join(model_dir, f"{symbol}_best_metrics.txt")

    lines = [f"Best Score (MAE): {best_score}\n", "Best Hyperparameters:\n"]
    # best_params is None when the search produced no result; treat it as empty
    # instead of failing on None.items().
    lines.extend(f"{param}: {value}\n" for param, value in (best_params or {}).items())

    with open(metrics_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

def train_and_evaluate(symbol, model_dir, param_grid, sequence_length):
    """Grid-search hyperparameters with time-series CV, then evaluate the best model.

    For every combination in ``param_grid`` a fresh model is trained on each
    ``TimeSeriesSplit`` fold of the training data and scored by validation
    MAE. The combination with the lowest mean MAE wins; the checkpoint of its
    last completed fold is moved to ``<model_dir>/<symbol>/best_model.keras``.
    The score and parameters are written with ``save_metrics_and_params``, and
    the model is then evaluated on the test set (MSE, MAE, R² in the original
    target scale) and plotted.

    Args:
        symbol: Name used to locate ``<symbol>_prepared_data.pkl`` and to name
            the outputs.
        model_dir: Directory under which the per-symbol model folder is created.
        param_grid: Dict of candidate lists with the keys ``units``,
            ``dropout_rate``, ``optimizer``, ``model_type``, ``epochs``,
            ``batch_size`` and, optionally, ``recurrent_dropout_rate``
            (``[0.2]`` is used when the key is absent).
        sequence_length: Not used by this function; kept so existing callers
            keep working.

    Raises:
        ValueError: If training and test samples overlap.
        RuntimeError: If no combination produced both a score and a checkpoint.
    """
    from itertools import product  # local import keeps this block self-contained

    print(f"Entrenando y evaluando modelo para {symbol}")

    # Load the prepared data.
    # NOTE(review): this is a relative path, so it depends on the current
    # working directory, while model_dir elsewhere is absolute — confirm.
    # NOTE: joblib.load unpickles the file; only load files you produced yourself.
    prepared_data_path = os.path.join("..//content/drive/My Drive/tradingcripto/data/processed", f"{symbol}_prepared_data.pkl")
    X_train, X_test, y_train, y_test, feature_scaler, target_scaler = joblib.load(prepared_data_path)

    if check_overlap(X_train, X_test):
        raise ValueError(f"Existe solapamiento entre los conjuntos de entrenamiento y validación para {symbol}")

    input_shape = (X_train.shape[1], X_train.shape[2])

    # Location of the final best model
    symbol_model_dir = os.path.join(model_dir, symbol)
    os.makedirs(symbol_model_dir, exist_ok=True)
    best_model_path = os.path.join(symbol_model_dir, "best_model.keras")

    # Cross-validation splitter
    tscv = TimeSeriesSplit(n_splits=3)

    # Best results found so far
    best_score = float('inf')
    best_params = None
    best_model_path_temp = None

    # 'recurrent_dropout_rate' used to be ignored even when present in the
    # grid. It is optional so grids without the key still work; the fallback
    # matches build_model's default.
    recurrent_dropout_rates = param_grid.get('recurrent_dropout_rate', [0.2])

    # product() iterates with the first argument outermost, like the nested
    # loops it replaces.
    combinations = product(
        param_grid['units'],
        param_grid['dropout_rate'],
        recurrent_dropout_rates,
        param_grid['optimizer'],
        param_grid['model_type'],
        param_grid['epochs'],
        param_grid['batch_size'],
    )

    for units, dropout_rate, recurrent_dropout_rate, optimizer, model_type, epochs, batch_size in combinations:
        val_losses = []
        last_saved_fold_path = None  # checkpoint of the last fold that actually finished

        # Cross-validation
        for fold, (train_index, val_index) in enumerate(tscv.split(X_train)):
            X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
            y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

            model = build_model(
                input_shape,
                units=units,
                dropout_rate=dropout_rate,
                recurrent_dropout_rate=recurrent_dropout_rate,
                optimizer=optimizer,
                model_type=model_type,
            )

            early_stopping = EarlyStopping(monitor='val_mae', mode='min', patience=6, verbose=1)
            metrics_logger = MetricsLogger()

            # Checkpoint file for this combination and fold
            fold_model_path = os.path.join(
                symbol_model_dir,
                f"model_{units}_{dropout_rate}_{recurrent_dropout_rate}_{optimizer}_{model_type}_{epochs}_{batch_size}_fold_{fold}.keras",
            )
            checkpoint = ModelCheckpoint(fold_model_path, monitor='val_mae', save_best_only=True, mode='min', verbose=1)

            try:
                model.fit(
                    X_train_fold, y_train_fold,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(X_val_fold, y_val_fold),
                    callbacks=[early_stopping, metrics_logger, checkpoint],
                    verbose=0
                )

                # evaluate() returns [loss, mse, mae] for the compiled metrics.
                # NOTE(review): this scores the final-epoch weights, while the
                # checkpoint holds the best-epoch weights — confirm intended.
                val_mae = model.evaluate(X_val_fold, y_val_fold, verbose=0)[2]
            except ConnectionResetError as e:
                # The fold is skipped, not retried.
                print(f"Connection error encountered: {e}. Skipping fold {fold}.")
                continue

            val_losses.append(val_mae)
            if os.path.exists(fold_model_path):
                last_saved_fold_path = fold_model_path

        if not val_losses:
            # Every fold failed: there is nothing to average.
            print(f"No fold completed for {symbol} with {units} units; skipping this combination.")
            continue

        mean_val_loss = np.mean(val_losses)
        print(f"Mean validation loss (MAE) for {symbol} with {units} units, {dropout_rate} dropout, {optimizer} optimizer, {model_type} model, {epochs} epochs, and {batch_size} batch size: {mean_val_loss}")

        # Keep the combination with the lowest mean val_mae, but only when a
        # checkpoint exists on disk to promote.
        if mean_val_loss < best_score and last_saved_fold_path is not None:
            best_score = mean_val_loss
            best_params = {
                'units': units,
                'dropout_rate': dropout_rate,
                'recurrent_dropout_rate': recurrent_dropout_rate,
                'optimizer': optimizer,
                'model_type': model_type,
                'epochs': epochs,
                'batch_size': batch_size
            }
            best_model_path_temp = last_saved_fold_path

    if best_params is None:
        # Fail here rather than loading a stale best_model.keras from an earlier run.
        raise RuntimeError(f"No hyperparameter combination could be trained for {symbol}")

    # Move the best model to its final location
    if best_model_path_temp and best_model_path_temp != best_model_path:
        shutil.move(best_model_path_temp, best_model_path)

    # Save the metrics and hyperparameters of the best model
    save_metrics_and_params(symbol, model_dir, best_score, best_params)

    print(f"Best parameters for {symbol}: {best_params}")
    print(f"Best score for {symbol}: {best_score}")

    # Evaluate the best model
    best_model = load_model(best_model_path)
    predicted = best_model.predict(X_test)

    # Undo the target scaling for y_test and the predictions
    y_test_rescaled = target_scaler.inverse_transform(y_test.reshape(-1, 1))
    predicted_rescaled = target_scaler.inverse_transform(predicted.reshape(-1, 1))

    # Compute metrics
    mse = mean_squared_error(y_test_rescaled, predicted_rescaled)
    mae = mean_absolute_error(y_test_rescaled, predicted_rescaled)
    r2 = r2_score(y_test_rescaled, predicted_rescaled)

    print(f"MSE for {symbol}: {mse}")
    print(f"MAE for {symbol}: {mae}")
    print(f"R² for {symbol}: {r2}")

    # Plot the results
    plt.figure(figsize=(14, 7))
    plt.plot(y_test_rescaled, label='Real')
    plt.plot(predicted_rescaled, label='Predicho')
    plt.title(f'Predicciones de Precios con el Mejor Modelo para {symbol}')
    plt.legend()
    plt.show()

# Candidate values explored by the hyperparameter grid search
param_grid = dict(
    units=[[256, 128, 64], [128, 64, 32], [64, 32, 16]],
    dropout_rate=[0.2, 0.3],
    recurrent_dropout_rate=[0.2, 0.3],
    optimizer=['rmsprop'],
    model_type=['Bidirectional_LSTM'],
    epochs=[30],
    batch_size=[64, 128],
)

# Check whether any sample appears in both data sets
def check_overlap(train_data, test_data):
    """Return True when at least one flattened sample occurs in both arrays.

    Each array is reshaped to ``(n_samples, -1)`` and every row is compared
    by exact value.
    """
    def _as_row_set(samples):
        flattened = samples.reshape((samples.shape[0], -1))
        return {tuple(row) for row in flattened}

    return not _as_row_set(train_data).isdisjoint(_as_row_set(test_data))

# Directory where the trained models are stored (created below if missing)
model_dir = os.path.abspath("/content/drive/My Drive/tradingcripto/models")

# Base directory for the logs
log_base_dir = '/content/drive/My Drive/tradingcripto/logs'

# Sequence length handed to train_and_evaluate
sequence_length = 50

os.makedirs(model_dir, exist_ok=True)

In [None]:
def analyze_folds(symbol, sequence_length=50):
    """Print the shapes and plot the targets of every time-series CV fold.

    Loads ``<symbol>_prepared_data.pkl``, splits the training data with a
    3-split ``TimeSeriesSplit`` and, for each fold, prints the shapes of the
    train/validation arrays and plots the validation targets right after the
    training targets on the same axis.

    Args:
        symbol: Name used to locate the prepared data file.
        sequence_length: Not used by this function.
    """
    # Load the prepared data; only the training arrays are used here
    data_file = os.path.join("..//content/drive/My Drive/tradingcripto/data/processed", f"{symbol}_prepared_data.pkl")
    X_train, _X_test, y_train, _y_test, _feature_scaler, _target_scaler = joblib.load(data_file)

    splitter = TimeSeriesSplit(n_splits=3)

    for fold, (train_index, val_index) in enumerate(splitter.split(X_train), start=1):
        X_train_fold = X_train[train_index]
        X_val_fold = X_train[val_index]
        y_train_fold = y_train[train_index]
        y_val_fold = y_train[val_index]

        print(f"Fold {fold}:")
        labelled_arrays = (
            ('X_train_fold', X_train_fold),
            ('X_val_fold', X_val_fold),
            ('y_train_fold', y_train_fold),
            ('y_val_fold', y_val_fold),
        )
        for label, values in labelled_arrays:
            print(f"  Tamaño de {label}: {values.shape}")

        # Plot the validation targets starting where the training targets end
        val_start = len(y_train_fold)
        val_positions = range(val_start, val_start + len(y_val_fold))

        plt.figure(figsize=(14, 7))
        plt.plot(y_train_fold, label='y_train_fold')
        plt.plot(val_positions, y_val_fold, label='y_val_fold')
        plt.title(f'Series Temporales para el Fold {fold}')
        plt.legend()
        plt.show()

In [None]:
# Train and evaluate the model for BTCUSDT
train_and_evaluate('BTCUSDT', model_dir=model_dir, param_grid=param_grid, sequence_length=sequence_length)

In [None]:
# Inspect the cross-validation folds of BTCUSDT
analyze_folds(symbol='BTCUSDT')

In [None]:
# Train and evaluate the model for ETHUSDT
train_and_evaluate('ETHUSDT', model_dir=model_dir, param_grid=param_grid, sequence_length=sequence_length)

In [None]:
# Inspect the cross-validation folds of ETHUSDT
analyze_folds(symbol='ETHUSDT')

In [None]:
# Train and evaluate the model for XRPUSDT
train_and_evaluate('XRPUSDT', model_dir=model_dir, param_grid=param_grid, sequence_length=sequence_length)

In [None]:
# Inspect the cross-validation folds of XRPUSDT
analyze_folds(symbol='XRPUSDT')

In [None]:
# Train and evaluate the model for LTCUSDT
train_and_evaluate('LTCUSDT', model_dir=model_dir, param_grid=param_grid, sequence_length=sequence_length)

In [None]:
# Inspect the cross-validation folds of LTCUSDT
analyze_folds(symbol='LTCUSDT')

In [None]:
def _read_best_metrics_file(metrics_path):
    """Parse the ``param: value`` lines that follow the hyperparameter header.

    Values are decoded with ``ast.literal_eval``; a value that is not a Python
    literal (for example an optimizer name) is kept as a string.
    """
    import ast  # local import so the block does not rely on a file-level import

    params = {}
    in_params_section = False
    with open(metrics_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not in_params_section:
                # Everything up to and including the header is not a parameter
                in_params_section = (line == "Best Hyperparameters:")
                continue
            name, separator, raw_value = line.partition(": ")
            if not separator:
                continue
            try:
                params[name] = ast.literal_eval(raw_value)
            except (ValueError, SyntaxError):
                params[name] = raw_value
    return params


def load_best_params_from_models(model_dir, symbols):
    """Collect the saved best hyperparameters of every symbol that has a model.

    A symbol is considered only when ``<model_dir>/<symbol>/best_model.keras``
    exists. Its parameters are read from ``<model_dir>/<symbol>/best_params.json``
    when present, otherwise from the ``<model_dir>/<symbol>_best_metrics.txt``
    file produced by ``save_metrics_and_params``. The model itself is not
    loaded, because only its existence matters here.

    Args:
        model_dir: Directory that contains the per-symbol model folders.
        symbols: Iterable of symbol names to look up.

    Returns:
        Dict mapping each symbol with available parameters to its parameter dict.
    """
    best_params_dict = {}

    for symbol in symbols:
        symbol_model_dir = os.path.join(model_dir, symbol)
        best_model_path = os.path.join(symbol_model_dir, "best_model.keras")

        if not os.path.exists(best_model_path):
            print(f"Model for {symbol} not found at {best_model_path}")
            continue
        print(f"Found model for {symbol} at {best_model_path}")

        best_params_path = os.path.join(symbol_model_dir, "best_params.json")
        metrics_path = os.path.join(model_dir, f"{symbol}_best_metrics.txt")

        best_params = None
        source_path = None
        if os.path.exists(best_params_path):
            with open(best_params_path, 'r', encoding='utf-8') as f:
                best_params = json.load(f)
            source_path = best_params_path
        elif os.path.exists(metrics_path):
            # Fall back to the text file, the only format this notebook writes
            best_params = _read_best_metrics_file(metrics_path)
            source_path = metrics_path

        if best_params:
            best_params_dict[symbol] = best_params
            print(f"Loaded best params for {symbol} from {source_path}")
        else:
            print(f"Best params for {symbol} not found.")

    return best_params_dict

In [None]:
# Define the path to save the models
model_dir = os.path.abspath("/content/drive/My Drive/tradingcripto/models")
symbols = ['BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'LTCUSDT']  # List of symbols to evaluate

# Load the best parameters from the saved models
best_params_dict = load_best_params_from_models(model_dir, symbols)


def _build_fine_tune_grid(params):
    """Return a narrow search grid centred on one symbol's best parameters."""
    # build_model iterates over 'units' (one entry per recurrent layer), so
    # every candidate must itself be a list. Subtracting 10 from a list raised
    # TypeError; a bare number is treated as a single layer.
    base_units = params['units']
    if not isinstance(base_units, (list, tuple)):
        base_units = [base_units]
    base_dropout = params['dropout_rate']

    return {
        'units': [
            [max(1, u - 10) for u in base_units],  # never drop a layer below 1 unit
            list(base_units),
            [u + 10 for u in base_units],
        ],
        # round() removes float noise such as 0.30000000000000004
        'dropout_rate': [
            max(0, round(base_dropout - 0.1, 2)),
            base_dropout,
            min(1, round(base_dropout + 0.1, 2)),
        ],
        'optimizer': [params['optimizer']],
        'model_type': [params['model_type']],
        'epochs': [60],
        'batch_size': [params['batch_size']]
    }


# Define a new param_grid for fine-tuning each symbol
fine_tune_param_grids = {}
for symbol, params in best_params_dict.items():
    fine_tune_param_grids[symbol] = _build_fine_tune_grid(params)

In [None]:
# Run the fine-tuning search for every symbol that has a fine-tuning grid
for symbol in symbols:
    if symbol not in fine_tune_param_grids:
        print(f"No best params found for {symbol}, skipping fine-tuning.")
        continue

    print(f"Fine-tuning model for {symbol}")
    fine_tune_param_grid = fine_tune_param_grids[symbol]
    train_and_evaluate(symbol, model_dir, fine_tune_param_grid, sequence_length)