<a href="https://colab.research.google.com/github/DaniOsuna/Trading_Model_Script/blob/main/Copia_de_Trading_Model_Script_Maestro_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
################################################################################
# SCRIPT MAESTRO 14 - Descarga CSV de CryptoDataDownload + Clasificación LSTM
# Ajustado para renombrar "Volume BTC"/"Volume USDT" a "Volume"
################################################################################

from google.colab import drive
drive.mount('/content/drive')

!pip install pandas_ta requests

import os
import random
import numpy as np
import pandas as pd
import pandas_ta as ta
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.preprocessing import RobustScaler
import requests
import time

# Seed the Python, NumPy and TensorFlow random generators with one shared value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Start-of-run banner.
print("=== INICIO DEL SCRIPT MAESTRO 13 (Descarga CSV + Clasificación) ===")

############################
# 1) PARÁMETROS
############################
# Remote CSV fetched at the start of main_csv_pipeline, and the local path
# the downloaded bytes are written to and then read back from.
CSV_URL  = "https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_d.csv"  # Example
LOCAL_CSV = "/content/drive/MyDrive/Binance_BTCUSDT_daily.csv"

# Minimum next-row close-to-close return for a row to be labelled 1 (y_bin).
THRESHOLD_GAIN= 0.003
# Number of consecutive rows in each LSTM input sequence.
WINDOW        = 30
# Exit thresholds and per-side fee handed to backtest_binario.
STOP_LOSS     = 0.02
TAKE_PROFIT   = 0.03
COMMISSION    = 0.001
# Number of trailing rows held out of training and used for the backtest.
TEST_DAYS     = 60
# Adam learning rate, and the batch size / epoch cap passed to model.fit.
LR            = 1e-4
BATCH_SIZE    = 32
EPOCHS        = 20

# CSV that receives one row (live signal + hold-out metrics) per run.
SAVE_TRADES_CSV = "/content/drive/MyDrive/Trading_Optimization_LSTM/paper_trades_binaria_diaria.csv"

############################
# 2) Descargar CSV
############################
def download_csv_from_cryptodatadownload(url, local_path):
    """Download ``url`` and store the response body at ``local_path``.

    Up to three attempts are made. An attempt fails when the server answers
    with a status other than 200, or when the request or the file write
    raises. Between failed attempts the function waits 60 seconds; after the
    last failed attempt it returns immediately instead of sleeping again.

    Args:
        url: Address of the CSV file to fetch.
        local_path: Destination file path; it is overwritten on success.

    Returns:
        True if the file was downloaded and written, False if every attempt
        failed.
    """
    max_retries = 3
    retry_wait_s = 60
    for attempt in range(1, max_retries + 1):
        try:
            print(f"Descargando CSV desde: {url}")
            r = requests.get(url, timeout=30)
            if r.status_code == 200:
                with open(local_path, 'wb') as f:
                    f.write(r.content)
                print(f"Guardado => {local_path}")
                return True
            failure = f"Error HTTP: {r.status_code}."
        except (requests.RequestException, OSError) as e:
            # Network problems and file-system errors are retried; anything
            # else is a programming error and is allowed to propagate.
            failure = f"Excepcion: {e}."

        if attempt < max_retries:
            print(f"{failure} Reintento en {retry_wait_s}s...")
            time.sleep(retry_wait_s)
        else:
            # No retry follows, so do not announce one and do not wait.
            print(f"{failure} Sin mas reintentos.")
    return False

############################
# 3) Weighted BCE
############################
def weighted_bce(y_true, y_pred):
    """Class-weighted binary cross-entropy averaged over all elements.

    The positive-class term is weighted 3.0 and the negative-class term 1.0.
    Predictions are clipped to ``[epsilon, 1 - epsilon]`` before the
    logarithm is taken.

    Args:
        y_true: Ground-truth labels; cast to float32 here.
        y_pred: Predicted probabilities.

    Returns:
        A scalar tensor: the mean weighted cross-entropy.
    """
    import tensorflow as tf
    from tensorflow.keras.backend import epsilon

    pos_weight = 3.0
    neg_weight = 1.0

    labels = tf.cast(y_true, tf.float32)
    # Clip so that neither log() below can receive exactly 0.
    probs = tf.clip_by_value(y_pred, epsilon(), 1.0 - epsilon())

    pos_term = pos_weight * labels * tf.math.log(probs)
    neg_term = neg_weight * (1.0 - labels) * tf.math.log(1.0 - probs)
    return tf.reduce_mean(-(pos_term + neg_term))

############################
# 4) BACKTEST BINARIO
############################
def backtest_binario(df_test, stop_loss=0.02, take_profit=0.03, commission=0.001):
    """Simulate a long-only, one-position-at-a-time strategy from binary signals.

    Rows are processed in order. While flat, a row with ``y_pred_bin == 1``
    opens a position at that row's ``OpenShift`` price. While in a position,
    the gain versus the entry price is measured at the row's ``OpenShift``
    and the trade is closed when the gain is at or below ``-stop_loss``, at
    or above ``take_profit``, or the row's ``y_pred_bin`` is 0. A closed
    trade books ``gain - 2 * commission`` on the closing row; every other
    row books 0.0.

    The last row is never evaluated (it always books 0.0), and a position
    that is still open at that point is not closed, so it contributes no PnL.

    Args:
        df_test: DataFrame with ``y_pred_bin`` and ``OpenShift`` columns. It
            is modified in place: ``PnL`` and ``CumPnL`` columns are added.
            An empty frame is accepted and yields empty columns.
        stop_loss: Loss fraction that triggers an exit.
        take_profit: Gain fraction that triggers an exit.
        commission: Fee fraction charged once on entry and once on exit.

    Returns:
        ``(df_test, daily_ret)`` where ``daily_ret`` is a list holding the
        same per-row values as the ``PnL`` column.
    """
    # Pull the two columns out once instead of materialising a row Series
    # via .iloc on every iteration.
    signals = df_test['y_pred_bin'].to_numpy()
    prices = df_test['OpenShift'].to_numpy()

    pnl_list = []
    in_position = False
    open_price = None

    # Every row except the last one is evaluated.
    for signal, price in zip(signals[:-1], prices[:-1]):
        realized = 0.0
        if not in_position:
            if signal == 1:
                open_price = price
                in_position = True
        else:
            lat_gain = (price - open_price) / open_price
            if lat_gain <= -stop_loss or lat_gain >= take_profit or signal == 0:
                realized = lat_gain - 2 * commission
                in_position = False
                open_price = None
        pnl_list.append(realized)

    # The final row books no PnL. Skipping this for an empty frame keeps the
    # list length equal to the number of rows, so the assignment below works.
    if len(df_test) > 0:
        pnl_list.append(0.0)

    df_test['PnL'] = pd.Series(pnl_list, index=df_test.index, dtype=float)
    df_test['CumPnL'] = (1 + df_test['PnL']).cumprod() - 1
    return df_test, list(pnl_list)

############################
# 5) METRICAS
############################
def calc_metrics(daily_ret):
    """Compute Sharpe, Sortino, maximum drawdown and profit factor.

    Args:
        daily_ret: Sequence of per-period fractional returns.

    Returns:
        ``(sharpe, sortino, dd, pf)``:

        * ``sharpe``: mean / standard deviation, scaled by ``sqrt(365)``;
          0.0 when the standard deviation is not above 1e-9.
        * ``sortino``: mean / standard deviation of the negative returns,
          scaled by ``sqrt(365)``; 0.0 when that deviation is not above 1e-9
          (including when there are fewer than two negative returns).
        * ``dd``: largest fractional drop of compounded equity below its
          running peak. Equity starts at 1.0 and that starting value counts
          as a peak, so losses from the very first period are included.
        * ``pf``: sum of positive returns / absolute sum of negative returns;
          999.0 when there are no losses.

        An empty input returns ``(0, 0, 0, 0)``.
    """
    import pandas as pd

    rets = pd.Series(daily_ret, dtype=float)
    if rets.empty:
        return (0, 0, 0, 0)

    # Compounded equity curve with an initial equity of 1.0. The running
    # peak is floored at 1.0 so the starting capital is a valid high-water
    # mark; without the floor an immediate loss would show zero drawdown.
    equity = (1 + rets).cumprod()
    peak = equity.cummax().clip(lower=1.0)
    dd = ((peak - equity) / peak).max()

    std_ = rets.std()
    sharpe = rets.mean() / std_ * np.sqrt(365) if std_ > 1e-9 else 0.0

    # std() of 0 or 1 negative returns is NaN, which fails the comparison
    # and falls through to 0.0.
    std_neg = rets[rets < 0].std()
    sortino = rets.mean() / (std_neg + 1e-9) * np.sqrt(365) if std_neg > 1e-9 else 0.0

    gains = rets[rets > 0].sum()
    losses = abs(rets[rets < 0].sum())
    pf = (gains / losses) if losses > 1e-9 else 999.0
    return (sharpe, sortino, dd, pf)

############################
# 6) MAIN
############################
def main_csv_pipeline():
    """Run the whole pipeline: download, features, LSTM training, backtest, live signal.

    Steps, in order:

    1. Download the CSV at ``CSV_URL`` to ``LOCAL_CSV`` and load it.
    2. Normalise column names and require Open/High/Low/Close/Volume.
    3. Add RSI, MACD, EMA, Bollinger-band and stochastic-RSI columns.
    4. Label each row 1 when the next row's close-to-close return is at
       least ``THRESHOLD_GAIN``.
    5. Hold out the last ``TEST_DAYS`` rows, scale features with a
       ``RobustScaler`` fitted on the training rows only, and build
       ``WINDOW``-long sequences.
    6. Train a two-layer LSTM classifier with ``weighted_bce``.
    7. Backtest the thresholded predictions on the held-out rows and print
       the metrics.
    8. Predict on the last ``WINDOW`` rows and append the signal plus the
       hold-out metrics to ``SAVE_TRADES_CSV``.

    Returns:
        None. The function prints a message and returns early when the
        download fails, required columns are missing, or there is too little
        data for any stage.
    """
    # 1) Download the CSV
    ok = download_csv_from_cryptodatadownload(CSV_URL, LOCAL_CSV)
    if not ok:
        print("No se pudo descargar CSV. Terminamos.")
        return

    # 2) Read the CSV, skipping its first line
    #    (presumably a banner line above the header; verify against the file).
    df_raw = pd.read_csv(LOCAL_CSV, skiprows=1)
    # Normalise column names: "date" -> "Date", "Volume BTC" -> "Volume".
    df_raw.rename(columns={
        'date': 'Date',
        'Volume BTC': 'Volume',
        'Volume USDT': 'VolumeUSDT',
    }, inplace=True, errors='ignore')

    # Parse dates, order chronologically and index by date.
    df_raw['Date'] = pd.to_datetime(df_raw['Date'])
    df_raw.sort_values(by='Date', inplace=True)
    df_raw.set_index('Date', inplace=True)

    # Fall back to the USDT volume when no base-asset volume column exists.
    if "Volume" not in df_raw.columns and "VolumeUSDT" in df_raw.columns:
        df_raw.rename(columns={"VolumeUSDT": "Volume"}, inplace=True)

    # Require the OHLCV columns.
    needed = ["Open", "High", "Low", "Close", "Volume"]
    missing = [c for c in needed if c not in df_raw.columns]
    if missing:
        print(f"Falta col => {missing}. Revisa CSV de cryptodatadownload.")
        return

    df = df_raw.copy()
    df.dropna(subset=needed, inplace=True)
    print("df shape tras parse:", df.shape)
    print(df.tail(5))

    # 3) Technical indicators
    df['RSI14'] = ta.rsi(df['Close'], length=14)
    macd_ = ta.macd(df['Close'], fast=12, slow=26)
    df['MACD'] = macd_['MACD_12_26_9']
    df['MACDs'] = macd_['MACDs_12_26_9']
    df['EMA7'] = ta.ema(df['Close'], length=7)
    df['EMA21'] = ta.ema(df['Close'], length=21)
    bb = ta.bbands(df['Close'], length=20)
    df['BBU'] = bb['BBU_20_2.0']
    df['BBM'] = bb['BBM_20_2.0']
    df['BBL'] = bb['BBL_20_2.0']
    stochrsi = ta.stochrsi(df['Close'], length=14)
    df['STOCHRSIk'] = stochrsi['STOCHRSIk_14_14_3_3']
    df['STOCHRSId'] = stochrsi['STOCHRSId_14_14_3_3']
    # Drops every row that has a NaN in any column, not only the indicators.
    df.dropna(inplace=True)

    # Forward return: next row's close relative to this row's close.
    df['CloseShift'] = df['Close'].shift(-1)
    df['ReturnFut'] = (df['CloseShift'] - df['Close']) / df['Close']
    # The last row has no next close, so it is removed here.
    # NOTE(review): because of this, the "live" window in step 8 ends one row
    # before the most recent row of the CSV. Confirm that is intended.
    df.dropna(inplace=True)

    # y_bin => 1 when the forward return is >= THRESHOLD_GAIN
    df['y_bin'] = (df['ReturnFut'] >= THRESHOLD_GAIN).astype(int)

    # 4) Train/test split: the last TEST_DAYS rows are the test set.
    if len(df) < (TEST_DAYS + WINDOW + 5):
        print("No hay data suficiente post-limpieza.")
        return
    df_train = df.iloc[:-TEST_DAYS].copy()
    df_test = df.iloc[-TEST_DAYS:].copy()

    feat_cols = [
        "Open", "High", "Low", "Close", "Volume",
        "RSI14", "MACD", "MACDs", "EMA7", "EMA21", "BBU", "BBM", "BBL", "STOCHRSIk", "STOCHRSId"
    ]
    df_train.dropna(subset=feat_cols, inplace=True)
    df_test.dropna(subset=feat_cols, inplace=True)

    # 5) Scaling: fitted on the training rows only, reused for test and live.
    scaler = RobustScaler()
    X_train_2D = scaler.fit_transform(df_train[feat_cols].values)
    y_train_1D = df_train['y_bin'].values

    def create_seq_bin(feat2D, targ1D, w):
        """Pair each window of rows i..i+w-1 with the target at row i+w."""
        X, y = [], []
        for i in range(len(feat2D) - w):
            X.append(feat2D[i:i + w])
            y.append(targ1D[i + w])
        return np.array(X), np.array(y)

    X_tr, y_tr = create_seq_bin(X_train_2D, y_train_1D, WINDOW)
    if len(X_tr) < 10:
        print("Train no genera secuencias. Saliendo.")
        return

    # 6) Model
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=(WINDOW, len(feat_cols))),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    opt = Adam(learning_rate=LR)
    model.compile(optimizer=opt, loss=weighted_bce, metrics=['accuracy'])

    # 80/20 split; the validation part is the most recent 20% of sequences.
    # val_size is at least 2 here because len(X_tr) >= 10.
    val_size = int(len(X_tr) * 0.2)
    X_trn = X_tr[:-val_size]
    y_trn = y_tr[:-val_size]
    X_val = X_tr[-val_size:]
    y_val = y_tr[-val_size:]

    # Require at least one full batch of training sequences.
    steps_per_epoch = len(X_trn) // BATCH_SIZE
    if steps_per_epoch < 1:
        print("No hay data batch.")
        return

    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=1e-7)

    print(f"Entrenando con {len(X_trn)} (train) + {len(X_val)} (val).")
    model.fit(
        X_trn, y_trn,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[es, rlrop],
        shuffle=True,
        verbose=1
    )

    # 7) Hold-out test => mini backtest
    X_test_2D = scaler.transform(df_test[feat_cols].values)
    y_test_1D = df_test['y_bin'].values
    X_te, y_te = create_seq_bin(X_test_2D, y_test_1D, WINDOW)
    if len(X_te) < 1:
        print("Test no genera secuencias. Saliendo.")
        return
    # Rows WINDOW.. of the test set line up with the sequence targets. Take an
    # explicit copy so the in-place edits below act on an independent frame
    # rather than on a slice of df_test.
    df_test_seq = df_test.iloc[WINDOW:WINDOW + len(X_te)].copy()
    df_test_seq.reset_index(drop=False, inplace=True)

    y_proba_test = model.predict(X_te).flatten()
    y_bin_test = (y_proba_test >= 0.5).astype(int)
    df_test_seq['y_pred_bin'] = y_bin_test
    # Next row's open: the price used by the backtest for entries and exits.
    df_test_seq['OpenShift'] = df_test_seq['Open'].shift(-1)
    df_test_seq.dropna(inplace=True)
    if df_test_seq.empty:
        # Nothing left to backtest; .iloc[-1] below would fail on an empty frame.
        print("Test no genera secuencias. Saliendo.")
        return

    df_test_seq, daily_ret = backtest_binario(df_test_seq, STOP_LOSS, TAKE_PROFIT, COMMISSION)
    final_pnl = df_test_seq['CumPnL'].iloc[-1] * 100
    sharpe_t, sortino_t, dd_t, pf_t = calc_metrics(daily_ret)
    print(f"\n=== MINI-HOLDOUT => {len(df_test_seq)} muestras ===")
    print(f"PNL= {final_pnl:.2f}%, Sharpe= {sharpe_t:.2f}, Sortino= {sortino_t:.2f}, DD= {dd_t:.2%}, PF= {pf_t:.2f}")

    # 8) "Live" signal from the last WINDOW rows of the cleaned frame.
    last_block = df.iloc[-WINDOW:].copy()
    if len(last_block) < WINDOW:
        print("No hay data live.")
        return
    feat_live = last_block[feat_cols].values
    feat_live_scaled = scaler.transform(feat_live)
    X_live = np.expand_dims(feat_live_scaled, axis=0)
    live_proba = model.predict(X_live).flatten()[0]
    live_bin = 1 if live_proba >= 0.5 else 0

    last_dt = df.index[-1]
    signal_str = "BUY" if live_bin == 1 else "NO_BUY"
    print(f"Señal del día => {signal_str} (prob={live_proba:.4f})")

    row_save = {
        "datetime": last_dt,
        "live_proba": live_proba,
        "signal": signal_str,
        "test_pnl": final_pnl,
        "test_pf": pf_t,
        "sharpe_test": sharpe_t,
        "sortino_test": sortino_t
    }
    df_save = pd.DataFrame([row_save])

    # Create the destination folder if needed; to_csv does not create
    # missing parent directories.
    out_dir = os.path.dirname(SAVE_TRADES_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # Append to an existing log, or start a new one with a header row.
    if os.path.isfile(SAVE_TRADES_CSV):
        df_save.to_csv(SAVE_TRADES_CSV, mode='a', header=False, index=False)
    else:
        df_save.to_csv(SAVE_TRADES_CSV, mode='w', header=True, index=False)

    print(f"Guardado en {SAVE_TRADES_CSV}")
    print("=== FIN DEL SCRIPT MAESTRO 13 (Descarga CSV + LSTM) ===")

# Entry point: run the pipeline when this file is executed directly
# (not when it is imported as a module).
if __name__=="__main__":
    main_csv_pipeline()

Mounted at /content/drive
Collecting pandas_ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218909 sha256=4da94886d8b5d730db5a08d2eb359908cf298925fe31c62b6f4628c6233ea38d
  Stored in directory: /root/.cache/pip/wheels/7f/33/8b/50b245c5c65433cd8f5cb24ac15d97e5a3db2d41a8b6ae957d
Successfully built pandas_ta
Installing collected packages: pandas_ta
Successfully installed pandas_ta-0.3.14b0
=== INICIO DEL SCRIPT MAESTRO 13 (Descarga CSV + Clasificación) ===
Descargando CSV desde: https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_d.csv
Guardado => /content/drive/MyDrive/Binance_BTCUSDT_daily.csv
df shape tras parse

  super().__init__(**kwargs)


Entrenando con 2096 (train) + 524 (val).
Epoch 1/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 64ms/step - accuracy: 0.4847 - loss: 1.2928 - val_accuracy: 0.4828 - val_loss: 1.2494 - learning_rate: 1.0000e-04
Epoch 2/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 54ms/step - accuracy: 0.4635 - loss: 1.2128 - val_accuracy: 0.4656 - val_loss: 1.1968 - learning_rate: 1.0000e-04
Epoch 3/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 58ms/step - accuracy: 0.4735 - loss: 1.1594 - val_accuracy: 0.4332 - val_loss: 1.1586 - learning_rate: 1.0000e-04
Epoch 4/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 54ms/step - accuracy: 0.4684 - loss: 1.1586 - val_accuracy: 0.4332 - val_loss: 1.1463 - learning_rate: 1.0000e-04
Epoch 5/20
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - accuracy: 0.4689 - loss: 1.1520 - val_accuracy: 0.4332 - val_loss: 1.1439 - learning_rate: 1.0000e-04
Epoch 6/20
[