<a href="https://colab.research.google.com/github/DaniOsuna/trading_lstm_4h.py/blob/main/trading_lstm_4h_binance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
################################################################################
# SCRIPT MAESTRO - LSTM CLASIFICACIÓN EN 4H BINANCE (Paper Trading)
################################################################################
import os
import random
import numpy as np
import pandas as pd
import pandas_ta as ta
import matplotlib.pyplot as plt
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.preprocessing import RobustScaler
from binance.client import Client   # Para extraer velas 4h

#########################
# 1) SEED + PARAMETROS
#########################
# Fix every RNG the pipeline touches so runs are repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Credentials are read from the environment so that secrets never live in
# source control. Both may be unset (None): this script only requests public
# market data (klines).
# NOTE(review): any key that was ever committed to the repository must be
# treated as leaked and revoked in the Binance account settings.
BINANCE_API_KEY    = os.environ.get("BINANCE_API_KEY")
BINANCE_API_SECRET = os.environ.get("BINANCE_API_SECRET")

SYMBOL             = "BTCUSDT"
INTERVAL           = Client.KLINE_INTERVAL_4HOUR
LIMIT_KLINES       = 500   # 500 candles x 4h = ~83 days

THRESHOLD_GAIN     = 0.003   # 0.3% minimum next-candle return to label as 1
WINDOW             = 30      # 30 candles of 4h => 5 days
STOP_LOSS          = 0.02    # 2%
TAKE_PROFIT        = 0.03    # 3%
COMMISSION         = 0.001   # 0.1% per trade side
TEST_DAYS          = 180     # counted in 4h candles: 180 candles => 30 days
LR                 = 1e-4
BATCH_SIZE         = 32
EPOCHS             = 20

OUTPUT_CSV         = "paper_trades_lstm_4h.csv"

#########################
# 2) DESCARGAR VELAS 4H
#########################
def download_klines_4h(symbol, interval, limit):
    """Download the most recent candles from Binance as a DataFrame.

    Args:
        symbol: Trading pair forwarded to ``Client.get_klines``.
        interval: Candle interval forwarded to ``Client.get_klines``.
        limit: Number of candles requested.

    Returns:
        A DataFrame indexed by ``open_time`` (parsed from epoch
        milliseconds) whose ``Open``/``High``/``Low``/``Close``/``Volume``
        columns are floats. The remaining columns are left exactly as the
        API returned them.
    """
    column_names = [
        "open_time", "Open", "High", "Low", "Close", "Volume",
        "close_time", "quote_asset_volume", "num_trades",
        "taker_buy_base", "taker_buy_quote", "ignored",
    ]
    numeric_columns = ["Open", "High", "Low", "Close", "Volume"]

    raw_candles = Client(BINANCE_API_KEY, BINANCE_API_SECRET).get_klines(
        symbol=symbol, interval=interval, limit=limit
    )

    frame = pd.DataFrame(raw_candles, columns=column_names)

    # Use the candle open time as the index.
    frame["open_time"] = pd.to_datetime(frame["open_time"], unit="ms")
    frame = frame.set_index("open_time")

    # Only the OHLCV columns are converted to floats.
    return frame.astype({name: float for name in numeric_columns})

#########################
# 3) Weighted BCE
#########################
def weighted_bce(y_true, y_pred):
    """Binary cross-entropy with a heavier penalty on the positive class.

    The log-loss term for label 1 is multiplied by 3.0 and the term for
    label 0 by 1.0. Predictions are clipped away from 0 and 1 by the Keras
    backend epsilon before the logarithm is taken.

    Args:
        y_true: Ground-truth labels; cast to float32.
        y_pred: Predicted probabilities.

    Returns:
        The mean weighted loss as a scalar tensor.
    """
    import tensorflow as tf
    from tensorflow.keras.backend import epsilon

    positive_weight = 3.0
    negative_weight = 1.0

    eps = epsilon()
    labels = tf.cast(y_true, tf.float32)
    probs = tf.clip_by_value(y_pred, eps, 1.0 - eps)

    positive_term = positive_weight * labels * tf.math.log(probs)
    negative_term = negative_weight * (1.0 - labels) * tf.math.log(1.0 - probs)
    return tf.reduce_mean(-(positive_term + negative_term))

#########################
# 4) BACKTEST BINARIO
#########################
def backtest_binario(df_test, stop_loss=0.02, take_profit=0.03, commission=0.001):
    """Simulate a long-only, one-position-at-a-time strategy on binary signals.

    Walking the rows in order:

    * While flat, a row with ``y_pred_bin == 1`` opens a position at that
      row's ``OpenShift`` price.
    * While in a position, the latent gain is measured against the current
      row's ``OpenShift``. The trade is closed when the gain reaches
      ``-stop_loss`` or ``take_profit``, or when ``y_pred_bin == 0``.
    * A position still open on the final row is closed at that row's
      ``OpenShift`` so that its result is included in the PnL instead of
      being silently discarded. No new position is opened on the final row.

    A closed trade books ``gain - 2 * commission`` (entry plus exit fee) on
    the row where it is closed; every other row books 0.0.

    Args:
        df_test: DataFrame with ``y_pred_bin`` and ``OpenShift`` columns. It
            is modified in place: ``PnL`` and ``CumPnL`` columns are added.
        stop_loss: Loss fraction that triggers an exit.
        take_profit: Gain fraction that triggers an exit.
        commission: Fee fraction charged per trade side.

    Returns:
        ``(df_test, per_row_returns)`` where ``per_row_returns`` is a list
        with the same values as the ``PnL`` column. An empty frame yields
        empty columns and an empty list.
    """
    signals = df_test['y_pred_bin'].to_numpy()
    prices = df_test['OpenShift'].to_numpy(dtype=float)
    n_rows = len(df_test)
    pnl = np.zeros(n_rows, dtype=float)

    entry_price = None  # None means "flat"
    for i in range(n_rows):
        is_last = i == n_rows - 1
        if is_last and np.isnan(prices[i]):
            # No price to settle against on the final bar; leave it at 0.0.
            break

        if entry_price is None:
            # Opening on the last bar would create a trade that can never
            # be closed, so entries are only allowed before it.
            if signals[i] == 1 and not is_last:
                entry_price = prices[i]
            continue

        gain = (prices[i] - entry_price) / entry_price
        must_exit = (
            gain <= -stop_loss
            or gain >= take_profit
            or signals[i] == 0
            or is_last  # force-close so the open trade is accounted for
        )
        if must_exit:
            pnl[i] = gain - 2 * commission
            entry_price = None

    df_test['PnL'] = pnl
    df_test['CumPnL'] = (1 + df_test['PnL']).cumprod() - 1
    return df_test, pnl.tolist()

#########################
# 5) METRICAS
#########################
def calc_metrics(daily_ret, periods_per_year=365):
    """Compute risk/performance statistics for a sequence of period returns.

    Args:
        daily_ret: Iterable of simple returns, one per period.
        periods_per_year: Number of return periods in a year, used to
            annualise Sharpe and Sortino. The default of 365 corresponds to
            one return per day; per-4h-candle returns would need 6 * 365.

    Returns:
        ``(sharpe, sortino, max_drawdown, profit_factor)``.

        * ``sharpe`` / ``sortino`` are 0.0 when the relevant standard
          deviation is missing or effectively zero.
        * ``max_drawdown`` is the largest drop of the compounded cumulative
          return below its running peak, in cumulative-return points.
        * ``profit_factor`` is gross gains over gross losses, or 999.0 when
          there are no losses.

        An empty input returns ``(0, 0, 0, 0)``.
    """
    rets = pd.Series(daily_ret, dtype=float)
    if rets.empty:
        return (0, 0, 0, 0)

    annualize = np.sqrt(periods_per_year)

    cumret = (1 + rets).cumprod() - 1
    # The running peak starts at the initial capital (cumulative return 0),
    # so a loss taken before any gain still counts as drawdown.
    peak = cumret.cummax().clip(lower=0.0)
    dd = (peak - cumret).max()

    std_ = rets.std()
    sharpe = rets.mean() / std_ * annualize if std_ > 1e-9 else 0.0

    # Sortino only penalises downside volatility. With fewer than two
    # negative returns the std is NaN, the comparison is False, and 0.0 is
    # reported.
    std_neg = rets[rets < 0].std()
    sortino = rets.mean() / (std_neg + 1e-9) * annualize if std_neg > 1e-9 else 0.0

    gains = rets[rets > 0].sum()
    losses = abs(rets[rets < 0].sum())
    pf = (gains / losses) if losses > 1e-9 else 999.0
    return (sharpe, sortino, dd, pf)

#########################
# 6) PROCESO PRINCIPAL
#########################
def main_4h_binance():
    """Run the whole pipeline once: download, train, backtest, signal, log.

    Steps, in order:

    1. Download LIMIT_KLINES candles for SYMBOL / INTERVAL.
    2. Add technical indicators and the binary target ``y_bin``.
    3. Hold out the last TEST_DAYS rows as the test set and fit a
       RobustScaler on the training rows only.
    4. Train a two-layer LSTM classifier on sliding windows of WINDOW rows,
       validating on the last 20% of the training windows.
    5. Predict on the test windows and run ``backtest_binario`` plus
       ``calc_metrics`` on the result.
    6. Predict on the most recent WINDOW rows to produce a BUY / NO_BUY
       signal.
    7. Append one summary row to OUTPUT_CSV (the header is written only when
       the file does not exist yet).

    Returns None. Prints a message and returns early whenever there is not
    enough data for a stage.
    """
    print("=== INICIO: DESCARGA BINANCE 4H ===")
    df = download_klines_4h(SYMBOL, INTERVAL, LIMIT_KLINES)
    print(f"df shape: {df.shape}")
    print(df.tail(3))

    # Compute indicators
    # NOTE(review): the result column names used below (e.g. 'BBU_20_2.0')
    # are produced by pandas_ta and may differ between versions -- confirm
    # against the installed release.
    df['RSI14'] = ta.rsi(df['Close'], length=14)
    macd_ = ta.macd(df['Close'], fast=12, slow=26)
    df['MACD']  = macd_['MACD_12_26_9']
    df['MACDs'] = macd_['MACDs_12_26_9']
    df['EMA7']  = ta.ema(df['Close'], length=7)
    df['EMA21'] = ta.ema(df['Close'], length=21)
    bb = ta.bbands(df['Close'], length=20)
    df['BBU']   = bb['BBU_20_2.0']
    df['BBM']   = bb['BBM_20_2.0']
    df['BBL']   = bb['BBL_20_2.0']
    stochrsi = ta.stochrsi(df['Close'], length=14)
    df['STOCHRSIk']= stochrsi['STOCHRSIk_14_14_3_3']
    df['STOCHRSId']= stochrsi['STOCHRSId_14_14_3_3']
    # Drop the rows that contain NaN in any column.
    df.dropna(inplace=True)

    # ReturnFut: simple return from this row's close to the next row's close.
    # The last row has no next close and is removed by the dropna.
    df['CloseShift'] = df['Close'].shift(-1)
    df['ReturnFut']  = (df['CloseShift'] - df['Close']) / df['Close']
    df.dropna(inplace=True)

    # y_bin => 1 if ReturnFut >= THRESHOLD_GAIN
    df['y_bin'] = (df['ReturnFut'] >= THRESHOLD_GAIN).astype(int)

    # Split into train/test
    if len(df) < (TEST_DAYS + WINDOW + 5):
        print("No hay data suficiente para train/test.")
        return

    # The split is chronological: the last TEST_DAYS rows form the test set.
    df_train = df.iloc[:-TEST_DAYS].copy()
    df_test  = df.iloc[-TEST_DAYS:].copy()

    feat_cols = [
        "Open","High","Low","Close","Volume","RSI14","MACD","MACDs","EMA7",
        "EMA21","BBU","BBM","BBL","STOCHRSIk","STOCHRSId"
    ]
    df_train.dropna(subset=feat_cols, inplace=True)
    df_test.dropna(subset=feat_cols, inplace=True)

    # The scaler is fitted on training rows only; the test and live data are
    # transformed with the same fitted scaler further down.
    from sklearn.preprocessing import RobustScaler
    scaler= RobustScaler()
    X_train_2D= scaler.fit_transform(df_train[feat_cols].values)
    y_train_1D= df_train['y_bin'].values

    def create_seq(feat2D, targ1D, w):
        """Build sliding windows: X[i] = rows i..i+w-1, y[i] = target at row i+w."""
        X,y=[],[]
        for i in range(len(feat2D)-w):
            X.append(feat2D[i:i+w])
            y.append(targ1D[i+w])
        return np.array(X), np.array(y)

    X_tr, y_tr = create_seq(X_train_2D, y_train_1D, WINDOW)
    if len(X_tr) < 10:
        print("No hay datos de train suficientes.")
        return

    # Build the model
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dropout, Dense
    from tensorflow.keras.optimizers import Adam

    # Two stacked LSTM layers (64 then 32 units), each followed by 30%
    # dropout, ending in a single sigmoid unit.
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=(WINDOW, len(feat_cols))),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(learning_rate=LR), loss=weighted_bce, metrics=['accuracy'])

    # Validation set: the last 20% of the training windows. The len(X_tr) >= 10
    # guard above keeps val_size at 2 or more, so the negative slices are safe.
    val_size = int(len(X_tr)*0.2)
    X_trn = X_tr[:-val_size]
    y_trn = y_tr[:-val_size]
    X_val = X_tr[-val_size:]
    y_val = y_tr[-val_size:]

    # Used only as a guard: require at least one full batch of training data.
    steps_per_epoch = len(X_trn)//BATCH_SIZE
    if steps_per_epoch < 1:
        print("No hay batch para entrenar.")
        return

    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
    # Both callbacks watch val_loss: one stops training and restores the best
    # weights, the other halves the learning rate.
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
    rlrop = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=1e-7)

    print(f"Entrenando con {len(X_trn)} train y {len(X_val)} val.")
    # NOTE(review): `history` is not read anywhere below.
    history = model.fit(
        X_trn, y_trn,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[es, rlrop],
        shuffle=True,
        verbose=1
    )

    # Test => mini-backtest
    X_test_2D = scaler.transform(df_test[feat_cols].values)
    y_test_1D = df_test['y_bin'].values

    X_te, y_te = create_seq(X_test_2D, y_test_1D, WINDOW)
    if len(X_te)<1:
        print("Test no genera secuencias.")
        return
    # Keep the test rows from position WINDOW onwards so that row k lines up
    # with the prediction made from window k.
    df_test_seq = df_test.iloc[WINDOW:].copy()
    df_test_seq = df_test_seq.iloc[:len(X_te)]
    df_test_seq.reset_index(drop=False, inplace=True)

    # Probabilities of 0.5 or more become class 1.
    y_proba_test= model.predict(X_te).flatten()
    y_bin_test= (y_proba_test>=0.5).astype(int)
    df_test_seq['y_pred_bin']= y_bin_test
    # OpenShift is the next row's Open; the last row has none and is dropped.
    df_test_seq['OpenShift']= df_test_seq['Open'].shift(-1)
    df_test_seq.dropna(inplace=True)

    # NOTE(review): `mean` and `stdev` are not used anywhere below.
    from statistics import mean, stdev
    df_test_seq, daily_ret= backtest_binario(df_test_seq, STOP_LOSS, TAKE_PROFIT, COMMISSION)
    # Final cumulative PnL of the backtest, expressed in percent.
    final_pnl= df_test_seq['CumPnL'].iloc[-1] * 100

    sharpe_t, sortino_t, dd_t, pf_t= calc_metrics(daily_ret)
    print(f"\n=== MINI-HOLDOUT => {len(df_test_seq)} muestras ===")
    print(f"PNL= {final_pnl:.2f}%, Sharpe= {sharpe_t:.2f}, Sortino= {sortino_t:.2f}, DD= {dd_t:.2%}, PF= {pf_t:.2f}")

    # "Live" signal: predict from the most recent WINDOW rows of df.
    last_block= df.iloc[-WINDOW:].copy()
    if len(last_block)< WINDOW:
        print("No hay data live.")
        return
    feat_live= last_block[feat_cols].values
    feat_live_scaled= scaler.transform(feat_live)
    # Add a leading batch axis so the single window has shape (1, WINDOW, n_features).
    X_live= np.expand_dims(feat_live_scaled, axis=0)
    live_proba= model.predict(X_live).flatten()[0]
    live_bin= 1 if live_proba>=0.5 else 0

    last_dt= df.index[-1]
    signal_str= "BUY" if live_bin==1 else "NO_BUY"
    print(f"Señal del día => {signal_str} (prob={live_proba:.4f}), ultima vela: {last_dt}")

    # Save a summary of this run
    import os
    import pandas as pd
    row_save= {
        "datetime": last_dt,
        "live_proba": live_proba,
        "signal": signal_str,
        "test_pnl": final_pnl,
        "test_pf": pf_t,
        "sharpe_test": sharpe_t,
        "sortino_test": sortino_t
    }

    # Append to an existing CSV without repeating the header; otherwise
    # create the file with a header row.
    df_save = pd.DataFrame([row_save])
    if os.path.isfile(OUTPUT_CSV):
        df_save.to_csv(OUTPUT_CSV, mode='a', header=False, index=False)
    else:
        df_save.to_csv(OUTPUT_CSV, mode='w', header=True, index=False)

    print(f"\nGuardado en {OUTPUT_CSV}")
    print("=== FIN SCRIPT 4H BINANCE (paper) ===")


if __name__ == "__main__":
    # Run the pipeline only when this file is executed directly, not when
    # it is imported as a module.
    main_4h_binance()