### LSTM

In [1]:
import pandas as pd
import numpy as np
data = pd.read_csv('BTC-I.csv', parse_dates=['Date'], index_col=['Date'])
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ema20,ema50,rsi10,rsi30,rsi200,macd,signal,hist,Price-Up
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-04-05,253.761002,260.674988,251.942001,260.597992,260.597992,19649200,256.131664,258.804604,55.322540,50.121118,44.397749,-3.252124,-3.775657,0.523534,0
2015-04-06,260.721008,261.798004,254.574997,255.492004,255.492004,20034200,256.070744,258.674698,49.595801,48.750560,44.268371,-2.885118,-3.597550,0.712431,0
2015-04-07,255.274002,255.804993,252.205002,253.179993,253.179993,18467400,255.795434,258.459219,47.140702,48.134018,44.209743,-2.749134,-3.427866,0.678733,0
2015-04-08,253.063995,253.847000,244.214996,245.022003,245.022003,30086400,254.769393,257.932269,39.478769,46.010031,44.003080,-3.262043,-3.394702,0.132658,0
2015-04-09,244.751007,246.117996,239.399994,243.675995,243.675995,21643500,253.712879,257.373200,38.336478,45.666093,43.968998,-3.734095,-3.462580,-0.271515,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-23,95099.390625,96416.210938,92403.132812,94686.242188,94686.242188,65239002919,98310.512205,92489.999087,37.870913,54.141525,56.212704,974.011455,2352.815347,-1378.803892,1
2024-12-24,94684.343750,99404.062500,93448.015625,98676.093750,98676.093750,47114953674,98345.329495,92732.591034,50.713025,57.569224,56.908942,917.945789,2065.841435,-1147.895647,1
2024-12-25,98675.914062,99478.750000,97593.468750,99299.195312,99299.195312,33700394629,98436.173858,92990.104928,52.419615,58.075487,57.016216,913.264884,1835.326125,-922.061241,0
2024-12-26,99297.695312,99884.570312,95137.882812,95795.515625,95795.515625,47054980873,98184.682598,93100.121033,43.096484,54.306408,56.225209,619.694116,1592.199723,-972.505607,0


In [2]:
features = ['Close', 'ema20', 'ema50', 'rsi10','rsi30','rsi200', 'macd', 'signal', 'hist']
target = ['Price-Up']
# Separar X e y
X = data[features]
y = data[target].to_numpy()
y.shape, y

((3555, 1),
 array([[0],
        [0],
        [0],
        ...,
        [0],
        [0],
        [0]]))

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X) # Ya lo convierte en un array de numpy 
X_scaled.shape, X_scaled

((3555, 9),
 array([[-0.99905404, -0.9994717 , -0.99945855, ..., -0.16545748,
         -0.18370997, -0.05235413],
        [-0.99915044, -0.99947293, -0.99946135, ..., -0.16539683,
         -0.18367719, -0.05224852],
        [-0.99919409, -0.99947847, -0.99946599, ..., -0.16537436,
         -0.18364596, -0.05226736],
        ...,
        [ 0.87083169,  0.97606199,  0.99684566, ..., -0.01399826,
          0.15477139, -0.56814299],
        [ 0.80468091,  0.9710016 ,  0.99921407, ..., -0.06251236,
          0.11002468, -0.59634489],
        [ 0.77136738,  0.96304186,  1.        , ..., -0.12494937,
          0.06031991, -0.65658811]]))

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, shuffle=False)

### Implementacion con Pytorch

In [5]:
# Convertimos los datos de Numpy a Tensores de Pytorch 
import torch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

X_train_tensor.shape,  y_train_tensor.shape, X_test_tensor.shape, y_test_tensor.shape 

(torch.Size([2844, 9]),
 torch.Size([2844, 1]),
 torch.Size([711, 9]),
 torch.Size([711, 1]))

In [6]:
# Creamos Datasets y Dataloaders de Pytorch
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor) # Instanciamos la clase TendorDataSet 
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32

train_loader = DataLoader(train_dataset, batch_size, shuffle=False) # Instanciamos la clase DataLoader
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

print(f'Cantidad: {len(train_loader)} ')
for batch_X, batch_y in train_loader:
    print("Batch X shape:", batch_X.shape)
    print("Batch y shape:", batch_y.shape)
      # Solo el primer batch
    break


Cantidad: 89 
Batch X shape: torch.Size([32, 9])
Batch y shape: torch.Size([32, 1])


In [7]:
import torch
import torch.nn as nn 

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print (device)

class MiLSTM (nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size ):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        # Definimos 1 capa LSTM y un Full Connected 
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) #Ej [8, 64, 2, (batch_size, seq_length, hidden_size)]
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        # Estados ocultos y celdas en 0 (num_layers, batch_size, hidden_size)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) # Ej [2, 32, 64]
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) # Ej [2, 32, 64]
        x = x.unsqueeze(1)
        # Pasa la entrada por la capa LSTM 
        out, _ = self.lstm(x, (h0,c0)) #LLamo a la instancia de lstm (llamo a su metodo __call__ (no necesita nombre)) (batch_size, seq_length, hidden_size) [32, 30, 64]

        #Seleccionamos la ultima salida de la secuencia
        out = out[:,-1,:] # (batch_size , hidden_size) [32,64]

        #Pasa por la fc
        out = self.fc(out) # (batch_size, output_size) Ej [32,1]

        return out

# Parametros del modelo 
input_size = len(features) 
hidden_size = 128
num_layers  = 2
output_size = 1 # Clasificacion 

model = MiLSTM(input_size, hidden_size, num_layers, output_size).to(device)
print(model) 

cuda
MiLSTM(
  (lstm): LSTM(9, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=1, bias=True)
)


In [8]:
# Definir la función de pérdida y el optimizador
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Entrenamiento del modelo
num_epochs = 100 
model.train()
print(f'Entrenado en {device}')

for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device) #[32, 30, 8]
        batch_y = batch_y.to(device) #[32, 1]

        outputs = model(batch_X).squeeze() # [batch_size, 1] => [batch_size] 

        loss = criterion(outputs, batch_y.squeeze())

        loss.backward()

        optimizer.step()

        optimizer.zero_grad()
    if (epoch + 1 ) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')





Entrenado en cuda
Epoch [10/100], Loss: 0.6861
Epoch [20/100], Loss: 0.6891
Epoch [30/100], Loss: 0.6925
Epoch [40/100], Loss: 0.6943
Epoch [50/100], Loss: 0.6935
Epoch [60/100], Loss: 0.6910
Epoch [70/100], Loss: 0.6881
Epoch [80/100], Loss: 0.6858
Epoch [90/100], Loss: 0.6844
Epoch [100/100], Loss: 0.6838


In [9]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

model.eval()  # Modo de evaluación
y_true = []  # Etiquetas reales
y_pred = []  # Predicciones binarias
y_probs = []  # Probabilidades predichas

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        outputs = model(batch_X).squeeze()   # Logits
        probs = torch.sigmoid(outputs)       # Probabilidades
        predicted = torch.round(probs)       # Predicciones binarias (0 o 1)

        # Guardar los valores reales y predichos
        y_true.extend(batch_y.squeeze().cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())
        y_probs.extend(probs.cpu().numpy())

# Convertir listas a numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# Calcular métricas
accuracy = np.mean(y_pred == y_true) * 100
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
roc_auc = roc_auc_score(y_true, y_probs)

# Mostrar resultados
print(f'Accuracy: {accuracy:.2f}%')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')
print(f'ROC-AUC: {roc_auc:.2f}')


Accuracy: 51.34%
Precision: 0.52
Recall: 0.49
F1-Score: 0.50
ROC-AUC: 0.51


In [10]:
def predict(input_data):
    import pandas as pd

    # Lista de nombres de las características usadas para entrenar el scaler
    feature_names = ["Close", "ema20", "ema50", "rsi10", "rsi30", "rsi200", "macd", "signal", "hist"]

    # Convertir input_data a un DataFrame con los nombres correctos
    input_df = pd.DataFrame([input_data], columns=feature_names)

    # Normalizar los datos de entrada
    input_array = scaler.transform(input_df)  # Transformar con el scaler
    input_data = torch.tensor(input_array, dtype=torch.float32).to(device)

    # Realizar la predicción
    model.eval()
    with torch.no_grad():
        output = model(input_data)
        probability = torch.sigmoid(output).item()  # Convertir logits a probabilidad
        prediction = 1 if probability >= 0.5 else 0  # Umbral de 0.5 para clasificación

    return prediction


In [13]:
# Ejemplo de datos
close = 15000
ema20 = 34800
ema50 = 34500
rsi10 = 70
rsi30 = 60
rsi200 = 50
macd = 10
signal = 11
hist = 2

# Predicción
result = predict([close, ema20, ema50, rsi10, rsi30, rsi200, macd, signal, hist])
print(f"Predicción: {result}")  # Salida: 1 o 0


Predicción: 1


In [None]:
from backtesting import Backtest, Strategy
import talib
data = data.loc['2020-01-01':]
class PredictStrategy(Strategy):
    
    def init(self):
        # Calcular indicadores técnicos necesarios
        close = pd.Series(self.data.Close)
        
        # Cálculo de EMAs
        self.ema20 = self.I(talib.EMA, close, timeperiod=20)
        self.ema50 = self.I(talib.EMA, close, timeperiod=50)
        
        # RSI con diferentes ventanas
        self.rsi10 = self.I(talib.RSI, close, timeperiod=10)
        self.rsi30 = self.I(talib.RSI, close, timeperiod=30)
        self.rsi200 = self.I(talib.RSI, close,timeperiod =200)
        
        # Calcular el MACD
        self.macd, self.signal, self.hist = self.I(
            lambda x: talib.MACD(x, fastperiod=12, slowperiod=26, signalperiod=9),
            close
        )
        
        
    def next(self):
        # Obtener los valores más recientes de los indicadores
        close = self.data.Close[-1]
        ema20 = self.ema20[-1]
        ema50 = self.ema50[-1]
        rsi10 = self.rsi10[-1]
        rsi30 = self.rsi30[-1]
        rsi200 = self.rsi200[-1]
        macd = self.macd[-1]
        signal = self.signal[-1]
        hist = self.hist[-1]

        # Realizar la predicción
        result = predict([close, ema20, ema50, rsi10, rsi30, rsi200, macd, signal, hist])

        # Ejecutar órdenes de compra o venta basadas en la predicción
        if result == 1 and not self.position:
            self.buy()
        elif result == 0 and self.position:
            self.sell()

# Configurar y ejecutar el backtest
bt = Backtest(data, PredictStrategy, cash=200000)
stats = bt.run()
stats

Start                     2020-01-01 00:00:00
End                       2024-12-27 00:00:00
Duration                   1822 days 00:00:00
Exposure Time [%]                   88.919364
Equity Final [$]               2050338.239258
Equity Peak [$]                2236539.825195
Return [%]                          925.16912
Buy & Hold Return [%]             1205.955361
Return (Ann.) [%]                   59.360576
Volatility (Ann.) [%]               99.241312
Sharpe Ratio                         0.598144
Sortino Ratio                        1.559191
Calmar Ratio                         0.778735
Max. Drawdown [%]                  -76.226961
Avg. Drawdown [%]                   -8.732851
Max. Drawdown Duration      847 days 00:00:00
Avg. Drawdown Duration       41 days 00:00:00
# Trades                                    1
Win Rate [%]                            100.0
Best Trade [%]                     943.954248
Worst Trade [%]                    943.954248
Avg. Trade [%]                    

In [15]:
bt.plot()

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  df2 = (df.assign(_width=1).set_index('datetime')
  fig = gridplot(
  fig = gridplot(
