In [None]:
import ccxt
import pandas as pd
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv
import os
from binance.client import Client
import requests
import ta
import talib
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import GRU, LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tqdm import tqdm

In [None]:
# Mount Google Drive at /content/drive/ (google.colab is only importable inside Colab).
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
# Project source directory on Google Drive.
project_path = '/content/drive/My Drive/tradingcripto/src'

# Path of the prepared data file. It lives under <project root>/data/processed,
# i.e. next to `src`, so it is derived from the parent of `project_path`.
# (The previous version passed an absolute path as the second argument of
# os.path.join, which silently discards `project_path`.)
data_file_path = os.path.join(
    os.path.dirname(project_path), 'data', 'processed', 'BTCUSDT_prepared_data.pkl'
)

# Report whether the data file is present.
if os.path.exists(data_file_path):
    print(f"El archivo {data_file_path} existe.")
else:
    print(f"El archivo {data_file_path} no se encuentra.")


In [None]:
load_dotenv()  # Loads environment variables from `.env`

# Binance API credentials read from the environment (os.getenv yields None when a variable is unset).
api_key = os.getenv('BINANCE_API_KEY')
api_secret = os.getenv('BINANCE_API_SECRET')

In [None]:
# Module-level Binance client built from the credentials read above; used by fetch_binance_data.
client = Client(api_key, api_secret)

def fetch_binance_data(symbol, interval, limit=1000, start_str="1 Jan, 2017"):
    """Download historical klines for one symbol and return them as a numeric OHLCV frame.

    Uses the module-level ``client`` (``client.get_historical_klines``).

    Args:
        symbol: Trading pair, e.g. ``'BTCUSDT'``.
        interval: Kline interval string, e.g. ``'1h'``.
        limit: Forwarded unchanged as the ``limit`` keyword of
            ``get_historical_klines``.
        start_str: Start date forwarded to ``get_historical_klines``. Defaults
            to the date that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        DataFrame indexed by ``timestamp`` (datetime, parsed from epoch
        milliseconds) with float columns ``open``, ``high``, ``low``,
        ``close`` and ``volume``.
    """
    kline_columns = [
        'timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time',
        'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore',
    ]
    ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']

    bars = client.get_historical_klines(symbol, interval, start_str, limit=limit)
    df = pd.DataFrame(bars, columns=kline_columns)
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    # The kline payload carries prices/volumes as strings; cast them so the
    # in-memory frame is numeric (previously only the CSV round-trip did this).
    df = df.set_index('timestamp')[ohlcv_columns].astype(float)
    return df

symbols = ['BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'LTCUSDT']
intervals = ['1h']  # Hourly candles only

data = {}  # symbol -> interval -> DataFrame
raw_data_dir = "/content/drive/My Drive/tradingcripto/data/raw"
os.makedirs(raw_data_dir, exist_ok=True)

# Download every symbol/interval pair, keep it in `data` and persist it as CSV.
for symbol in symbols:
    data[symbol] = {}
    for interval in intervals:
        try:
            print(f"Fetching data for {symbol} at interval {interval}")
            df = fetch_binance_data(symbol, interval)
            data[symbol][interval] = df
            # Write into raw_data_dir (the directory created above) instead of
            # repeating the literal path, so the two can never drift apart.
            df.to_csv(os.path.join(raw_data_dir, f"{symbol}_{interval}.csv"))
            print(f"Data fetched and saved for {symbol} at interval {interval}. Data shape: {df.shape}")
            time.sleep(60)  # Pause between downloads to stay within the API rate limits
        except Exception as e:
            # Best effort: report the failure and continue with the next pair.
            print(f"Error fetching data for {symbol} at interval {interval}: {str(e)}")

In [None]:
# Google Drive directories for the project's data stages.
# NOTE(review): output_dir is not referenced by the cells visible here — confirm it is used later.
output_dir = "/content/drive/My Drive/tradingcripto/data/interim"
raw_data_dir = "/content/drive/My Drive/tradingcripto/data/raw"

In [None]:
class CryptoDataProcessor:
    """Adds technical-analysis features to one symbol's OHLC candle frame.

    The frame must contain the columns ``<symbol>_open``, ``<symbol>_high``,
    ``<symbol>_low`` and ``<symbol>_close``. No copy is made: every method adds
    its ``<symbol>_``-prefixed columns to the frame that was passed in.

    Attributes:
        df: The frame being enriched (same object the caller supplied).
        symbol: Column prefix, e.g. ``'BTCUSDT'``.
        support / resistance: Current support and resistance levels; seeded by
            ``initialize_support_resistance`` and advanced by
            ``update_support_resistance``.
        breakout_threshold: Set to 0.01 (1 %); not read anywhere in this class.
    """

    # Number of leading rows (candles, not days) used to seed support/resistance.
    INITIAL_WINDOW = 30

    # (column suffix, talib function name) pairs, in output-column order.
    _CANDLE_PATTERNS = (
        # Basic, common patterns
        ('doji', 'CDLDOJI'),
        ('engulfing', 'CDLENGULFING'),
        ('hammer', 'CDLHAMMER'),
        ('inverted_hammer', 'CDLINVERTEDHAMMER'),
        ('hanging_man', 'CDLHANGINGMAN'),
        ('shooting_star', 'CDLSHOOTINGSTAR'),
        ('morning_star', 'CDLMORNINGSTAR'),
        ('evening_star', 'CDLEVENINGSTAR'),
        ('morning_doji_star', 'CDLMORNINGDOJISTAR'),
        ('evening_doji_star', 'CDLEVENINGDOJISTAR'),
        # Complex patterns
        ('piercing_line', 'CDLPIERCING'),
        ('dark_cloud_cover', 'CDLDARKCLOUDCOVER'),
        ('three_white_soldiers', 'CDL3WHITESOLDIERS'),
        ('three_black_crows', 'CDL3BLACKCROWS'),
        ('three_inside_up_down', 'CDL3INSIDE'),
        ('three_outside_up_down', 'CDL3OUTSIDE'),
        ('three_stars_in_the_south', 'CDL3STARSINSOUTH'),
        # NOTE(review): same talib function as 'three_white_soldiers', so this
        # column is an exact duplicate. Kept so the output schema is unchanged.
        ('three_advancing_white_soldiers', 'CDL3WHITESOLDIERS'),
    )

    def __init__(self, df, symbol):
        """Store the frame and symbol, then seed the support/resistance levels."""
        self.df = df
        self.symbol = symbol
        # Seed the levels from the extremes of the first rows.
        self.initialize_support_resistance()
        self.breakout_threshold = 0.01  # 1% above or below

    def _col(self, name):
        """Return the prefixed column name ``<symbol>_<name>``."""
        return f'{self.symbol}_{name}'

    def initialize_support_resistance(self):
        """Seed support/resistance from the first ``INITIAL_WINDOW`` rows.

        Support is the minimum low and resistance the maximum high of those
        rows. Both are also written as constant ``<symbol>_support`` and
        ``<symbol>_resistance`` columns.
        """
        # Series.min()/max() skip NaN, unlike the builtin min()/max().
        self.support = self.df[self._col('low')].iloc[:self.INITIAL_WINDOW].min()
        self.resistance = self.df[self._col('high')].iloc[:self.INITIAL_WINDOW].max()
        self.df[self._col('support')] = self.support
        self.df[self._col('resistance')] = self.resistance

    def update_support_resistance(self):
        """Walk the candles in order and record dynamic support/resistance.

        Starting from the current ``self.support``/``self.resistance``:
        a close above resistance turns the old resistance into support and the
        candle's high into the new resistance; a close below support turns the
        old support into resistance and the candle's low into the new support.
        The level in force after each candle is stored in
        ``<symbol>_dynamic_support`` / ``<symbol>_dynamic_resistance``, and
        ``self.support``/``self.resistance`` end at the last candle's levels.
        """
        lows = self.df[self._col('low')]
        highs = self.df[self._col('high')]
        closes = self.df[self._col('close')]

        support, resistance = self.support, self.resistance
        support_track = []
        resistance_track = []

        # Positional iteration: avoids iterrows()/df.at per-cell writes, which
        # are slow and mis-assign when the index has duplicate labels.
        for current_low, current_high, current_close in zip(lows, highs, closes):
            if current_close > resistance:
                support = resistance  # Broken resistance becomes support
                resistance = current_high
            elif current_close < support:
                resistance = support  # Broken support becomes resistance
                support = current_low
            support_track.append(support)
            resistance_track.append(resistance)

        self.support, self.resistance = support, resistance
        self.df[self._col('dynamic_support')] = np.asarray(support_track, dtype=float)
        self.df[self._col('dynamic_resistance')] = np.asarray(resistance_track, dtype=float)

    def add_technical_indicators(self):
        """Add moving averages, RSI and the ``ta``-based indicator columns."""
        high = self.df[self._col('high')]
        low = self.df[self._col('low')]
        close = self.df[self._col('close')]

        # SMAs and EMAs; min_periods=1 yields values even before a full window exists.
        for window in (7, 14, 21, 28, 50, 100, 200):
            self.df[self._col(f'sma_{window}')] = close.rolling(window=window, min_periods=1).mean()
            self.df[self._col(f'ema_{window}')] = close.ewm(span=window, adjust=False, min_periods=1).mean()

        # Custom RSI from 14-row simple rolling means of gains and losses.
        delta = close.diff()
        gain = delta.where(delta > 0, 0.0)
        loss = -delta.where(delta < 0, 0.0)

        # min_periods=1 so a value is computed even with fewer rows than the window.
        avg_gain = gain.rolling(window=14, min_periods=1).mean()
        avg_loss = loss.rolling(window=14, min_periods=1).mean()

        # Where both averages are zero the ratio is 0/0 and the RSI is NaN.
        rs = avg_gain / avg_loss
        self.df[self._col('rsi_14')] = 100.0 - (100.0 / (1.0 + rs))

        # MACD (library default windows)
        macd = ta.trend.MACD(close)
        self.df[self._col('macd')] = macd.macd()
        self.df[self._col('macd_signal')] = macd.macd_signal()
        self.df[self._col('macd_diff')] = macd.macd_diff()

        # Williams %R
        self.df[self._col('willr')] = ta.momentum.williams_r(high, low, close, lbp=14)

        # ATR
        self.df[self._col('atr_14')] = ta.volatility.average_true_range(high, low, close, window=14)

        # Stochastic oscillator: was disabled (commented out) in the original
        # notebook, so no stoch columns are produced.

        # ADX
        self.df[self._col('adx')] = ta.trend.adx(high, low, close, window=14)

        # Commodity Channel Index
        self.df[self._col('cci')] = ta.trend.cci(high, low, close, window=20)

        # Ichimoku
        ichimoku = ta.trend.IchimokuIndicator(
            high=high, low=low, window1=9, window2=26, window3=52)
        self.df[self._col('ichimoku_a')] = ichimoku.ichimoku_a()
        self.df[self._col('ichimoku_b')] = ichimoku.ichimoku_b()

        # Momentum (rate of change) over several look-backs
        for window in (10, 14, 20, 30):
            self.df[self._col(f'momentum_{window}')] = ta.momentum.roc(close, window=window)

        # Keltner Channel
        keltner = ta.volatility.KeltnerChannel(
            high=high, low=low, close=close, window=20, window_atr=10)
        self.df[self._col('keltner_hband')] = keltner.keltner_channel_hband()
        self.df[self._col('keltner_lband')] = keltner.keltner_channel_lband()

    def add_candle_patterns(self):
        """Add one column per entry of ``_CANDLE_PATTERNS``.

        Each column holds the result of the named ``talib`` function called
        with the open, high, low and close series.
        """
        open_ = self.df[self._col('open')]
        high = self.df[self._col('high')]
        low = self.df[self._col('low')]
        close = self.df[self._col('close')]

        for suffix, func_name in self._CANDLE_PATTERNS:
            pattern_func = getattr(talib, func_name)
            self.df[self._col(suffix)] = pattern_func(open_, high, low, close)

    def process(self):
        """Run the full pipeline and return the enriched frame.

        Adds indicators, candle patterns and dynamic support/resistance, then
        the prediction target ``<symbol>_next_close``: the close of the
        following row (``shift(-1)``).
        """
        self.add_technical_indicators()
        self.add_candle_patterns()
        self.update_support_resistance()
        # Target variable: the next row's closing price.
        self.df[self._col('next_close')] = self.df[self._col('close')].shift(-1)
        return self.df

# Load each symbol's raw hourly CSV and prefix its columns with the symbol.
raw_data_dir = "/content/drive/My Drive/tradingcripto/data/raw"
processed_data_dir = "/content/drive/My Drive/tradingcripto/data/processed"

data = {}
symbols = ['BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'LTCUSDT']
for symbol in symbols:
    file_path = os.path.join(raw_data_dir, f"{symbol}_1h.csv")
    if os.path.exists(file_path):
        df = pd.read_csv(file_path, index_col='timestamp', parse_dates=True)
        # Add the symbol prefix expected by CryptoDataProcessor (e.g. BTCUSDT_close).
        df.columns = [f"{symbol}_{col}" for col in df.columns]
        data[symbol] = df
    else:
        print(f"No data file found for {symbol}")

# Run the feature pipeline for every symbol that was loaded.
processed_data = {symbol: CryptoDataProcessor(data[symbol], symbol).process()
                  for symbol in data}

# Optional: save each processed DataFrame to its own CSV file.
# The target directory is created first so to_csv does not fail on a fresh Drive.
os.makedirs(processed_data_dir, exist_ok=True)
for symbol, df in processed_data.items():
    df.to_csv(os.path.join(processed_data_dir, f"{symbol}_processed.csv"))