In [66]:
import os
from dotenv import load_dotenv

# Read a local .env file (if one exists) into the process environment before
# looking the keys up. Without this call the load_dotenv import is unused and
# both values are None unless the variables were already exported in the shell.
# Variables that are already set in the environment are not overridden.
load_dotenv()

# Binance credentials; either may be None if the variable is not defined.
BINANCE_API_KEY = os.getenv("BINANCE_API_KEY")
BINANCE_API_SECRET = os.getenv("BINANCE_API_SECRET")

In [67]:
import requests
import pandas as pd
from binance.client import Client
import time

# --- Step 1: Load Top 50 Cryptocurrencies by Market Cap from a local file ---
# The file holds one ticker per line (presumably exported from CoinMarketCap --
# verify its provenance). Blank lines are skipped so that an empty ticker can
# never be turned into a bogus "USDT" pair in the next step.
with open("data/top_50_cryptos.txt", "r") as f:
    top_50 = [ticker for ticker in (line.strip() for line in f) if ticker]

print(f"Top 50 Cryptos Loaded: {top_50}")

# --- Step 2: Check Which Coins Are Tradable on Binance ---
API_KEY, API_SECRET = BINANCE_API_KEY, BINANCE_API_SECRET
client = Client(API_KEY, API_SECRET)

# Index every market whose status is "TRADING" by its symbol name.
exchange_info = client.get_exchange_info()
binance_symbols = dict(
    (market["symbol"], market)
    for market in exchange_info["symbols"]
    if market["status"] == "TRADING"
)

# Keep only the tickers that have an active <TICKER>USDT market, in list order.
usdt_candidates = [f"{symbol}USDT" for symbol in top_50]
valid_pairs = [candidate for candidate in usdt_candidates if candidate in binance_symbols]
print(f"Tradable Cryptos on Binance: {valid_pairs}")

# --- Step 3: Fetch Historical Data for Valid Pairs ---
def get_historical_data(symbol, interval=Client.KLINE_INTERVAL_1DAY, start_date="1 Jul, 2023", end_date="10 Feb, 2025"):
    """Download klines (candles) for one trading pair and return them as a DataFrame.

    Args:
        symbol: Binance market symbol, e.g. "BTCUSDT".
        interval: Kline interval constant understood by the Binance client.
        start_date: Start of the requested range, passed through to the client unchanged.
        end_date: End of the requested range, passed through to the client unchanged.

    Returns:
        DataFrame with the twelve kline columns. "Open Time" and "Close Time" are
        converted from epoch milliseconds to datetimes, and the OHLCV columns are
        cast to float. The frame is empty (same columns) when no data is returned.
    """
    kline_columns = ["Open Time", "Open", "High", "Low", "Close", "Volume",
                     "Close Time", "Quote Asset Volume", "Number of Trades",
                     "Taker Buy Base", "Taker Buy Quote", "Ignore"]
    price_volume_columns = ["Open", "High", "Low", "Close", "Volume"]

    # get_historical_klines pages through the whole start/end range itself, so one
    # call is enough. The previous outer `while True` loop re-requested the range
    # from the last candle's date and only stopped when that request came back
    # empty, which risks duplicated candles or a never-ending loop.
    klines = client.get_historical_klines(symbol, interval, start_date, end_date)

    df = pd.DataFrame(klines, columns=kline_columns)

    # Defensive: guarantee one row per candle so later merges on "Open Time" stay 1:1.
    df = df.drop_duplicates(subset="Open Time").reset_index(drop=True)

    df["Open Time"] = pd.to_datetime(df["Open Time"], unit="ms")
    df["Close Time"] = pd.to_datetime(df["Close Time"], unit="ms")
    # The values in these columns arrive as strings in this notebook's data flow,
    # hence the explicit float cast.
    df[price_volume_columns] = df[price_volume_columns].astype(float)

    return df

# Historical kline frames keyed by trading pair, fetched one pair at a time.
crypto_data = {}

for pair in valid_pairs:
    print(f"Fetching historical data for {pair}...")
    pair_history = get_historical_data(pair)
    crypto_data[pair] = pair_history

# --- Step 4: Merge Data into a Single DataFrame for Model Training ---
# Result: one row per "Open Time", with a <PAIR>_Close and <PAIR>_Volume column
# for every pair in crypto_data.
combined_df = pd.DataFrame()

for symbol, df in crypto_data.items():
    # Keep only the timestamp, close and volume, and prefix the value columns
    # with the pair name so that they stay distinct after merging.
    df = df[["Open Time", "Close", "Volume"]].rename(
        columns={"Close": f"{symbol}_Close", "Volume": f"{symbol}_Volume"}
    )

    if combined_df.empty:
        combined_df = df
    else:
        # Outer join keeps dates that only some pairs have (e.g. later listings).
        combined_df = combined_df.merge(df, on="Open Time", how="outer")

# Forward-fill depends on row order, so make the chronological order explicit
# instead of relying on how the merge happened to order the keys.
if "Open Time" in combined_df.columns:
    combined_df = combined_df.sort_values("Open Time").reset_index(drop=True)

# DataFrame.ffill() replaces the deprecated fillna(method="ffill"), which raised
# a FutureWarning. Leading gaps (before a pair's first candle) remain NaN.
combined_df = combined_df.ffill()

# Save to CSV (optional)
combined_df.to_csv("data/crypto_historical_data.csv", index=False)
print("Data successfully saved!")

Top 50 Cryptos Loaded: ['BTC', 'ETH', 'USDT', 'XRP', 'SOL', 'BNB', 'USDC', 'DOGE', 'ADA', 'TRX', 'LINK', 'AVAX', 'SUI', 'XLM', 'TON', 'SHIB', 'LEO', 'LTC', 'HBAR', 'HYPE', 'DOT', 'BGB', 'BCH', 'USDe', 'OM', 'UNI', 'DAI', 'ONDO', 'XMR', 'PEPE', 'NEAR', 'AAVE', 'MNT', 'ICP', 'APT', 'TAO', 'TRUMP', 'ETC', 'OKB', 'VET', 'POL', 'KAS', 'CRO', 'ALGO', 'RENDER', 'JUP', 'FIL', 'ARB', 'GT', 'FDUSD']
Tradable Cryptos on Binance: ['BTCUSDT', 'ETHUSDT', 'XRPUSDT', 'SOLUSDT', 'BNBUSDT', 'USDCUSDT', 'DOGEUSDT', 'ADAUSDT', 'TRXUSDT', 'LINKUSDT', 'AVAXUSDT', 'SUIUSDT', 'XLMUSDT', 'TONUSDT', 'SHIBUSDT', 'LTCUSDT', 'HBARUSDT', 'DOTUSDT', 'BCHUSDT', 'OMUSDT', 'UNIUSDT', 'PEPEUSDT', 'NEARUSDT', 'AAVEUSDT', 'ICPUSDT', 'APTUSDT', 'TAOUSDT', 'TRUMPUSDT', 'ETCUSDT', 'VETUSDT', 'POLUSDT', 'ALGOUSDT', 'RENDERUSDT', 'JUPUSDT', 'FILUSDT', 'ARBUSDT', 'FDUSDUSDT']
Fetching historical data for BTCUSDT...
Fetching historical data for ETHUSDT...
Fetching historical data for XRPUSDT...
Fetching historical data for SOLUS

  combined_df.fillna(method="ffill", inplace=True)


Data successfully saved!


In [68]:
# Display the merged close/volume frame (pandas truncates the rendering of large frames).
combined_df

Unnamed: 0,Open Time,BTCUSDT_Close,BTCUSDT_Volume,ETHUSDT_Close,ETHUSDT_Volume,XRPUSDT_Close,XRPUSDT_Volume,SOLUSDT_Close,SOLUSDT_Volume,BNBUSDT_Close,...,RENDERUSDT_Close,RENDERUSDT_Volume,JUPUSDT_Close,JUPUSDT_Volume,FILUSDT_Close,FILUSDT_Volume,ARBUSDT_Close,ARBUSDT_Volume,FDUSDUSDT_Close,FDUSDUSDT_Volume
0,2023-07-01,30585.90,17501.75075,1924.50,178373.3688,0.4732,232549409.0,18.70,3515091.930,247.90,...,,,,,4.107,6623940.12,1.1514,40352979.6,,
1,2023-07-02,30617.03,23286.41019,1937.48,255852.7832,0.4845,337155089.0,19.45,4872775.080,246.50,...,,,,,4.109,4937993.31,1.1511,36723276.4,,
2,2023-07-03,31156.20,43761.64311,1955.54,322102.4295,0.4890,357020349.0,19.26,3065186.840,246.50,...,,,,,4.734,28961489.98,1.1694,50849349.9,,
3,2023-07-04,30766.51,33206.11943,1936.20,205525.0984,0.4872,299439491.0,19.13,3096292.180,242.50,...,,,,,4.546,7988349.41,1.1488,46515719.2,,
4,2023-07-05,30504.81,33215.67122,1910.36,267633.4783,0.4777,292620010.0,19.00,2415908.510,238.90,...,,,,,4.600,14798522.19,1.1117,43694827.3,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,2025-02-06,96554.35,23515.20405,2686.64,719459.9571,2.3250,322896937.0,189.54,3444767.147,571.72,...,4.189,5844305.33,0.8274,35259694.7,3.121,9132598.18,0.4455,87332007.5,0.9992,401886647.0
587,2025-02-07,96506.80,31794.22065,2622.10,695467.3612,2.3968,447487945.0,191.99,3925596.773,577.71,...,4.134,6064457.98,0.8007,41987209.7,3.188,8734086.21,0.4326,102282933.0,0.9989,515209741.0
588,2025-02-08,96444.74,10147.24294,2632.46,379685.1509,2.4190,154717204.0,199.32,1967596.752,612.39,...,4.257,3004873.80,0.8403,25710852.7,3.306,4382412.59,0.4500,47660997.6,0.9991,282426481.0
589,2025-02-09,96462.75,14120.91613,2627.18,387166.7911,2.3928,204205603.0,200.47,2879138.460,615.81,...,4.243,3872535.77,0.8327,31213202.3,3.311,4864828.39,0.4490,48100116.0,0.9989,323574754.0


In [69]:
# Apply Indicators to Each Cryptocurrency
from numpy import nan as npNaN
import pandas_ta as ta

# Indicator columns are gathered here and attached in a single concat below;
# inserting several hundred columns one at a time fragments the DataFrame.
indicator_columns = {}

for col in combined_df.columns:
    if not col.endswith("_Close"):
        continue

    symbol = col.replace("_Close", "")

    # Ensure numeric inputs are float
    close_price = combined_df[col].astype(float)
    volume = combined_df[f"{symbol}_Volume"].astype(float)

    # Trend Indicators
    indicator_columns[f"{symbol}_EMA_10"] = ta.ema(close_price, length=10).astype(float)
    indicator_columns[f"{symbol}_EMA_50"] = ta.ema(close_price, length=50).astype(float)

    # Momentum Indicators
    indicator_columns[f"{symbol}_RSI_14"] = ta.rsi(close_price, length=14).astype(float)
    macd = ta.macd(close_price)
    indicator_columns[f"{symbol}_MACD"] = macd["MACD_12_26_9"].astype(float)
    indicator_columns[f"{symbol}_MACD_Signal"] = macd["MACDs_12_26_9"].astype(float)

    # Volatility Indicators
    bb = ta.bbands(close_price, length=20)
    indicator_columns[f"{symbol}_BB_Upper"] = bb["BBU_20_2.0"].astype(float)
    indicator_columns[f"{symbol}_BB_Middle"] = bb["BBM_20_2.0"].astype(float)

    # Volume Indicators
    indicator_columns[f"{symbol}_OBV"] = ta.obv(close_price, volume).astype(float)
    # NOTE: the close price is passed as high, low and close because combined_df
    # only carries close and volume, so this is a close-only approximation of MFI.
    indicator_columns[f"{symbol}_MFI_14"] = ta.mfi(
        close_price, close_price, close_price, volume, length=14
    ).astype(float)

# Original columns first, then the indicators in the order they were computed.
ta_df = pd.concat(
    [combined_df, pd.DataFrame(indicator_columns, index=combined_df.index)],
    axis=1,
)

# Save to CSV -- write ta_df (the frame that holds the indicators). The previous
# code wrote combined_df, so the saved file contained no indicator columns.
ta_df.to_csv("data/crypto_with_indicators.csv", index=False)
print("Technical indicators added successfully!")



 8.72020431e+08 2.22609507e+09 7.67391409e+08 7.14095195e+08
 5.50315612e+08 6.30343302e+08 9.95959954e+08 7.03352226e+08
 3.18519133e+08 1.32844152e+09 7.72957669e+08 3.83332935e+08
 9.04575315e+08 1.36052409e+09 2.64032202e+08 9.25400126e+08
 6.82795926e+08 4.99100337e+08 1.16366613e+09 3.15822077e+08
 5.92731405e+08 2.05799354e+09 4.20399638e+08 4.53833303e+08
 7.26935199e+08 1.45827016e+09 8.28899119e+08 1.01670038e+09
 6.97646012e+08 1.15079951e+09 9.84753510e+08 4.83711480e+08
 6.90927503e+08 9.17030526e+08 1.20293378e+09 3.45245418e+08
 6.88696012e+08 8.28249788e+08 1.06091337e+09 3.73189066e+08
 6.47797679e+08 4.14771270e+08 2.23443905e+09 1.03069303e+09
 1.76299664e+09 8.23043489e+08 6.85404061e+08 3.09249522e+09
 3.91010429e+09 1.89338557e+09 5.75291760e+08 7.14186049e+08
 1.13403326e+09 1.89410010e+09 6.44355136e+08 7.83156429e+08
 1.36954724e+09 1.18991343e+09 3.02923049e+09 1.61941479e+09
 2.02803098e+09 1.40171307e+09 7.93760530e+08 1.34900628e+09
 1.68529461e+09 1.685072

Technical indicators added successfully!


In [70]:
# Display the frame with the technical-indicator columns added.
ta_df

Unnamed: 0,Open Time,BTCUSDT_Close,BTCUSDT_Volume,ETHUSDT_Close,ETHUSDT_Volume,XRPUSDT_Close,XRPUSDT_Volume,SOLUSDT_Close,SOLUSDT_Volume,BNBUSDT_Close,...,ARBUSDT_MFI_14,FDUSDUSDT_EMA_10,FDUSDUSDT_EMA_50,FDUSDUSDT_RSI_14,FDUSDUSDT_MACD,FDUSDUSDT_MACD_Signal,FDUSDUSDT_BB_Upper,FDUSDUSDT_BB_Middle,FDUSDUSDT_OBV,FDUSDUSDT_MFI_14
0,2023-07-01,30585.90,17501.75075,1924.50,178373.3688,0.4732,232549409.0,18.70,3515091.930,247.90,...,,,,,,,,,,
1,2023-07-02,30617.03,23286.41019,1937.48,255852.7832,0.4845,337155089.0,19.45,4872775.080,246.50,...,,,,,,,,,,
2,2023-07-03,31156.20,43761.64311,1955.54,322102.4295,0.4890,357020349.0,19.26,3065186.840,246.50,...,,,,,,,,,,
3,2023-07-04,30766.51,33206.11943,1936.20,205525.0984,0.4872,299439491.0,19.13,3096292.180,242.50,...,,,,,,,,,,
4,2023-07-05,30504.81,33215.67122,1910.36,267633.4783,0.4777,292620010.0,19.00,2415908.510,238.90,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586,2025-02-06,96554.35,23515.20405,2686.64,719459.9571,2.3250,322896937.0,189.54,3444767.147,571.72,...,45.516931,0.999258,0.999388,47.779981,-0.000096,-0.000043,1.000237,0.999410,-2.045380e+09,65.884513
587,2025-02-07,96506.80,31794.22065,2622.10,695467.3612,2.3968,447487945.0,191.99,3925596.773,577.71,...,45.352470,0.999193,0.999368,44.581162,-0.000120,-0.000058,1.000235,0.999380,-2.560590e+09,61.913757
588,2025-02-08,96444.74,10147.24294,2632.46,379685.1509,2.4190,154717204.0,199.32,1967596.752,612.39,...,45.319813,0.999176,0.999358,47.122754,-0.000121,-0.000071,1.000177,0.999410,-2.278163e+09,61.611821
589,2025-02-09,96462.75,14120.91613,2627.18,387166.7911,2.3928,204205603.0,200.47,2879138.460,615.81,...,45.350601,0.999126,0.999340,44.904930,-0.000137,-0.000084,1.000183,0.999405,-2.601738e+09,57.426401


In [71]:
# Inspect the timestamp column (dtype, length and date range).
ta_df['Open Time']

0     2023-07-01
1     2023-07-02
2     2023-07-03
3     2023-07-04
4     2023-07-05
         ...    
586   2025-02-06
587   2025-02-07
588   2025-02-08
589   2025-02-09
590   2025-02-10
Name: Open Time, Length: 591, dtype: datetime64[ns]