In [1]:
import os
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests
from binance.client import Client
from tqdm import tqdm


In [2]:
# CoinGecko "coins/markets" endpoint: one page of coins ordered by market cap.
url = 'https://api.coingecko.com/api/v3/coins/markets'
params = {
    'vs_currency': 'usd',
    'order': 'market_cap_desc',
    'per_page': 100,
    'page': 1,
    'sparkline': False
}

# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP failures (e.g. rate limiting) right here
# instead of as a confusing error while iterating over an error payload below.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

# NOTE: despite the "top_20" prefix these lists hold every coin on the page
# (see 'per_page' above); the names are kept because later cells use them.
# Appending 'USDT' only builds a *candidate* pair name: some results (such as
# 'USDTUSDT') need not exist as a pair and are dropped when the tickers are
# filtered against this list later on.
top_20_symbols = [coin['symbol'].upper() + 'USDT' for coin in data]

top_20_names = [coin['name'] for coin in data]
top_20_caps = [coin['market_cap'] for coin in data]

# One row per coin: candidate pair symbol, display name and market cap.
cap_df = pd.DataFrame({
    'symbol': top_20_symbols,
    'name': top_20_names,
    'cap': top_20_caps
})

In [3]:
# Display the market-cap table (symbol, name, cap) built in the previous cell.
cap_df

Unnamed: 0,symbol,name,cap
0,BTCUSDT,Bitcoin,2090306707478
1,ETHUSDT,Ethereum,308399324761
2,USDTUSDT,Tether,155496771026
3,XRPUSDT,XRP,129991150048
4,BNBUSDT,BNB,95150757377
...,...,...,...
95,STXUSDT,Stacks,962023730
96,METHUSDT,Mantle Staked Ether,952544753
97,XDCUSDT,XDC Network,945022541
98,PYUSDUSDT,PayPal USD,944519029


In [None]:
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (a fresh checkout may not have 'data/' yet).
os.makedirs('data', exist_ok=True)
cap_df.to_csv('data/top_cap.csv', index=False)
print('Top market cap symbols saved to data/top_cap.csv')

In [5]:
# Read the credentials from the environment so real keys never have to be
# typed into (and committed with) the notebook. The original placeholder
# strings remain as fallbacks when the variables are not set.
api_key = os.environ.get('BINANCE_API_KEY', 'your_api_key')
api_secret = os.environ.get('BINANCE_API_SECRET', 'your_api_secret')

client = Client(api_key, api_secret)

In [6]:
# Ask the client for its ticker statistics and tabulate them in one step.
tickers = pd.DataFrame(client.get_ticker())

In [7]:
# Keep only the ticker rows whose symbol is one of the candidate USDT pairs.
tickers = tickers.loc[tickers['symbol'].isin(top_20_symbols)]

In [8]:
# Display the ticker rows that remain after filtering by candidate symbols.
tickers

Unnamed: 0,symbol,priceChange,priceChangePercent,weightedAvgPrice,prevClosePrice,lastPrice,lastQty,bidPrice,bidQty,askPrice,...,openPrice,highPrice,lowPrice,volume,quoteVolume,openTime,closeTime,firstId,lastId,count
11,BTCUSDT,-2606.97,-2.422,107036.09767199,107658.17,105051.21,0.0014,105051.21,7.20164,105051.22,...,107658.18,108952.38,104800.0,15937.58059,1705896432.686524,1750084980013,1750171380013,5015378933,5018744927,3365995
12,ETHUSDT,-95.04,-3.59,2590.56548503,2647.1,2552.07,0.0023,2552.06,79.3164,2552.07,...,2647.11,2680.34,2524.54,655767.2887,1698808104.315837,1750084980014,1750171380014,2536827717,2540413535,3585819
98,BNBUSDT,-6.44,-0.978,655.67797833,658.35,651.92,0.033,651.92,62.977,651.93,...,658.36,659.28,647.3,170801.236,111990609.11659,1750084980096,1750171380096,1078546646,1079156575,609930
190,LTCUSDT,-3.64,-4.103,87.00207223,88.73,85.08,4.492,85.07,66.286,85.08,...,88.72,88.92,84.8,301095.825,26195960.71451,1750084980186,1750171380186,467849467,468102098,252632
296,ADAUSDT,-0.0297,-4.546,0.63709663,0.6533,0.6236,1495.1,0.6236,10269.4,0.6237,...,0.6533,0.6575,0.6189,105228897.4,67040976.38761,1750084980290,1750171380290,670334693,670587530,252838
306,XRPUSDT,-0.0825,-3.611,2.26706902,2.2847,2.2022,236.8,2.2022,10523.0,2.2023,...,2.2847,2.337,2.1881,183239132.6,415415761.187,1750084980313,1750171380313,1185487005,1187141734,1654730
334,XLMUSDT,-0.0091,-3.416,0.26328985,0.2664,0.2573,1399.0,0.2572,36239.0,0.2573,...,0.2664,0.2704,0.2559,52477736.0,13816855.375,1750084980305,1750171380305,224754255,224848930,94676
350,TRXUSDT,-0.004,-1.428,0.27763717,0.2801,0.2762,39.1,0.2761,148827.6,0.2762,...,0.2802,0.2835,0.2719,737287338.1,204698369.89399,1750084980336,1750171380336,364319460,364524575,205116
351,ETCUSDT,-0.54,-3.129,17.08193837,17.26,16.72,0.93,16.72,380.94,16.73,...,17.26,17.53,16.63,337680.63,5768239.7105,1750084979929,1750171379929,236757409,236810987,53579
377,VETUSDT,-0.00113,-4.902,0.02243816,0.02304,0.02192,562.0,0.02191,43200.0,0.02192,...,0.02305,0.02329,0.02177,214451127.8,4811888.188633,1750084979148,1750171379148,260389071,260416434,27364


In [9]:
def get_historical_data(symbol, limit=365, interval=Client.KLINE_INTERVAL_1DAY, client=client):
    """Fetch klines for ``symbol`` covering the last ``limit`` days.

    Args:
        symbol: Trading pair forwarded to the client, e.g. ``'BTCUSDT'``.
        limit: Length of the look-back window in days (not a row count).
        interval: Kline interval forwarded to the client; daily by default.
        client: Object exposing ``get_historical_klines``. The default is
            bound to the notebook-level ``client`` at the moment this cell
            runs, so re-run this cell after re-creating the client.

    Returns:
        The value returned by ``client.get_historical_klines``, unmodified.
    """
    # python-binance documents its date strings as UTC, so the window is
    # computed in UTC too; a local-time "now" can be a calendar day off.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=limit)

    # Only the calendar date is sent (e.g. "17 Jun 2025"); time of day is dropped.
    start_str = start_date.strftime("%d %b %Y")
    end_str = end_date.strftime("%d %b %Y")

    klines = client.get_historical_klines(symbol, interval, start_str=start_str, end_str=end_str)

    return klines

In [10]:
# Column layout of the combined frame: the first twelve names label the fields
# of each kline row, and 'symbol' is appended per currency below.
column_names = ['timestamp', 'open', 'high', 'low', 'close', 'volume',
                'close_time', 'quote_av', 'trades', 'tb_base_av',
                'tb_quote_av', 'ignore', 'symbol']

# Collect one frame per symbol and concatenate once at the end; calling
# pd.concat inside the loop re-copies all accumulated rows on every iteration.
frames = []

for currency in tqdm(tickers['symbol'], desc="Processing currencies"):
    try:
        klines = get_historical_data(currency, 365)
        currency_df = pd.DataFrame(klines, columns=column_names[:-1])
        # Kline open times are millisecond epoch values.
        currency_df['timestamp'] = pd.to_datetime(currency_df['timestamp'], unit='ms')
        currency_df['symbol'] = currency
        frames.append(currency_df)

    except Exception as e:
        # Best effort: report the failing symbol and carry on with the rest.
        print(f"Error processing {currency}: {e}")
        continue

# If nothing could be downloaded, fall back to an empty frame with the expected
# columns so the following cells do not fail on a None value.
if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=column_names)

Processing currencies: 100%|██████████| 56/56 [00:44<00:00,  1.26it/s]


In [11]:
# Display the combined kline frame for every symbol that was processed.
df

Unnamed: 0,timestamp,open,high,low,close,volume,close_time,quote_av,trades,tb_base_av,tb_quote_av,ignore,symbol
0,2024-06-17,66676.86000000,67298.81000000,65130.00000000,66504.33000000,27386.16851000,1718668799999,1812061226.75656550,1542620,13716.89431000,907703353.53049530,0,BTCUSDT
1,2024-06-18,66504.33000000,66588.23000000,64060.00000000,65175.32000000,42350.10244000,1718755199999,2755141806.03109010,2151711,20076.41361000,1305946733.58928080,0,BTCUSDT
2,2024-06-19,65175.32000000,65727.54000000,64666.00000000,64974.37000000,20060.79576000,1718841599999,1307731953.95453160,1078458,10289.78180000,670875724.31372840,0,BTCUSDT
3,2024-06-20,64974.37000000,66482.94000000,64559.15000000,64869.99000000,24265.29031000,1718927999999,1584896152.16637010,1282687,11828.87934000,772646071.21699650,0,BTCUSDT
4,2024-06-21,64869.99000000,65066.66000000,63379.35000000,64143.56000000,25993.56442000,1719014399999,1665541539.70861590,1362617,12543.25960000,803462610.69190860,0,BTCUSDT
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17728,2025-06-13,1.00000000,1.00020000,0.99980000,1.00000000,3473672.00000000,1749859199999,3473510.08970000,2544,1154509.00000000,1154576.49300000,0,USD1USDT
17729,2025-06-14,1.00010000,1.00090000,1.00000000,1.00090000,18092846.00000000,1749945599999,18098382.54590000,6513,14847371.00000000,14852040.00300000,0,USD1USDT
17730,2025-06-15,1.00060000,1.01000000,0.99990000,1.00100000,56071230.00000000,1750031999999,56176792.70810000,24534,32113460.00000000,32174554.77450000,0,USD1USDT
17731,2025-06-16,1.00090000,1.00550000,1.00000000,1.00060000,8805829.00000000,1750118399999,8816188.66600000,4400,5563660.00000000,5571037.10440000,0,USD1USDT


In [None]:
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (a fresh checkout may not have 'data/' yet).
os.makedirs('data', exist_ok=True)
df.to_csv('data/data.csv', index=False)
print('Historical data saved to data/data.csv')

In [None]:
# Normalise the column to datetimes, then report the earliest and latest
# timestamps present in the combined frame.
df['timestamp'] = pd.to_datetime(df['timestamp'])
print("Data range: {} to {}".format(df['timestamp'].min(), df['timestamp'].max()))