# 📊 Scraper OHLC horario (Binance, CryptoCompare, CoinGecko)

Genera datos **hora a hora (últimos 105 días)** para todas las monedas listadas en `cryptos_master.csv` utilizando tres proveedores:

1. **Binance** – pares *XYZUSDT*.
2. **CryptoCompare** – endpoint `histohour`.
3. **CoinGecko** – endpoint `/coins/{id}/ohlc`.

Cada sección descarga y normaliza los datos de un proveedor y los guarda en su propio CSV `ohlc_hourly_<provider>.csv` dentro de `../../data/`; la última sección concatena los tres en un único archivo `ohlc_hourly_all_sources.csv`.

> **💡 Tip:** Ejecuta cada sección por separado para depurar problemas de rate‑limit.


In [1]:
# ▸ Instala dependencias (si tu entorno ya las tiene, salta esta celda)
!python -m pip install --quiet requests pandas tqdm python-binance

In [2]:
import os, time, pathlib, json, math
import pandas as pd, numpy as np
import requests
from datetime import datetime, timedelta, timezone
from tqdm.auto import tqdm

# ── API keys ─────────────────────────────────────────────────
# SECURITY: a live credential should not be committed with the notebook.
# The CRYPTOCOMPARE_KEY environment variable takes precedence; the literal is
# kept only as a fallback so existing runs keep working. Rotate the key and
# delete the fallback as soon as the environment variable is in place.
CRYPTOCOMPARE_KEY = (
    os.environ.get('CRYPTOCOMPARE_KEY')
    or '04b6720c1a883e4135d9c03af7775afb654338972208c9b378120cd8269ef2c3'
)
COINGECKO_BASE = 'https://api.coingecko.com/api/v3'

# Binance needs no key for public klines.

# Output directory for every CSV written by this notebook.
DATA_DIR = pathlib.Path('../../data')
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Master list of coins; it lives in the same data directory.
MASTER_PATH = DATA_DIR / 'cryptos_master.csv'
if not MASTER_PATH.exists():
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    raise FileNotFoundError(f'No se encontró {MASTER_PATH}')
coins_df = pd.read_csv(MASTER_PATH)
print('Monedas:', len(coins_df))


Monedas: 2824


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def unix_ms(dt):
    """Return ``dt`` as a Unix timestamp in whole milliseconds.

    Args:
        dt: A ``datetime``. Naive values are interpreted as local time by
            ``datetime.timestamp()``; pass a timezone-aware value for UTC.

    Returns:
        Integer milliseconds since the Unix epoch (sub-millisecond part
        truncated).
    """
    # timestamp() yields seconds; scale to milliseconds as the name promises.
    return int(dt.timestamp() * 1000)

def save_df(df, fname):
    """Write ``df`` to ``DATA_DIR / fname`` as CSV and print the target path.

    Args:
        df: DataFrame to persist; the index is not written.
        fname: File name (relative to ``DATA_DIR``) of the CSV to create.
    """
    destination = DATA_DIR.joinpath(fname)
    df.to_csv(destination, index=False)
    print('✔️  Guardado', destination)


## 🔸 CryptoCompare

In [None]:
def fetch_cc_hourly(symbol, tsym='USDT', days=105):
    """Download hourly OHLCV candles for ``symbol`` from CryptoCompare.

    Pages backwards in time through the ``histohour`` endpoint, at most 2000
    candles per request, starting from the current time.

    Args:
        symbol: Base asset ticker (upper-cased before the request).
        tsym: Quote asset ticker (upper-cased before the request).
        days: Days of history to request, counted back from now.

    Returns:
        DataFrame with columns ``timestamp`` (the API's ``time`` field),
        ``open``, ``high``, ``low``, ``close``, ``volume`` (the API's
        ``volumefrom``), ``symbol`` and ``source``, sorted oldest-first.
        An empty DataFrame when the API returned no rows.

    Raises:
        requests.HTTPError: On a non-2xx HTTP status.
        RuntimeError: When the JSON payload reports ``Response == 'Error'``.
    """
    limit = days * 24  # total hours
    url = 'https://min-api.cryptocompare.com/data/v2/histohour'
    params = {
        'fsym': symbol.upper(),
        'tsym': tsym.upper(),
        'limit': 2000,  # max per call; overwritten for every page below
        'api_key': CRYPTOCOMPARE_KEY,
    }
    records = []
    remaining = limit
    to_ts = int(time.time())
    # Context manager so the bar is closed even when a request raises.
    with tqdm(total=limit, desc=f'CC {symbol}') as pbar:
        while remaining > 0:
            params['limit'] = min(2000, remaining) - 1  # CC returns N+1 rows
            params['toTs'] = to_ts
            resp = requests.get(url, params=params, timeout=30)
            resp.raise_for_status()
            payload = resp.json()
            if payload.get('Response') == 'Error':
                # Surface the API's own message instead of a bare KeyError.
                raise RuntimeError(
                    payload.get('Message', 'CryptoCompare returned an error')
                )
            data = (payload.get('Data') or {}).get('Data') or []
            if not data:
                break
            records.extend(data)
            remaining -= len(data)
            # Rows arrive oldest-first; continue just before the oldest one.
            to_ts = data[0]['time'] - 1
            pbar.update(len(data))
            time.sleep(0.25)  # rate-limit cushion

    df = pd.DataFrame(records)
    if df.empty:
        # Match the other fetchers: no data yields an empty frame, not a
        # KeyError from the column selection below.
        return df
    df = df.rename(columns={'time': 'timestamp', 'volumefrom': 'volume'})
    # Pages were appended newest-first; restore chronological order.
    df = df.drop_duplicates(subset='timestamp').sort_values('timestamp')
    df = df.reset_index(drop=True)
    df['symbol'] = symbol.upper()
    df['source'] = 'cryptocompare'
    return df[['timestamp', 'open', 'high', 'low', 'close', 'volume', 'symbol', 'source']]

# Fetch every distinct, non-missing ticker once. The fetch depends only on
# the ticker, so duplicated tickers would just download the same rows again.
dfs = []
for sym in coins_df['symbol'].dropna().unique():
    try:
        frame = fetch_cc_hourly(sym)
    except Exception as e:
        # Best-effort: report the failing ticker and move on to the next one.
        print('❌', sym, e)
        continue
    if not frame.empty:
        dfs.append(frame)

# pd.concat raises on an empty list, so fall back to an empty frame when
# every ticker failed (same guard as the CoinGecko section).
cc_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
save_df(cc_df, 'ohlc_hourly_cryptocompare.csv')


## 🔸 Binance

In [None]:
def fetch_binance_hourly(symbol, quote='USDT', days=105):
    """Download hourly klines for ``symbol``/``quote`` from Binance.

    Pages forward through ``/api/v3/klines`` in requests of up to 1000
    candles, from ``days`` days ago until now.

    Args:
        symbol: Base asset ticker (upper-cased to build the pair name).
        quote: Quote asset ticker (upper-cased to build the pair name).
        days: Days of history to request, counted back from now.

    Returns:
        DataFrame with columns ``timestamp`` (kline open time converted from
        milliseconds to seconds), ``open``, ``high``, ``low``, ``close``,
        ``volume``, ``symbol`` and ``source``. An empty DataFrame when no
        klines were obtained (for example a non-200 response on the first
        request).
    """
    hour_ms = 3600 * 1000
    pair = f'{symbol.upper()}{quote.upper()}'
    interval = '1h'
    limit = 1000
    end = int(time.time() * 1000)
    # Derive the start from the same epoch clock as `end`. The previous
    # datetime.utcnow().timestamp() treated the naive UTC value as local
    # time, shifting the window by the machine's UTC offset.
    start = end - int(days * 24 * hour_ms)
    records = []
    # Context manager so the bar is closed even when a request raises.
    with tqdm(desc=f'BIN {pair}') as pbar:
        while start < end:
            params = {
                'symbol': pair,
                'interval': interval,
                'limit': limit,
                'startTime': start,
                'endTime': min(start + limit * hour_ms, end),
            }
            resp = requests.get(
                'https://api.binance.com/api/v3/klines',
                params=params,
                timeout=30,
            )
            if resp.status_code != 200:
                # Best-effort: stop and keep whatever was collected so far.
                break
            klines = resp.json()
            if not klines:
                break
            records.extend(
                {
                    'timestamp': k[0] // 1000,
                    'open': float(k[1]),
                    'high': float(k[2]),
                    'low': float(k[3]),
                    'close': float(k[4]),
                    'volume': float(k[5]),
                }
                for k in klines
            )
            # Resume one hour after the last candle received.
            start = klines[-1][0] + hour_ms
            pbar.update(len(klines))
            time.sleep(0.2)  # rate-limit cushion

    df = pd.DataFrame(records)
    if df.empty:
        return df
    df['symbol'] = symbol.upper()
    df['source'] = 'binance'
    return df[['timestamp', 'open', 'high', 'low', 'close', 'volume', 'symbol', 'source']]

# Fetch every distinct, non-missing ticker once. The fetch depends only on
# the ticker, so duplicated tickers would just download the same rows again.
dfs = []
for sym in coins_df['symbol'].dropna().unique():
    try:
        frame = fetch_binance_hourly(sym)
    except Exception as e:
        # Best-effort: report the failing ticker and move on to the next one.
        print('❌', sym, e)
        continue
    # Pairs with no klines come back as an empty frame; keep them out.
    if not frame.empty:
        dfs.append(frame)

# pd.concat raises on an empty list, so fall back to an empty frame when
# nothing was downloaded (same guard as the CoinGecko section).
bn_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
save_df(bn_df, 'ohlc_hourly_binance.csv')


## 🔸 CoinGecko

In [None]:
def fetch_cg_hourly(coin_id, vs='usd', days_total=105):
    """Download OHLC candles for ``coin_id`` from CoinGecko.

    The ``/coins/{id}/ohlc`` request used here carries only ``vs_currency``
    and ``days``, i.e. a look-back from now with no start/end, so it cannot
    be paged. Asking for 90 days and then for the remaining 15 fetched two
    overlapping windows (duplicate rows) and never reached further back than
    90 days. This version sends a single request with the smallest documented
    ``days`` value that covers ``days_total`` and trims the result to the
    requested window.

    NOTE(review): per the CoinGecko docs the candle granularity is chosen by
    the API from ``days``, so on the public plan these rows are probably
    coarser than hourly despite the function name. Confirm against the
    current API documentation and plan.

    Args:
        coin_id: CoinGecko coin id used in the URL path.
        vs: Quote currency passed as ``vs_currency``.
        days_total: Days of history to keep, counted back from now.

    Returns:
        DataFrame with columns ``timestamp`` (milliseconds from the API
        converted to seconds), ``open``, ``high``, ``low``, ``close``,
        ``symbol`` (set to ``coin_id``) and ``source``. An empty DataFrame
        on a non-200 response or when no rows fall inside the window.
    """
    # `days` values documented for the endpoint; anything longer uses 'max'.
    allowed_spans = (1, 7, 14, 30, 90, 180, 365)
    span = next((d for d in allowed_spans if d >= days_total), 'max')
    cutoff = int(time.time()) - int(days_total * 86400)
    endpoint = f'{COINGECKO_BASE}/coins/{coin_id}/ohlc'
    params = {'vs_currency': vs, 'days': span}

    # Context manager so the bar is closed even when the request raises.
    with tqdm(desc=f'CG {coin_id}') as pbar:
        resp = requests.get(endpoint, params=params, timeout=30)
        time.sleep(1)  # rate-limit cushion between consecutive coins
        if resp.status_code != 200:
            # Best-effort: unknown ids or throttling yield an empty frame.
            return pd.DataFrame()
        records = [
            {'timestamp': ts // 1000, 'open': o, 'high': h, 'low': lo, 'close': c}
            for ts, o, h, lo, c in resp.json()
            if ts // 1000 >= cutoff  # drop rows older than the requested window
        ]
        pbar.update(days_total)

    df = pd.DataFrame(records)
    if df.empty:
        return df
    df['symbol'] = coin_id
    df['source'] = 'coingecko'
    return df[['timestamp', 'open', 'high', 'low', 'close', 'symbol', 'source']]

# Fetch every distinct, non-missing CoinGecko id once. A missing id would
# otherwise be sent as the literal path segment "nan".
dfs = []
if 'coingecko_id' in coins_df.columns:
    cg_ids = coins_df['coingecko_id'].dropna().unique()
else:
    cg_ids = []
for cid in cg_ids:
    try:
        frame = fetch_cg_hourly(cid)
    except Exception as e:
        # Best-effort: report the failing id and move on to the next one.
        print('❌', cid, e)
        continue
    # Failed lookups come back as an empty frame; keep them out of the concat.
    if not frame.empty:
        dfs.append(frame)

cg_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
save_df(cg_df, 'ohlc_hourly_coingecko.csv')


## 🔸 Unión de los tres proveedores

In [None]:
# Combine whatever each provider produced, skipping providers with no rows.
all_dfs = [df for df in [cc_df, bn_df, cg_df] if not df.empty]
if all_dfs:
    master = pd.concat(all_dfs, ignore_index=True)
    master.sort_values('timestamp', inplace=True)
else:
    # pd.concat raises on an empty list; keep the cell runnable when every
    # provider came back empty.
    master = pd.DataFrame()
save_df(master, 'ohlc_hourly_all_sources.csv')
master.head()
