# Descargar datos desde un sitio web

## Opción A — Yahoo Finance (yfinance)

Pros: muy simple, sin límites estrictos.
Contras: fuente agregada (no de un exchange específico).

In [2]:
# pip install yfinance pandas
import yfinance as yf
import pandas as pd

# Query window: from the first day of 2025 through today
start = "2025-01-01"
end = None  # None = up to the current date

# Yahoo Finance symbol for BTC quoted against USD
ticker = "BTC-USD"

# Download daily candles
btc_yahoo = yf.download(
    ticker,
    start=start,
    end=end,
    interval="1d",
    auto_adjust=False,
)

# Optional cleanup: rename the price fields to snake_case
btc_yahoo = btc_yahoo.rename(
    columns={
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Adj Close": "adj_close",
        "Volume": "volume",
    }
)
btc_yahoo.index.name = "date"

print(btc_yahoo.tail())

[*********************100%***********************]  1 of 1 completed

Price           adj_close          close           high            low  \
Ticker            BTC-USD        BTC-USD        BTC-USD        BTC-USD   
date                                                                     
2025-09-03  111723.210938  111723.210938  112600.226562  110582.960938   
2025-09-04  110723.601562  110723.601562  112208.328125  109347.226562   
2025-09-05  110650.984375  110650.984375  113357.492188  110233.398438   
2025-09-06  110224.695312  110224.695312  111275.015625  110024.085938   
2025-09-07  111279.406250  111279.406250  111373.484375  110214.835938   

Price                open       volume  
Ticker            BTC-USD      BTC-USD  
date                                    
2025-09-03  111190.695312  61119643565  
2025-09-04  111718.148438  60131132901  
2025-09-05  110723.015625  60241647677  
2025-09-06  110650.570312  21500719036  
2025-09-07  110214.835938  24640706560  





## Opción B — Binance (REST público, sin API key)

Pros: datos de exchange; Contras: conviene respetar rate limits.
Descarga klines diarios (interval=1d) de BTCUSDT desde 2025-01-01 hasta hoy.

In [3]:
import requests, time, math
import pandas as pd
from datetime import datetime, timezone
from dateutil import tz


In [4]:
# pip install requests pandas python-dateutil
import requests, time, math
import pandas as pd
from datetime import datetime, timezone
from dateutil import tz

# Base URL of the Binance public REST API
BASE = "https://api.binance.com"
# Trading pair to download
SYMBOL = "BTCUSDT"
# Candle interval: one kline per day
INTERVAL = "1d"
LIMIT = 1000  # maximum per call

def to_ms(dt: datetime) -> int:
    """
    Convert a datetime to whole milliseconds since the Unix epoch.

    Integer arithmetic on the timedelta fields is used instead of
    ``int(dt.timestamp() * 1000)``: that float product can land just below
    the exact value (1.001 s -> 1000.9999999999999) and then truncate to
    the wrong millisecond.

    Args:
        dt: Moment to convert. A naive datetime is interpreted as local
            time, the same convention ``datetime.timestamp()`` applies.

    Returns:
        Milliseconds elapsed since 1970-01-01T00:00:00Z. Any
        sub-millisecond remainder is floored away.
    """
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    # astimezone() reads a naive value as local time, so both naive and
    # aware inputs end up as an aware UTC datetime before subtracting.
    delta = dt.astimezone(timezone.utc) - epoch
    whole_seconds = delta.days * 86_400 + delta.seconds
    return whole_seconds * 1_000 + delta.microseconds // 1_000

def fetch_klines(symbol, interval, start_ms, end_ms=None, limit=1000):
    """
    Download klines page by page until the window [start_ms, end_ms] is covered.

    Args:
        symbol: Value sent as the ``symbol`` query parameter.
        interval: Value sent as the ``interval`` query parameter.
        start_ms: Start of the window, in epoch milliseconds.
        end_ms: End of the window, in epoch milliseconds. When None, the
            current UTC time is used.
        limit: Value sent as the ``limit`` query parameter on each request.

    Returns:
        A list with the rows of every page received, in arrival order.
        Empty when ``start_ms`` is not before the end of the window.

    Raises:
        requests.HTTPError: If a response carries an error status.
    """
    stop_ms = to_ms(datetime.now(timezone.utc)) if end_ms is None else end_ms

    klines = []
    cursor = start_ms
    while cursor < stop_ms:
        response = requests.get(
            f"{BASE}/api/v3/klines",
            params={
                "symbol": symbol,
                "interval": interval,
                "startTime": cursor,
                "endTime": stop_ms,
                "limit": limit,
            },
            timeout=30,
        )
        response.raise_for_status()

        page = response.json()
        if not page:
            # Nothing left inside the window
            break
        klines += page

        # Resume right after the close time (field 6) of the last row received
        resume_ms = page[-1][6] + 1
        if resume_ms <= cursor:
            # No forward progress: bail out instead of looping forever
            break
        cursor = resume_ms

        # Short pause between pages as basic rate-limit courtesy
        time.sleep(0.2)
    return klines

# Time window: from 2025-01-01 00:00:00 UTC up to now
start_dt_utc = datetime(2025, 1, 1, tzinfo=timezone.utc)
start_ms = to_ms(start_dt_utc)
end_ms = None

raw = fetch_klines(SYMBOL, INTERVAL, start_ms, end_ms, LIMIT)

# Build the DataFrame, one column per positional field of a kline row
cols = [
    "open_time",
    "open",
    "high",
    "low",
    "close",
    "volume",
    "close_time",
    "quote_asset_volume",
    "number_of_trades",
    "taker_buy_base_volume",
    "taker_buy_quote_volume",
    "ignore",
]
df = pd.DataFrame(raw, columns=cols)

# Dtypes: price and volume fields become floats
num_cols = [
    "open",
    "high",
    "low",
    "close",
    "volume",
    "quote_asset_volume",
    "taker_buy_base_volume",
    "taker_buy_quote_volume",
]
df = df.astype(dict.fromkeys(num_cols, float))

# Timestamps: epoch milliseconds -> timezone-aware UTC datetimes
df = df.assign(
    open_time=pd.to_datetime(df["open_time"], unit="ms", utc=True),
    close_time=pd.to_datetime(df["close_time"], unit="ms", utc=True),
)

# Optional: index by the candle's close time, exposed under the name "date"
df = df.set_index("close_time").sort_index().rename_axis("date")

# Keep only the "classic" OHLCV columns
df_ohlcv = df.loc[:, ["open", "high", "low", "close", "volume"]]

print(df_ohlcv.tail())

                                       open       high        low      close  \
date                                                                           
2025-09-03 23:59:59.999000+00:00  111240.01  112575.27  110528.71  111705.71   
2025-09-04 23:59:59.999000+00:00  111705.72  112180.00  109329.12  110730.87   
2025-09-05 23:59:59.999000+00:00  110730.87  113384.62  110206.96  110659.99   
2025-09-06 23:59:59.999000+00:00  110660.00  111307.70  109977.00  110187.97   
2025-09-07 23:59:59.999000+00:00  110187.98  111412.00  110180.00  111218.44   

                                       volume  
date                                           
2025-09-03 23:59:59.999000+00:00  11773.72084  
2025-09-04 23:59:59.999000+00:00  12203.13536  
2025-09-05 23:59:59.999000+00:00  21587.40888  
2025-09-06 23:59:59.999000+00:00   5000.29897  
2025-09-07 23:59:59.999000+00:00   5032.36277  


## Pipeline robusto REST+WS a Parquet (gratuito)

ModuleNotFoundError: No module named 'tenacity'