# In [None]:
"""
Pipeline for Cryptocurrency Anomaly Detection.
"""

from pathlib import Path
import requests
import pandas as pd

from config import RAW_DATA_DIR, PROCESSED_DATA_DIR, COINGECKO_API_KEY


def fetch_coingecko_history(coin_id: str = "bitcoin", vs_currency: str = "usd", days: int = 365) -> Path:
    """
    Fetch daily price history for a coin from CoinGecko and save it as CSV.

    Args:
        coin_id: CoinGecko coin identifier, interpolated into the request URL.
        vs_currency: Quote currency, sent as the ``vs_currency`` query parameter.
        days: Look-back window, sent as the ``days`` query parameter.

    Returns:
        Path of the written file, ``RAW_DATA_DIR / "<coin_id>_history.csv"``.
        The CSV has the columns ``timestamp_ms``, ``price`` and ``timestamp``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        KeyError: If the JSON payload has no ``"prices"`` entry.
    """
    RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)
    url = f"https://api.coingecko.com/api/v3/coins/{coin_id}/market_chart"

    # Without an explicit interval CoinGecko picks the granularity from `days`
    # (sub-daily points for short windows); pin it so the output is daily for
    # every value of `days`, as the function promises.
    params = {"vs_currency": vs_currency, "days": days, "interval": "daily"}

    # Send the configured key so requests are not treated as anonymous.
    # NOTE(review): this is the demo-plan header for the public host; a Pro key
    # needs pro-api.coingecko.com with `x-cg-pro-api-key` — confirm which plan
    # COINGECKO_API_KEY belongs to.
    headers = {"x-cg-demo-api-key": COINGECKO_API_KEY} if COINGECKO_API_KEY else {}

    resp = requests.get(url, params=params, headers=headers, timeout=60)
    resp.raise_for_status()

    # "prices" is read as a list of [timestamp_ms, price] pairs.
    data = resp.json()["prices"]
    df = pd.DataFrame(data, columns=["timestamp_ms", "price"])
    # Epoch milliseconds -> pandas datetimes (no timezone is attached here).
    df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms")

    out_path = RAW_DATA_DIR / f"{coin_id}_history.csv"
    df.to_csv(out_path, index=False)
    return out_path


def run_pipeline():
    """
    Run the cryptocurrency anomaly-detection pipeline outline.

    Only the first stage is live today: the processed-data directory is
    created, price history is fetched with the fetcher's default arguments,
    and a status line is printed. The remaining stages are still TODO.

    Planned stages:

    1. Fetch historical prices for 1–N coins from CoinGecko.
    2. Resample to uniform intervals; derive return and volatility features.
    3. Train an LSTM model that forecasts the next-step price or return.
    4. Turn forecast-vs-actual residuals into anomaly scores.
    5. Optionally add social sentiment as an external feature.
    6. Export:
        - the time series with anomaly flags
        - summary tables for Tableau.
    """
    PROCESSED_DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Stage 1: download raw prices; the CSV path feeds the stages sketched below.
    raw_prices_csv = fetch_coingecko_history()

    # TODO: wire up the remaining stages, roughly:
    #   prices = pd.read_csv(raw_prices_csv, parse_dates=["timestamp"])
    #   ts = prepare_timeseries(prices)
    #   model = train_lstm(ts)
    #   scored = flag_anomalies(model, ts)
    #   scored.to_csv(PROCESSED_DATA_DIR / "bitcoin_anomalies.csv", index=False)

    print("Crypto anomaly pipeline outline executed (implement TODOs).")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_pipeline()