# Market Data Fetching with Adapter Pattern

This notebook recreates the lightweight fetcher adapters used in the XABCD bot project so anyone can pull OHLCV data without installing the full package. The adapters cover:

- Crypto markets through `ccxt`
- GPW (Warsaw Stock Exchange), American equities, and Forex pairs through `yfinance`

Follow the sections below to install the dependencies, inspect the adapters, and run sample queries.

## 1. Install Dependencies

Run the cell below once per environment. It installs every package required by the adapters and is safe to execute multiple times.

In [11]:
!pip install --quiet pandas yfinance ccxt

## 2. Shared Imports and Base Adapter

The base class guarantees that every concrete fetcher returns the same canonical schema: `timestamp`, `open`, `high`, `low`, `close`, `volume`, `open_time`, and `close_time`.

In [12]:
from __future__ import annotations

import time
from abc import ABC, abstractmethod
from datetime import datetime, timedelta, timezone
from typing import Optional, Tuple

import pandas as pd


class BaseOHLCVAdapter(ABC):
    """Common contract shared by every OHLCV fetcher in this notebook.

    Subclasses implement `fetch_ohlcv` and pass their raw frame through
    `_finalize_df`, which fills in missing price columns, derives
    `open_time` / `close_time`, sorts by timestamp, and moves the canonical
    columns to the front.
    """

    COLUMNS = [
        "timestamp",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "open_time",
        "close_time",
    ]

    def __init__(self, rate_limit: float = 0.0):
        # Stored as a float; the fetchers in this notebook pass it to
        # `time.sleep` before requesting data.
        self.rate_limit = float(rate_limit)

    @abstractmethod
    def fetch_ohlcv(
        self,
        exchange_name: str,
        symbol: str,
        timeframe: str,
        since: Optional[int] = None,
        until: Optional[int] = None,
        limit: int = 1000,
    ) -> pd.DataFrame:
        """Return OHLCV bars for `symbol` as a DataFrame in the canonical schema."""

    def _finalize_df(self, df: pd.DataFrame, timeframe: str) -> pd.DataFrame:
        """Normalize a raw OHLCV frame into the canonical layout.

        Args:
            df: Raw bars; must contain a `timestamp` column in epoch milliseconds.
            timeframe: Bar width as a pandas-parsable timedelta string (e.g. "1h").
                When it cannot be parsed to a positive duration, `close_time`
                equals `open_time`.

        Returns:
            A new frame sorted by `timestamp` with a fresh RangeIndex; canonical
            columns come first, followed by any extra columns. An empty or
            missing input yields an empty frame with the canonical columns.

        Raises:
            ValueError: If a non-empty input lacks the `timestamp` column.
        """
        if df is None or df.empty:
            return pd.DataFrame(columns=self.COLUMNS)

        frame = df.copy()

        if "timestamp" not in frame.columns:
            raise ValueError("Missing required 'timestamp' column in OHLCV data")

        # Absent price/volume fields are filled with zeros rather than rejected.
        for name in ("open", "high", "low", "close", "volume"):
            if name not in frame.columns:
                frame[name] = 0.0

        if "open_time" not in frame.columns:
            # Naive datetimes expressed in UTC.
            opened = pd.to_datetime(frame["timestamp"], unit="ms", utc=True)
            frame["open_time"] = opened.dt.tz_convert(None)

        if "close_time" not in frame.columns:
            try:
                parsed = pd.to_timedelta(timeframe)
            except Exception:
                parsed = None
            if isinstance(parsed, pd.Timedelta) and parsed > pd.Timedelta(0):
                span = parsed
            else:
                span = timedelta(0)
            frame["close_time"] = frame["open_time"] + span

        frame = frame.sort_values("timestamp").reset_index(drop=True)
        canonical = [name for name in self.COLUMNS if name in frame.columns]
        remainder = [name for name in frame.columns if name not in canonical]
        return frame[canonical + remainder]

## 3. yfinance Utilities and Unified Fetcher

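`YFinanceFetcher` handles equities and FX symbols. It takes care of ticker mapping and, when `since` is omitted, derives a look-back window from `limit` and the timeframe. The `timeframe` string is passed to yfinance unchanged as its `interval`, so it must be an interval yfinance itself provides (for example `1m`, `30m`, `1h`, `1d`, `1wk`); no resampling is performed.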

In [13]:
import warnings

try:
    import yfinance as yf
except ImportError as exc:
    raise ImportError("Install yfinance before creating a YFinanceFetcher (see the 'Install Dependencies' cell).") from exc


def _parse_tf_delta(tf: str) -> pd.Timedelta:
    tf = str(tf).strip().lower()
    try:
        delta = pd.to_timedelta(tf)
        if isinstance(delta, pd.Timedelta) and delta > pd.Timedelta(0):
            return delta
    except Exception:
        pass
    if tf.endswith("m"):
        return pd.Timedelta(minutes=int(tf[:-1] or 0))
    if tf.endswith("h"):
        return pd.Timedelta(hours=int(tf[:-1] or 0))
    if tf.endswith("d"):
        return pd.Timedelta(days=int(tf[:-1] or 0))
    if tf.endswith("w"):
        return pd.Timedelta(weeks=int(tf[:-1] or 0))
    return pd.Timedelta(0)


def _to_dt(ms: Optional[int]) -> Optional[pd.Timestamp]:
    if ms is None:
        return None
    return pd.to_datetime(ms, unit="ms", utc=True)


def _map_forex_symbol(symbol: str) -> str:
    compact = symbol.replace("/", "").upper()
    return f"{compact}=X"


def _map_gpw_symbol(symbol: str) -> str:
    symbol = symbol.upper()
    if symbol.endswith(".WA"):
        return symbol
    if "." in symbol:
        return symbol
    return f"{symbol}.WA"


class YFinanceFetcher(BaseOHLCVAdapter):
    """
    Unified yfinance-backed adapter for Forex pairs, GPW equities, and plain tickers.

    exchange_name controls ticker mapping (case-insensitive substring match):
      - contains 'forex', 'fx' or 'oanda' -> 'EURUSD=X'
      - contains 'gpw', 'wa', 'stooq' or 'warsaw' -> 'TICKER.WA'
      - default: use symbol as-is
    """

    def _map_symbol(self, exchange_name: str, symbol: str) -> str:
        """Translate `symbol` into the ticker yfinance expects for `exchange_name`."""
        ex = (exchange_name or "").lower()
        # NOTE: substring matching; the short key "wa" also matches any
        # exchange name that merely contains those two letters.
        if any(k in ex for k in ["forex", "fx", "oanda"]):
            return _map_forex_symbol(symbol)
        if any(k in ex for k in ["gpw", "wa", "stooq", "warsaw"]):
            return _map_gpw_symbol(symbol)
        return symbol

    @staticmethod
    def _column_values(df: pd.DataFrame, column: str, yf_symbol: str):
        """Extract one price field as a float array, whatever the column layout.

        Handles multi-level columns (field, ticker), flat columns, and a
        missing field (returned as zeros).
        """
        if column not in df.columns:
            # Field not delivered at all: fill with zeros.
            return pd.Series(0.0, index=df.index).to_numpy()
        data = df[column]
        if isinstance(data, pd.DataFrame):
            # Multi-level layout: prefer the requested ticker, else the only column present.
            data = data[yf_symbol] if yf_symbol in data.columns else data.iloc[:, 0]
        return data.astype(float).to_numpy()

    def fetch_ohlcv(
        self,
        exchange_name: str,
        symbol: str,
        timeframe: str,
        since: Optional[int] = None,
        until: Optional[int] = None,
        limit: int = 1000,
    ) -> pd.DataFrame:
        """Download bars through `yf.download` and return the canonical frame.

        Args:
            exchange_name: Selects the ticker mapping (see class docstring).
            symbol: Ticker or currency pair before mapping.
            timeframe: Passed unchanged to yfinance as `interval`.
            since: Window start in epoch milliseconds. When omitted and `limit`
                is truthy, the start is derived as roughly `1.2 * limit + 10`
                bar widths before now.
            until: Window end in epoch milliseconds; rows whose open timestamp
                is later are dropped.
            limit: Only used to size the look-back window when `since` is omitted.

        Returns:
            Canonical OHLCV frame; empty (canonical columns only) when yfinance
            returns no rows.

        Raises:
            ImportError: If `yf` is `None`.
        """
        if yf is None:
            raise ImportError("yfinance is not installed. Please add 'yfinance' to your dependencies.")

        yf_symbol = self._map_symbol(exchange_name, symbol)

        # Derive a start based on limit when since is not provided
        start_dt = _to_dt(since)
        end_dt = _to_dt(until)
        if start_dt is None and limit:
            delta = _parse_tf_delta(timeframe)
            if delta <= pd.Timedelta(0):
                # Unknown timeframe: assume 1h bars when sizing the look-back window.
                delta = pd.Timedelta(hours=1)
            # 20% headroom plus a few bars so gaps still leave about `limit` rows.
            lookback = int(limit * 1.2) + 10
            start_dt = datetime.now(timezone.utc) - (lookback * delta)

        # Small pacing for courtesy if repeatedly called
        if self.rate_limit > 0:
            time.sleep(self.rate_limit)

        # Fetch
        df = yf.download(
            tickers=yf_symbol,
            interval=timeframe,
            start=start_dt,
            end=end_dt,
            auto_adjust=False,
            progress=False,
            threads=False,
        )

        if df is None or df.empty:
            return pd.DataFrame(columns=self.COLUMNS)

        # yfinance returns fields like ['Open','High','Low','Close','Adj Close','Volume'];
        # rename works for both flat and multi-level column layouts.
        cols = {"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"}
        df = df.rename(columns=cols)

        # Build canonical output
        idx = df.index.tz_localize("UTC") if df.index.tz is None else df.index.tz_convert("UTC")

        print(f"Fetched {len(df)}")

        # Epoch milliseconds computed by timedelta division, so the result does
        # not depend on the index's internal resolution (ns, us, ms, s).
        epoch = pd.Timestamp("1970-01-01", tz="UTC")
        timestamps = ((idx - epoch) // pd.Timedelta(milliseconds=1)).to_numpy(dtype="int64")

        out = pd.DataFrame({
            "timestamp": timestamps,  # ms
            "open": self._column_values(df, "open", yf_symbol),
            "high": self._column_values(df, "high", yf_symbol),
            "low": self._column_values(df, "low", yf_symbol),
            "close": self._column_values(df, "close", yf_symbol),
            "volume": self._column_values(df, "volume", yf_symbol),
        })

        # Clip to 'until' if provided (inclusive by candle open)
        if until is not None:
            out = out[out["timestamp"] <= int(until)]

        return self._finalize_df(out, timeframe)

## 4. Specialized Fetchers for Forex, GPW, and US Equities

In [14]:
class ForexFetcher(YFinanceFetcher):
    """yfinance-backed adapter for currency pairs.

    The incoming `exchange_name` is ignored: every request is forwarded with
    "forex", so the pair is always mapped to the `PAIR=X` ticker form.
    """

    def fetch_ohlcv(
        self,
        exchange_name: str,
        symbol: str,
        timeframe: str,
        since: Optional[int] = None,
        until: Optional[int] = None,
        limit: int = 1000,
    ) -> pd.DataFrame:
        """Fetch bars for `symbol` with the exchange fixed to "forex"."""
        return super().fetch_ohlcv(
            "forex",
            symbol,
            timeframe,
            since=since,
            until=until,
            limit=limit,
        )


class GPWFetcher(YFinanceFetcher):
    """yfinance-backed adapter for Warsaw Stock Exchange tickers.

    The incoming `exchange_name` is ignored: every request is forwarded with
    "gpw", so undotted tickers receive the `.WA` suffix.
    """

    def fetch_ohlcv(
        self,
        exchange_name: str,
        symbol: str,
        timeframe: str,
        since: Optional[int] = None,
        until: Optional[int] = None,
        limit: int = 1000,
    ) -> pd.DataFrame:
        """Fetch bars for `symbol` with the exchange fixed to "gpw"."""
        return super().fetch_ohlcv(
            "gpw",
            symbol,
            timeframe,
            since=since,
            until=until,
            limit=limit,
        )


class AmericanStockFetcher(YFinanceFetcher):
    """yfinance-backed adapter for US equities (e.g. `AAPL`, `SPY`).

    The incoming `exchange_name` is ignored: every request is forwarded with
    "america", which matches none of the mapping keywords, so the symbol is
    passed to yfinance untouched.
    """

    def fetch_ohlcv(
        self,
        exchange_name: str,
        symbol: str,
        timeframe: str,
        since: Optional[int] = None,
        until: Optional[int] = None,
        limit: int = 1000,
    ) -> pd.DataFrame:
        """Fetch bars for `symbol` with the exchange fixed to "america"."""
        return super().fetch_ohlcv(
            "america",
            symbol,
            timeframe,
            since=since,
            until=until,
            limit=limit,
        )

## 5. Crypto Fetcher via ccxt

The crypto fetcher mirrors the project implementation and works with any exchange that `ccxt` supports. Exchange instances are cached and reused across calls.

In [15]:
try:
    import ccxt
except ImportError as exc:
    raise ImportError("Install ccxt before creating a CryptoFetcher (see the 'Install Dependencies' cell).") from exc


class CryptoFetcher(BaseOHLCVAdapter):
    """Crypto OHLCV fetcher backed by ccxt.

    Exchange clients are created lazily and cached per lower-cased exchange id.
    """

    def __init__(self, rate_limit: float = 0.2):
        super().__init__(rate_limit=rate_limit)
        # Lower-cased exchange id -> instantiated ccxt client.
        self._exchanges = {}

    def get_exchange(self, name: str):
        """Return the cached ccxt client for `name`, creating it on first use.

        Raises:
            ValueError: If `name` is not one of the ids listed in `ccxt.exchanges`.
        """
        key = name.lower()
        if key not in self._exchanges:
            # Check the official id list; a bare hasattr() would also accept
            # unrelated module attributes and then fail when called.
            if key not in ccxt.exchanges:
                raise ValueError(f"Exchange '{name}' is not supported by ccxt.")
            self._exchanges[key] = getattr(ccxt, key)()
        return self._exchanges[key]

    def fetch_ohlcv(
        self,
        exchange_name: str,
        symbol: str,
        timeframe: str,
        since: Optional[int] = None,
        until: Optional[int] = None,
        limit: int = 1000,
    ) -> pd.DataFrame:
        """Fetch candles from a ccxt exchange, paginating forward from `since`.

        Args:
            exchange_name: ccxt exchange id (case-insensitive).
            symbol: Market symbol passed to the exchange, e.g. "BTC/USDT".
            timeframe: Candle width passed to the exchange, e.g. "1h".
            since: Start in epoch milliseconds. When omitted, a single request
                is made and whatever the exchange returns is used.
            until: End in epoch milliseconds; later candles are dropped.
            limit: Maximum number of candles requested per call to the exchange.

        Returns:
            Canonical OHLCV frame with one row per candle timestamp; empty
            (canonical columns only) when the exchange returns nothing.
        """
        exchange = self.get_exchange(exchange_name)
        all_batches = []
        fetch_since = since

        while True:
            if self.rate_limit > 0:
                time.sleep(self.rate_limit)

            batch = exchange.fetch_ohlcv(symbol=symbol, timeframe=timeframe, since=fetch_since, limit=limit)
            if not batch:
                break

            frame = pd.DataFrame(batch, columns=["timestamp", "open", "high", "low", "close", "volume"])
            all_batches.append(frame)

            last_ts = int(frame["timestamp"].iloc[-1])
            if until is not None and last_ts >= until:
                break
            if since is None:
                break
            if last_ts < fetch_since:
                # The exchange returned nothing at or after the requested
                # start, so another request could not advance the cursor.
                # Stop instead of looping forever.
                break
            fetch_since = last_ts + 1

        if not all_batches:
            return pd.DataFrame(columns=self.COLUMNS)

        data = pd.concat(all_batches, ignore_index=True)
        # Overlapping pages can repeat a candle; keep the most recently fetched copy.
        data = data.drop_duplicates(subset="timestamp", keep="last")
        if until is not None:
            data = data[data["timestamp"] <= until]

        return self._finalize_df(data, timeframe)

## 6. Helper for Millisecond Timestamps

Many APIs expect UNIX timestamps in milliseconds. The helper below converts a pandas `Timestamp` or a `datetime` into that form; timezone-naive values are interpreted as UTC.

In [16]:
def to_milliseconds(dt: pd.Timestamp) -> int:
    """Convert a pandas Timestamp (or datetime) to milliseconds since epoch.

    Args:
        dt: Any value `pd.Timestamp` accepts. Naive values are treated as UTC;
            aware values are converted to UTC first.

    Returns:
        Whole milliseconds since 1970-01-01T00:00:00Z (sub-millisecond part
        is floored).

    Raises:
        ValueError: If `dt` resolves to NaT (e.g. `None` or `pd.NaT`), which
            would otherwise produce a meaningless integer.
    """
    ts = pd.Timestamp(dt)
    if pd.isna(ts):
        raise ValueError("Cannot convert a missing timestamp (None/NaT) to milliseconds")
    ts = ts.tz_localize("UTC") if ts.tz is None else ts.tz_convert("UTC")
    return int(ts.value // 10**6)

## 7. Usage Examples

Each example fetches a small recent window of data to demonstrate the API. Feel free to tweak the parameters, symbols, or exchanges.

In [17]:
# Crypto: BTC/USDT on Binance, last 7 days of hourly candles
crypto_fetcher = CryptoFetcher(rate_limit=0.2)
# Timestamp.utcnow() is deprecated in recent pandas; now(tz="UTC") gives the
# same timezone-aware UTC instant.
seven_days_ago = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=7)
crypto_df = crypto_fetcher.fetch_ohlcv(
    exchange_name="binance",
    symbol="BTC/USDT",
    timeframe="1h",
    since=to_milliseconds(seven_days_ago),
    limit=500,
)
crypto_df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,open_time,close_time
0,1757772000000,115761.06,115950.0,115603.98,115797.68,438.72685,2025-09-13 14:00:00,2025-09-13 15:00:00
1,1757775600000,115797.67,115924.6,115619.57,115768.68,290.44971,2025-09-13 15:00:00,2025-09-13 16:00:00
2,1757779200000,115768.68,115817.37,115127.27,115334.0,700.8183,2025-09-13 16:00:00,2025-09-13 17:00:00
3,1757782800000,115333.99,115587.49,115180.0,115571.78,465.25625,2025-09-13 17:00:00,2025-09-13 18:00:00
4,1757786400000,115571.79,115653.26,115433.09,115633.92,160.49564,2025-09-13 18:00:00,2025-09-13 19:00:00


In [18]:
# GPW: PKN Orlen daily candles
# `since` is omitted, so the fetcher derives the start of the window from
# `limit` and the timeframe; the number of rows returned can differ from `limit`.
gpw_fetcher = GPWFetcher()
gpw_df = gpw_fetcher.fetch_ohlcv(
    exchange_name="gpw",  # required by the shared signature; GPWFetcher always forwards "gpw"
    symbol="PKN",  # mapped to "PKN.WA" before the download
    timeframe="1d",
    limit=180,
)
gpw_df.tail()

Fetched 157


Unnamed: 0,timestamp,open,high,low,close,volume,open_time,close_time
152,1757894400000,81.300003,82.760002,81.129997,82.75,1980243.0,2025-09-15,2025-09-16
153,1757980800000,82.769997,83.190002,81.779999,82.459999,1931868.0,2025-09-16,2025-09-17
154,1758067200000,82.720001,82.940002,81.559998,82.220001,1577457.0,2025-09-17,2025-09-18
155,1758153600000,82.220001,82.739998,81.800003,82.129997,1371905.0,2025-09-18,2025-09-19
156,1758240000000,82.489998,82.720001,80.709999,81.519997,3932167.0,2025-09-19,2025-09-20


In [19]:
# Forex: EUR/USD hourly candles
# `since` is omitted, so the fetcher derives the start of the window from
# `limit` and the timeframe.
forex_fetcher = ForexFetcher(rate_limit=0.0)  # rate_limit=0.0 disables the pause before the request
forex_df = forex_fetcher.fetch_ohlcv(
    exchange_name="forex",  # required by the shared signature; ForexFetcher always forwards "forex"
    symbol="EUR/USD",  # mapped to "EURUSD=X" before the download
    timeframe="1h",
    limit=120,
)
forex_df.head()

Fetched 119


Unnamed: 0,timestamp,open,high,low,close,volume,open_time,close_time
0,1757890800000,1.173158,1.173434,1.172883,1.172883,0.0,2025-09-14 23:00:00,2025-09-15 00:00:00
1,1757894400000,1.17302,1.17302,1.172608,1.17302,0.0,2025-09-15 00:00:00,2025-09-15 01:00:00
2,1757898000000,1.172883,1.173296,1.172745,1.173158,0.0,2025-09-15 01:00:00,2025-09-15 02:00:00
3,1757901600000,1.173158,1.173296,1.172883,1.173158,0.0,2025-09-15 02:00:00,2025-09-15 03:00:00
4,1757905200000,1.173158,1.173709,1.17302,1.173434,0.0,2025-09-15 03:00:00,2025-09-15 04:00:00


In [20]:
# US Equities: Apple 30-minute candles
# `since` is omitted, so the fetcher derives the start of the window from
# `limit` and the timeframe.
us_fetcher = AmericanStockFetcher()
american_df = us_fetcher.fetch_ohlcv(
    exchange_name="america",  # required by the shared signature; AmericanStockFetcher always forwards "america"
    symbol="AAPL",  # passed to yfinance unchanged
    timeframe="30m",
    limit=160,
)
american_df.head()

Fetched 52


Unnamed: 0,timestamp,open,high,low,close,volume,open_time,close_time
0,1758029400000,237.0,240.559998,236.323502,240.360001,6236092.0,2025-09-16 13:30:00,2025-09-16 14:00:00
1,1758031200000,240.375,241.220001,239.479996,239.529999,5230101.0,2025-09-16 14:00:00,2025-09-16 14:30:00
2,1758033000000,239.529999,239.729996,238.729996,238.979996,2758954.0,2025-09-16 14:30:00,2025-09-16 15:00:00
3,1758034800000,238.990005,239.850006,238.244995,239.592896,2579324.0,2025-09-16 15:00:00,2025-09-16 15:30:00
4,1758036600000,239.580093,239.580093,238.464996,238.654999,1658686.0,2025-09-16 15:30:00,2025-09-16 16:00:00
