# Leagues list — fetch and normalize

This notebook fetches all leagues from your backend API, normalizes the provider responses, displays a table of league IDs and names, and exports the result. It includes retry handling, pagination support, a simple visualization, and example pytest tests for the fetch logic.

Prerequisites:
- Backend API must be running and reachable (default used in this notebook: http://localhost:8000/leagues). If your backend base URL differs, update BASE_URL in the configuration cell.
- Python packages: requests, pandas, tqdm, matplotlib or seaborn, pytest (not all are strictly required to run every cell). See the install cell next.

In [13]:
# Install helper (optional). If packages missing, this will try to install them.
# You can also run these in your terminal: pip install requests pandas tqdm seaborn matplotlib pytest
import importlib
import subprocess
import sys

def _ensure(pkg: str, import_name: str = None):
    name = import_name or pkg
    try:
        return importlib.import_module(name)
    except Exception:
        print(f"Package {name} not found — attempting to install {pkg}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
        return importlib.import_module(name)

# Optionally enable the following to auto-install when running the notebook
# requests = _ensure('requests')
# pandas = _ensure('pandas')
# tqdm = _ensure('tqdm')
# seaborn = _ensure('seaborn')
# matplotlib = _ensure('matplotlib')
# pytest = _ensure('pytest')
print('If you need installs, run pip install requests pandas tqdm seaborn matplotlib pytest')

If you need installs, run pip install requests pandas tqdm seaborn matplotlib pytest


In [14]:
# Configuration: endpoint and credentials
import os

# Default backend URL — update if your backend runs elsewhere
BASE_URL = os.environ.get('LEAGUES_BASE_URL') or 'http://localhost:8000'
LEAGUES_ENDPOINT = f"{BASE_URL.rstrip('/')}/leagues"
# Optional API key usage (not required for local router) — set LEAGUES_API_KEY if your deploy expects it
LEAGUES_API_KEY = os.environ.get('LEAGUES_API_KEY') or None

print('Using endpoint:', LEAGUES_ENDPOINT)
if LEAGUES_API_KEY:
    print('Using API key from LEAGUES_API_KEY')
else:
    print('No API key configured (will attempt unauthenticated request)')

Using endpoint: http://localhost:8000/leagues
No API key configured (will attempt unauthenticated request)


In [15]:
import time
import requests
from typing import Any, Dict, Optional


def get_json(url: str, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, max_retries: int = 5, backoff_base: float = 0.5):
    """GET JSON with retries and basic 429 handling.

    Returns parsed JSON on success, raises requests.HTTPError or RuntimeError on repeated failure.
    """
    headers = dict(headers or {})
    attempt = 0
    last_exc = None
    while attempt < max_retries:
        try:
            resp = requests.get(url, params=params, headers=headers, timeout=30)
            if resp.status_code == 429:
                # rate limited — backoff and retry
                wait = backoff_base * (2 ** attempt)
                print(f"429 received; backing off {wait:.1f}s (attempt {attempt+1}/{max_retries})")
                time.sleep(wait)
                attempt += 1
                continue
            resp.raise_for_status()
            try:
                return resp.json()
            except ValueError:
                raise RuntimeError(f"Invalid JSON response from {url}: {resp.text[:200]}")
        except Exception as e:
            last_exc = e
            wait = backoff_base * (2 ** attempt)
            print(f"Request failed (attempt {attempt+1}/{max_retries}): {e}. Retrying in {wait:.1f}s...")
            time.sleep(wait)
            attempt += 1
    raise RuntimeError(f"Failed to GET {url} after {max_retries} attempts") from last_exc

In [16]:
from tqdm import tqdm
from typing import List


def _unwrap_envelope(payload: Any) -> Any:
    # Unwrap common envelope shapes: { ok: bool, data: ... } or { success:1, result: [...] }
    if payload is None:
        return None
    if isinstance(payload, dict):
        # common Next.js collect envelope: { ok: True, data: ... }
        if 'ok' in payload and payload.get('data') is not None:
            return payload.get('data')
        # AllSports raw: { success:1, result: [...] } or { result: [...] }
        for k in ('result', 'results', 'leagues', 'data', 'items'):
            v = payload.get(k)
            if v is not None:
                return v
    return payload


def fetch_all_leagues(endpoint: str, headers: Optional[Dict[str, str]] = None) -> List[Dict[str, Any]]:
    """Fetch leagues from endpoint.

    This function will attempt to unwrap envelope shapes returned by your backend router
    and the AllSports provider. It prints debug info to help diagnose why results are empty.
    """
    print(f"Fetching leagues from {endpoint}")
    data = get_json(endpoint, headers=headers)
    print('Raw response type:', type(data))

    # Unwrap possible envelope
    unwrapped = _unwrap_envelope(data)
    print('Unwrapped type:', type(unwrapped))

    # If unwrapped is dict, try to extract nested lists
    def extract(arr_like):
        if arr_like is None:
            return []
        if isinstance(arr_like, list):
            return arr_like
        if isinstance(arr_like, dict):
            for k in ("result", "results", "leagues", "data", "items"):
                v = arr_like.get(k)
                if isinstance(v, list):
                    return v
        return []

    items = extract(unwrapped)
    print('Items found at top-level list:', len(items))

    # If items found, return them
    if items:
        return items

    # If unwrapped is a dict with nested lists under many keys, aggregate them
    if isinstance(unwrapped, dict):
        lists = []
        for v in unwrapped.values():
            if isinstance(v, list):
                lists.extend(v)
        if lists:
            print('Aggregated nested lists length:', len(lists))
            return lists

    # As a final fallback, if original data is a dict and contains 'data' that is a list
    if isinstance(data, dict) and isinstance(data.get('data'), list):
        print('Found data[] under top-level envelope')
        return data.get('data')

    # Nothing found
    print('No league list could be extracted from response; returning empty list')
    return []

# Sample run (commented out; enable when running interactively)
# raw = fetch_all_leagues(LEAGUES_ENDPOINT, headers=( {'Authorization': f'Bearer {LEAGUES_API_KEY}'} if LEAGUES_API_KEY else None))
# print('fetched items:', len(raw))

In [17]:
import pandas as pd


def normalize_leagues(raw_list: List[Dict[str, Any]]) -> pd.DataFrame:
    """Normalize raw league objects into a DataFrame with selected columns.

    Attempts to extract common keys: id/league_id/league_key, league_name/name, country/country_name, sport, season/year, slug.
    """
    rows = []
    for item in raw_list:
        if not isinstance(item, dict):
            # sometimes providers return string names
            if isinstance(item, str):
                rows.append({"league_id": None, "name": item, "country": None, "raw": item})
            continue
        # heuristics for id
        lid = (item.get('league_id') or item.get('league_key') or item.get('id') or item.get('key') or item.get('idLeague'))
        name = (item.get('league_name') or item.get('name') or item.get('league') or item.get('league_title'))
        country = item.get('country') or item.get('country_name') or (item.get('country', {}) if isinstance(item.get('country'), str) else None)
        # handle nested country objects
        if isinstance(item.get('country'), dict):
            country = item['country'].get('name') or item['country'].get('country_name')
        sport = item.get('sport') or item.get('sport_name')
        season = item.get('season') or item.get('year') or item.get('league_season')
        slug = item.get('slug') or item.get('league_slug')
        rows.append({
            'league_id': str(lid) if lid is not None else None,
            'name': str(name).strip() if name else None,
            'country': country if isinstance(country, str) else (str(country) if country else None),
            'sport': sport if isinstance(sport, str) else None,
            'season': season if isinstance(season, str) else (str(season) if season else None),
            'slug': slug if isinstance(slug, str) else None,
            'raw': item,
        })

    df = pd.json_normalize(rows)
    # Deduplicate by league_id then name
    if 'league_id' in df.columns:
        df = df.drop_duplicates(subset=['league_id'], keep='first')
    df = df.drop_duplicates(subset=['name'], keep='first')
    return df

# Example usage (interactive)
# df = normalize_leagues(raw)
# df.head(20)

In [18]:
# Inspect and filter helpers

def inspect_df(df: pd.DataFrame):
    display(df.head(10))
    print('\nInfo:')
    print(df.info())
    if 'league_id' in df.columns:
        print('\nUnique ids:', df['league_id'].nunique())


def filter_by_country(df: pd.DataFrame, country: str) -> pd.DataFrame:
    return df[df['country'].str.contains(country, case=False, na=False)]


def lookup_by_id(df: pd.DataFrame, league_id: str) -> pd.DataFrame:
    return df[df['league_id'] == str(league_id)]

# Example: inspect_df(df)

In [19]:
# Export results
import os

OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', 'notebooks', 'output') if '__file__' in globals() else os.path.join(os.getcwd(), 'notebooks', 'output')
os.makedirs(OUTPUT_DIR, exist_ok=True)


def export_df(df: pd.DataFrame, name: str = 'leagues_table'):
    csv_path = os.path.join(OUTPUT_DIR, f"{name}.csv")
    json_path = os.path.join(OUTPUT_DIR, f"{name}.json")
    df.to_csv(csv_path, index=False)
    df.to_json(json_path, orient='records', force_ascii=False)
    print('Exported:', csv_path, json_path)
    return csv_path, json_path

# Example usage: export_df(df)

In [20]:
# Visualization removed as requested by the user. If you need plots later, re-enable seaborn/matplotlib and the plotting function.
print('Plotting disabled')

Plotting disabled


In [21]:
# Unit tests (pytest) for fetch logic
# These are minimal and intended to be run with pytest in the integrated terminal.
# They use unittest.mock to patch requests.get responses.

from unittest.mock import patch, Mock
import pytest


def _mk_resp(status=200, json_data=None, text='ok'):
    m = Mock()
    m.status_code = status
    m.text = text
    m.json = Mock(return_value=json_data)
    def raise_for_status():
        if status >= 400:
            raise requests.HTTPError(f'status {status}')
    m.raise_for_status = raise_for_status
    return m


def test_get_json_success(monkeypatch):
    with patch('requests.get') as g:
        g.return_value = _mk_resp(200, {'result': [{'league_name': 'L1'}]})
        data = get_json('http://example/local')
        assert isinstance(data, dict)
        assert 'result' in data


def test_get_json_retry_on_429(monkeypatch):
    # First call returns 429, second call returns 200
    seq = [_mk_resp(429, None, 'rate limit'), _mk_resp(200, {'result': [{'league_name': 'Lx'}]})]
    with patch('requests.get', side_effect=seq):
        data = get_json('http://example/local', max_retries=3)
        assert 'result' in data


def test_fetch_all_leagues_pagination(monkeypatch):
    # First page returns a result list, should be accepted
    with patch('requests.get') as g:
        g.return_value = _mk_resp(200, [{'league_name': 'A'}, {'league_name': 'B'}])
        out = fetch_all_leagues('http://example/local')
        assert isinstance(out, list)
        assert len(out) == 2

# To run tests from terminal: pytest -q path/to/leagues_list.ipynb -k test_fetch_all_leagues_pagination
# Note: Running pytest directly on a notebook file requires nbval or conversion; alternatively place tests in tests/test_leagues_fetch.py

print('Unit test helper functions added (run with pytest in terminal)')

Unit test helper functions added (run with pytest in terminal)


## Run the full fetch and show summary

The cell below runs the full fetch flow: calls the leagues endpoint, normalizes into a DataFrame, shows a sample, plots leagues per country, and exports the table to `notebooks/output/leagues_table.csv`.

In [22]:
# Run the end-to-end flow (interactive) — no plotting
try:
    raw = fetch_all_leagues(LEAGUES_ENDPOINT, headers=( {'Authorization': f'Bearer {LEAGUES_API_KEY}'} if LEAGUES_API_KEY else None))
    print('Raw items fetched:', len(raw))
    # show a small snippet for debugging
    if isinstance(raw, list) and len(raw) > 0:
        import json
        print('Sample raw item:', json.dumps(raw[0], indent=2)[:500])
    df = normalize_leagues(raw)
    inspect_df(df)
    if not df.empty:
        # export only (no plotting)
        csv_path, json_path = export_df(df)
        print('\nExported to:', csv_path)
        print('\nSample id -> name mapping:')
        print(df[['league_id','name']].dropna().head(20).to_string(index=False))
    else:
        print('No leagues found after normalization — inspect raw output printed above for debugging')
except Exception as e:
    print('Fetch failed:', e)
    raise

Fetching leagues from http://localhost:8000/leagues
Raw response type: <class 'dict'>
Unwrapped type: <class 'dict'>
Items found at top-level list: 976
Raw items fetched: 976
Sample raw item: {
  "league_key": 4,
  "league_name": "UEFA Europa League",
  "country_key": 1,
  "country_name": "eurocups",
  "league_logo": "https://apiv2.allsportsapi.com/logo/logo_leagues/",
  "country_logo": null
}
Raw response type: <class 'dict'>
Unwrapped type: <class 'dict'>
Items found at top-level list: 976
Raw items fetched: 976
Sample raw item: {
  "league_key": 4,
  "league_name": "UEFA Europa League",
  "country_key": 1,
  "country_name": "eurocups",
  "league_logo": "https://apiv2.allsportsapi.com/logo/logo_leagues/",
  "country_logo": null
}


Unnamed: 0,league_id,name,country,sport,season,slug,raw.league_key,raw.league_name,raw.country_key,raw.country_name,raw.league_logo,raw.country_logo
0,4,UEFA Europa League,eurocups,,,,4,UEFA Europa League,1,eurocups,https://apiv2.allsportsapi.com/logo/logo_leagues/,
1,1,UEFA European Championship,eurocups,,,,1,UEFA European Championship,1,eurocups,,
2,683,UEFA Conference League,eurocups,,,,683,UEFA Conference League,1,eurocups,,
3,3,UEFA Champions League,eurocups,,,,3,UEFA Champions League,1,eurocups,https://apiv2.allsportsapi.com/logo/logo_leagu...,
4,633,UEFA Nations League,eurocups,,,,633,UEFA Nations League,1,eurocups,,
5,28,World Cup,Worldcup,,,,28,World Cup,8,Worldcup,https://apiv2.allsportsapi.com/logo/logo_leagu...,https://apiv2.allsportsapi.com/logo/logo_count...
6,152,Premier League,England,,,,152,Premier League,44,England,https://apiv2.allsportsapi.com/logo/logo_leagu...,https://apiv2.allsportsapi.com/logo/logo_count...
7,302,La Liga,Spain,,,,302,La Liga,6,Spain,https://apiv2.allsportsapi.com/logo/logo_leagu...,https://apiv2.allsportsapi.com/logo/logo_count...
8,207,Serie A,Italy,,,,207,Serie A,5,Italy,https://apiv2.allsportsapi.com/logo/logo_leagu...,https://apiv2.allsportsapi.com/logo/logo_count...
9,175,Bundesliga,Germany,,,,175,Bundesliga,4,Germany,https://apiv2.allsportsapi.com/logo/logo_leagu...,https://apiv2.allsportsapi.com/logo/logo_count...



Info:
<class 'pandas.core.frame.DataFrame'>
Index: 719 entries, 0 to 975
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   league_id         719 non-null    object
 1   name              719 non-null    object
 2   country           719 non-null    object
 3   sport             0 non-null      object
 4   season            0 non-null      object
 5   slug              0 non-null      object
 6   raw.league_key    719 non-null    int64 
 7   raw.league_name   719 non-null    object
 8   raw.country_key   719 non-null    int64 
 9   raw.country_name  719 non-null    object
 10  raw.league_logo   282 non-null    object
 11  raw.country_logo  704 non-null    object
dtypes: int64(2), object(10)
memory usage: 73.0+ KB
None

Unique ids: 719
Exported: c:\Users\lnipu\Projects\Sports-Analysis\sports-ai\notebooks\notebooks\output\leagues_table.csv c:\Users\lnipu\Projects\Sports-Analysis\sports-ai\notebooks\notebooks\