In [3]:
import os, requests, time
from pprint import pprint

# Use your key if you have one; "3" is the public demo key; "123" also works for v1
API_KEY = os.getenv("THESPORTSDB_KEY", "3")
BASE_URL = f"https://www.thesportsdb.com/api/v1/json/{API_KEY}"

def GET(path, params=None, timeout=15):
    url = f"{BASE_URL}/{path.lstrip('/')}"
    r = requests.get(url, params=params or {}, timeout=timeout)
    r.raise_for_status()
    data = r.json()
    # TheSportsDB often returns {"x": None} when empty → normalize to {}
    if isinstance(data, dict) and data and all(v is None for v in data.values()):
        return {}
    return data

print("Using:", BASE_URL)

Using: https://www.thesportsdb.com/api/v1/json/3


## List sports (what your key can actually see)

In [4]:
sports = (GET("all_sports.php").get("sports") or [])
print("Total sports available:", len(sports))
for s in sports:
    print("-", s.get("strSport"))

Total sports available: 1
- Soccer


## List all leagues (optionally filter by sport)

In [5]:
ALL_LEAGUES = (GET("all_leagues.php").get("leagues") or [])
print("Total leagues (all sports):", len(ALL_LEAGUES))

# Filter helper
def leagues_for_sport(sport_name: str):
    s = sport_name.strip().lower()
    return [L for L in ALL_LEAGUES if (L.get("strSport") or "").strip().lower() == s]

# Example: Soccer leagues
SOCCER_LEAGUES = leagues_for_sport("Soccer")
print("Soccer leagues:", len(SOCCER_LEAGUES))
for L in SOCCER_LEAGUES[:34]:
    print("-", L["idLeague"], L["strLeague"])
print("GET helper BASE_URL =", BASE_URL)

Total leagues (all sports): 50
Soccer leagues: 34
- 4328 English Premier League
- 4329 English League Championship
- 4330 Scottish Premier League
- 4331 German Bundesliga
- 4332 Italian Serie A
- 4334 French Ligue 1
- 4335 Spanish La Liga
- 4336 Greek Superleague Greece
- 4337 Dutch Eredivisie
- 4338 Belgian Pro League
- 4339 Turkish Super Lig
- 4340 Danish Superliga
- 4344 Portuguese Primeira Liga
- 4346 American Major League Soccer
- 4347 Swedish Allsvenskan
- 4350 Mexican Primera League
- 4351 Brazilian Serie A
- 4354 Ukrainian Premier League
- 4355 Russian Football Premier League
- 4356 Australian A-League
- 4358 Norwegian Eliteserien
- 4359 Chinese Super League
- 4367 _No League
- 4394 Italian Serie B
- 4395 Scottish Championship
- 4396 English League 1
- 4397 English League 2
- 4398 Italian Serie C Girone C
- 4399 German 2. Bundesliga
- 4400 Spanish La Liga 2
- 4401 French Ligue 2
- 4403 Swedish Superettan
- 4404 Brazilian Serie B
- 4406 Argentinian Primera Division
GET helper BA

## (Optional) Leagues by sport + country using the search endpoint

In [6]:
def leagues_by_country_and_sport(country: str, sport: str):
    data = GET("search_all_leagues.php", {"c": country, "s": sport}) or {}
    return data.get("countrys") or []

eng_soc = leagues_by_country_and_sport("England", "Soccer")
print("England + Soccer leagues:", len(eng_soc))
for L in eng_soc:
    print("-", L["idLeague"], L["strLeague"])

England + Soccer leagues: 0


## Teams in a league

In [8]:
LEAGUE_ID = "4359"   # English Premier League (example)
teams_resp = GET("lookup_all_teams.php", {"id": LEAGUE_ID})
teams = teams_resp.get("teams") or []
print(f"Teams in league {LEAGUE_ID}:", len(teams))
for t in teams[:24]:
    print("-", t["idTeam"], t["strTeam"])

Teams in league 4359: 24
- 133606 Bolton Wanderers
- 133607 Wigan Athletic
- 133618 Blackpool
- 133620 Doncaster Rovers
- 133630 Barnsley
- 133631 Peterborough United
- 133633 Reading
- 133637 Cardiff City
- 133836 Plymouth Argyle
- 133888 Luton Town
- 133932 Huddersfield Town
- 134189 Bradford City
- 134231 Rotherham United
- 134241 AFC Wimbledon
- 134258 Stockport County
- 134365 Exeter City
- 134367 Leyton Orient
- 134370 Northampton Town
- 134375 Port Vale
- 134376 Burton Albion
- 134378 Stevenage
- 134381 Mansfield Town
- 134382 Wycombe Wanderers
- 135900 Lincoln City


## Team lookup + players (squad)

In [9]:
# Pick a team id (Arsenal is often 133604; adjust if not present)
TEAM_ID = teams[0]["idTeam"] if teams else "133604"

team_detail = (GET("lookupteam.php", {"id": TEAM_ID}).get("teams") or [{}])[0]
print("Team snapshot:")
pprint({k: team_detail.get(k) for k in [
    "idTeam","strTeam","strLeague","strCountry","intFormedYear",
    "strStadium","strWebsite","strTeamBadge"
]})

players = (GET("lookup_all_players.php", {"id": TEAM_ID}).get("player") or [])
print("\nPlayers in team:", len(players))
for p in players[:12]:
    print("-", p.get("idPlayer"), p.get("strPlayer"), "|", p.get("strPosition"))

Team snapshot:
{'idTeam': '133604',
 'intFormedYear': '1892',
 'strCountry': 'England',
 'strLeague': 'English Premier League',
 'strStadium': 'Emirates Stadium',
 'strTeam': 'Arsenal',
 'strTeamBadge': None,
 'strWebsite': 'www.arsenal.com'}

Players in team: 29
- 34175641 Albert Sambi Lokonga | Central Midfield
- 34163698 Ben White | Right-Back
- 34169884 Bukayo Saka | Right Winger
- 34164499 Christian Nørgaard | Defensive Midfield
- 34194118 Cristhian Mosquera | Centre-Back
- 34148681 David Raya | Goalkeeper
- 34161584 Declan Rice | Defensive Midfield
- 34200626 Ethan Nwaneri | Attacking Midfield
- 34176976 Fábio Vieira | Attacking Midfield
- 34160962 Gabriel Jesus | Centre-Forward
- 34172252 Gabriel Magalhães | Centre-Back
- 34169883 Gabriel Martinelli | Left Wing


## Search team / player by name

In [10]:
def search_teams(name: str):
    return (GET("searchteams.php", {"t": name}).get("teams") or [])

def search_players(name: str):
    return (GET("searchplayers.php", {"p": name}).get("player") or [])

print("Search team 'Arsenal':", [t["idTeam"] for t in search_teams("Arsenal")][:5])
print("Search player 'Saka':", [p["idPlayer"] for p in search_players("Saka")][:5])

Search team 'Arsenal': ['133604']
Search player 'Saka': ['34155082', '34155213', '34168458', '34168973', '34169884']


## Past / Next matches for a league

In [None]:
def league_matches(league_id: str, kind="past", limit=10):
    if kind == "past":
        raw = (GET("eventspastleague.php", {"id": league_id}).get("events") or [])
    else:
        raw = (GET("eventsnextleague.php", {"id": league_id}).get("events") or [])
    return raw[:limit]

past = league_matches(LEAGUE_ID, "past", 10)
upcoming = league_matches(LEAGUE_ID, "next", 10)

print("Past matches:", len(past))
for e in past[:10]:
    print("-", e.get("dateEvent"), e.get("strEvent"), e.get("intHomeScore"), e.get("intAwayScore"), "| id=", e.get("idEvent"))

print("\nUpcoming matches:", len(upcoming))
for e in upcoming[:10]:
    print("-", e.get("dateEvent"), e.get("strEvent"), "| id=", e.get("idEvent"))

## Seasons for a league + full schedule for a season

In [None]:
seasons = [s["strSeason"] for s in (GET("search_all_seasons.php", {"id": LEAGUE_ID}).get("seasons") or [])]
print("Seasons:", seasons[:10])

if seasons:
    SZN = seasons[-1]  # pick the latest available
    season_events = (GET("eventsseason.php", {"id": LEAGUE_ID, "s": SZN}).get("events") or [])
    print(f"\nSeason {SZN} events:", len(season_events))
    for e in season_events[:8]:
        print("-", e.get("dateEvent"), e.get("strEvent"), "| id=", e.get("idEvent"))

## Matches by day (all leagues) for a sport

In [None]:
DAY = "2025-08-17"
day_events = (GET("eventsday.php", {"d": DAY, "s": "Soccer"}).get("events") or [])
print(f"Events on {DAY}:", len(day_events))
for e in day_events[:10]:
    print("-", e.get("strLeague"), "|", e.get("dateEvent"), e.get("strEvent"), "| id=", e.get("idEvent"))

## Team past/next matches

In [None]:
def team_matches(team_id: str, kind="last", limit=5):
    if kind == "last":
        raw = (GET("eventslast.php", {"id": team_id}).get("results") or [])
    else:
        raw = (GET("eventsnext.php", {"id": team_id}).get("events") or [])
    return raw[:limit]

tm_past = team_matches(TEAM_ID, "last", 5)
tm_next = team_matches(TEAM_ID, "next", 5)
print("Team last:", len(tm_past), "| Team next:", len(tm_next))
for e in tm_past:
    print("-", e.get("dateEvent"), e.get("strEvent"), e.get("intHomeScore"), e.get("intAwayScore"), "| id=", e.get("idEvent"))

In [1]:
import time, httpx, pandas as pd

API_BASE = "https://www.thesportsdb.com/api/v1/json/3"   # free key=3
BACKEND = "http://127.0.0.1:8000/collect"                # your agent route

def get_upstream(path, params=None, add_ts=False):
    url = f"{API_BASE}/{path.lstrip('/')}"
    q = dict(params or {})
    if add_ts:  # bypass CDN caches during debugging
        q["ts"] = f"{time.time():.6f}"
    r = httpx.get(url, params=q, timeout=30)
    r.raise_for_status()
    return r.json()

def call_agent(intent, **args):
    r = httpx.post(BACKEND, json={"intent": intent, "args": args}, timeout=30)
    r.raise_for_status()
    return r.json()

def players_df(players):
    cols = ["idPlayer","strPlayer","strTeam","idTeam","strNationality","strPosition","dateBorn"]
    return pd.DataFrame(players or [])[cols].fillna("")

In [2]:
teams_to_check = ["Arsenal", "Chelsea", "Bayern Munich", "Barcelona"]

resolved = {}
for name in teams_to_check:
    s = get_upstream("searchteams.php", {"t": name}, add_ts=True)
    teams = s.get("teams") or []
    if not teams:
        print(f"[UPSTREAM] No team found for {name}")
        continue
    # prefer exact match, else first
    exact = [t for t in teams if (t.get("strTeam") or "").strip().lower()==name.lower()]
    pick = exact[0] if exact else teams[0]
    resolved[name] = {"idTeam": pick.get("idTeam"), "idLeague": pick.get("idLeague"), "strLeague": pick.get("strLeague")}
    print(f"[RESOLVED] {name} -> idTeam={resolved[name]['idTeam']}  league={resolved[name]['strLeague']}({resolved[name]['idLeague']})")

print("\n--- Fetch players via lookup_all_players.php ---")
out = {}
for name, meta in resolved.items():
    pid = meta["idTeam"]
    d = get_upstream("lookup_all_players.php", {"id": pid}, add_ts=True)
    players = d.get("player") or []
    df = players_df(players)
    out[name] = df
    print(f"{name}: {len(df)} players | sample: {list(df['strPlayer'].head(5))}")

# simple overlap check
print("\n--- Pairwise overlaps (by idPlayer) ---")
names = list(out.keys())
for i in range(len(names)):
    for j in range(i+1, len(names)):
        a, b = names[i], names[j]
        ai = set(out[a]["idPlayer"])
        bi = set(out[b]["idPlayer"])
        inter = ai & bi
        print(f"{a} vs {b}: overlap={len(inter)}")

[RESOLVED] Arsenal -> idTeam=133604  league=English Premier League(4328)
[RESOLVED] Chelsea -> idTeam=133610  league=English Premier League(4328)
[RESOLVED] Bayern Munich -> idTeam=133664  league=German Bundesliga(4331)
[RESOLVED] Barcelona -> idTeam=133739  league=Spanish La Liga(4335)

--- Fetch players via lookup_all_players.php ---
Arsenal: 29 players | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']
Chelsea: 29 players | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']
Bayern Munich: 29 players | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']
Barcelona: 29 players | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']

--- Pairwise overlaps (by idPlayer) ---
Arsenal vs Chelsea: overlap=29
Arsenal vs Bayern Munich: overlap=29
Arsenal vs Barcelona: overlap=29
Chel

In [11]:
names = ["Arsenal", "Chelsea", "Bayern Munich", "Barcelona"]
agent_res = {}

for name in names:
    res = call_agent("players.list", teamName=name)
    ok = res.get("ok")
    players = (res.get("data") or {}).get("players") or []
    df = players_df(players)
    agent_res[name] = df
    print(f"[AGENT] {name}: ok={ok} | players={len(df)} | sample: {list(df['strPlayer'].head(5))}")

print("\n--- Agent pairwise overlaps (by idPlayer) ---")
for i in range(len(names)):
    for j in range(i+1, len(names)):
        a, b = names[i], names[j]
        ai = set(agent_res[a]["idPlayer"])
        bi = set(agent_res[b]["idPlayer"])
        print(f"{a} vs {b}: overlap={len(ai & bi)}")

[AGENT] Arsenal: ok=True | players=29 | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']
[AGENT] Chelsea: ok=True | players=29 | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']
[AGENT] Bayern Munich: ok=True | players=29 | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']
[AGENT] Barcelona: ok=True | players=29 | sample: ['Albert Sambi Lokonga', 'Ben White', 'Bukayo Saka', 'Christian Nørgaard', 'Cristhian Mosquera']

--- Agent pairwise overlaps (by idPlayer) ---
Arsenal vs Chelsea: overlap=29
Arsenal vs Bayern Munich: overlap=29
Arsenal vs Barcelona: overlap=29
Chelsea vs Bayern Munich: overlap=29
Chelsea vs Barcelona: overlap=29
Bayern Munich vs Barcelona: overlap=29


In [12]:
import httpx, hashlib, time, json

API = "https://www.thesportsdb.com/api/v1/json/3"

def raw(path, params):
    r = httpx.get(f"{API}/{path}", params=params, timeout=30)
    r.raise_for_status()
    return r.text

teams = {
    "Arsenal": "133604",
    "Chelsea": "133610",
    "Bayern": "133664",
    "Barcelona": "133739",
}

hashes = {}
for name, tid in teams.items():
    # add a ts param to try to bypass caches; if origin ignores id, hashes still match
    txt = raw("lookup_all_players.php", {"id": tid, "ts": f"{time.time():.6f}"})
    h = hashlib.sha1(txt.encode("utf-8")).hexdigest()
    hashes[name] = h
    print(f"{name}: sha1={h} length={len(txt)}")

print("\nAll equal? →", len(set(hashes.values())) == 1)

Arsenal: sha1=67ae5753e256c2e0ab281fe055795d201bbc8508 length=74699
Chelsea: sha1=67ae5753e256c2e0ab281fe055795d201bbc8508 length=74699
Bayern: sha1=67ae5753e256c2e0ab281fe055795d201bbc8508 length=74699
Barcelona: sha1=67ae5753e256c2e0ab281fe055795d201bbc8508 length=74699

All equal? → True


In [13]:
import pandas as pd

def get_players_by_team_name(team_name: str):
    r = httpx.get(f"{API}/searchplayers.php", params={"t": team_name}, timeout=30)
    r.raise_for_status()
    data = r.json()
    return data.get("player") or []

for team in ["Arsenal","Chelsea","Bayern Munich","Barcelona"]:
    players = get_players_by_team_name(team)
    df = pd.DataFrame(players, columns=["idPlayer","strPlayer","strTeam","strPosition","strNationality"]).fillna("")
    print(f"\n{team}: {len(df)} players")
    print(list(df["strPlayer"].head(8)))


Arsenal: 25 players
['Mikel Arteta', 'Raheem Sterling', 'David Raya', 'Leandro Trossard', 'Martin Ødegaard', 'Kepa Arrizabalaga', 'Oleksandr Zinchenko', 'Gabriel Jesus']

Chelsea: 25 players
['Mikel Arteta', 'Raheem Sterling', 'David Raya', 'Leandro Trossard', 'Martin Ødegaard', 'Kepa Arrizabalaga', 'Oleksandr Zinchenko', 'Gabriel Jesus']

Bayern Munich: 25 players
['Mikel Arteta', 'Raheem Sterling', 'David Raya', 'Leandro Trossard', 'Martin Ødegaard', 'Kepa Arrizabalaga', 'Oleksandr Zinchenko', 'Gabriel Jesus']

Barcelona: 25 players
['Mikel Arteta', 'Raheem Sterling', 'David Raya', 'Leandro Trossard', 'Martin Ødegaard', 'Kepa Arrizabalaga', 'Oleksandr Zinchenko', 'Gabriel Jesus']


In [14]:
import httpx, pandas as pd, time

API = "https://www.thesportsdb.com/api/v1/json/3"

def GET(path, **params):
    # add ts to bypass edge cache (may not always help, but harmless)
    params = {**params, "ts": f"{time.time():.6f}"}
    r = httpx.get(f"{API}/{path}", params=params, timeout=30)
    r.raise_for_status()
    d = r.json()
    # normalize empty payloads like {"eventstats": None} → {}
    if isinstance(d, dict) and d and all(v is None for v in d.values()):
        return {}
    return d or {}

def team_id(team_name: str) -> str:
    d = GET("searchteams.php", t=team_name)
    teams = d.get("teams") or []
    if not teams: 
        raise ValueError(f"Team not found: {team_name}")
    return str(teams[0]["idTeam"])

def last_events_for_team(tid: str, k: int = 5):
    d = GET("eventslast.php", id=tid)
    evs = (d.get("results") or [])[:k]
    return evs

def lineup_for_event(evid: str):
    d = GET("lookuplineup.php", id=evid)
    return d.get("lineup") or []

def lineup_to_players(lineup_rows):
    # Each row has: idPlayer, strPlayer, strPosition, strTeam, strHome, strSubstitute, etc.
    cols = ["idPlayer","strPlayer","strPosition","strTeam","strHome","strSubstitute"]
    df = pd.DataFrame(lineup_rows)[cols].dropna(subset=["idPlayer","strPlayer"]).drop_duplicates("idPlayer")
    return df

In [15]:
clubs = ["Arsenal", "Chelsea", "Bayern Munich", "Barcelona"]

all_squads = {}
for club in clubs:
    try:
        tid = team_id(club)
        evs = last_events_for_team(tid, k=6)  # union of last 6 matches
        roster_df = pd.DataFrame(columns=["idPlayer","strPlayer","strPosition","strTeam","strHome","strSubstitute"])
        for ev in evs:
            evid = str(ev["idEvent"])
            lu = lineup_for_event(evid)
            df = lineup_to_players(lu)
            roster_df = pd.concat([roster_df, df], ignore_index=True)
        roster_df = roster_df.drop_duplicates("idPlayer").reset_index(drop=True)
        all_squads[club] = roster_df
        print(f"{club}: {len(roster_df)} unique players from recent lineups")
        display(roster_df.head(12))
    except Exception as e:
        print(f"{club}: ERROR {e}")

Arsenal: 22 unique players from recent lineups


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute
0,34161548,Matty Cash,Midfielder,Aston Villa,Yes,No
1,34147598,Tyrone Mings,Defender,Aston Villa,Yes,No
2,34145735,Matt Targett,Defender,Aston Villa,Yes,No
3,34161348,Douglas Luiz,Midfielder,Aston Villa,Yes,No
4,34167707,Trézéguet,Midfielder,Aston Villa,Yes,No
5,34145837,Ross Barkley,Attacking Midfielder,Aston Villa,Yes,No
6,34145477,Jack Grealish,Midfielder,Aston Villa,Yes,No
7,34157367,Ollie Watkins,Forward,Aston Villa,Yes,No
8,34145423,Emiliano Martinez,Goalkeeper,Aston Villa,Yes,No
9,34161727,Ezri Konsa Ngoyo,Defender,Aston Villa,Yes,No


Chelsea: 22 unique players from recent lineups


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute
0,34161548,Matty Cash,Midfielder,Aston Villa,Yes,No
1,34147598,Tyrone Mings,Defender,Aston Villa,Yes,No
2,34145735,Matt Targett,Defender,Aston Villa,Yes,No
3,34161348,Douglas Luiz,Midfielder,Aston Villa,Yes,No
4,34167707,Trézéguet,Midfielder,Aston Villa,Yes,No
5,34145837,Ross Barkley,Attacking Midfielder,Aston Villa,Yes,No
6,34145477,Jack Grealish,Midfielder,Aston Villa,Yes,No
7,34157367,Ollie Watkins,Forward,Aston Villa,Yes,No
8,34145423,Emiliano Martinez,Goalkeeper,Aston Villa,Yes,No
9,34161727,Ezri Konsa Ngoyo,Defender,Aston Villa,Yes,No


Bayern Munich: 22 unique players from recent lineups


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute
0,34161548,Matty Cash,Midfielder,Aston Villa,Yes,No
1,34147598,Tyrone Mings,Defender,Aston Villa,Yes,No
2,34145735,Matt Targett,Defender,Aston Villa,Yes,No
3,34161348,Douglas Luiz,Midfielder,Aston Villa,Yes,No
4,34167707,Trézéguet,Midfielder,Aston Villa,Yes,No
5,34145837,Ross Barkley,Attacking Midfielder,Aston Villa,Yes,No
6,34145477,Jack Grealish,Midfielder,Aston Villa,Yes,No
7,34157367,Ollie Watkins,Forward,Aston Villa,Yes,No
8,34145423,Emiliano Martinez,Goalkeeper,Aston Villa,Yes,No
9,34161727,Ezri Konsa Ngoyo,Defender,Aston Villa,Yes,No


Barcelona: 22 unique players from recent lineups


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute
0,34161548,Matty Cash,Midfielder,Aston Villa,Yes,No
1,34147598,Tyrone Mings,Defender,Aston Villa,Yes,No
2,34145735,Matt Targett,Defender,Aston Villa,Yes,No
3,34161348,Douglas Luiz,Midfielder,Aston Villa,Yes,No
4,34167707,Trézéguet,Midfielder,Aston Villa,Yes,No
5,34145837,Ross Barkley,Attacking Midfielder,Aston Villa,Yes,No
6,34145477,Jack Grealish,Midfielder,Aston Villa,Yes,No
7,34157367,Ollie Watkins,Forward,Aston Villa,Yes,No
8,34145423,Emiliano Martinez,Goalkeeper,Aston Villa,Yes,No
9,34161727,Ezri Konsa Ngoyo,Defender,Aston Villa,Yes,No


In [10]:
# Roster per club using the global index (no lineup endpoint)
def resolve_team_id(team_name: str) -> tuple[str, str]:
    d = GET("searchteams.php", t=team_name)
    teams = d.get("teams") or []
    if not teams:
        raise ValueError(f"Team not found: {team_name}")
    name_l = team_name.strip().lower()
    exact = [t for t in teams if (t.get("strTeam") or "").strip().lower() == name_l]
    pick = exact[0] if exact else teams[0]
    return str(pick["idTeam"]), (pick.get("strTeam") or team_name)

def roster_from_index(players_df: pd.DataFrame, team_query: str) -> pd.DataFrame:
    tid, canon = resolve_team_id(team_query)
    # Prefer idTeam filter when present
    df = players_df.copy()
    have_id = df.get("idTeam").notna() if "idTeam" in df.columns else None
    if have_id is not None and have_id.any():
        by_id = df[df["idTeam"].astype(str) == tid]
    else:
        by_id = pd.DataFrame(columns=df.columns)

    # Fallback: match strTeam exactly (some rows have strTeam but not idTeam)
    have_name = df.get("strTeam").notna() if "strTeam" in df.columns else None
    if have_name is not None and have_name.any():
        by_name = df[df["strTeam"].str.strip().str.lower() == canon.lower()]
    else:
        by_name = pd.DataFrame(columns=df.columns)

    out = pd.concat([by_id, by_name], ignore_index=True).drop_duplicates(subset=["idPlayer"])
    # select common fields for display
    keep = [c for c in ["idPlayer","strPlayer","strTeam","idTeam","strPosition","strNationality","dateBorn"] if c in out.columns]
    return out[keep].reset_index(drop=True)

for club in ["Arsenal", "Chelsea", "Bayern Munich", "Barcelona"]:
    roster = roster_from_index(players_df, club)
    print(f"{club}: {len(roster)} players from global index")
    display(roster.head(15))

NameError: name 'players_df' is not defined

In [2]:
# Cell B — run the sanity-check for a few clubs
clubs = ["Arsenal", "Chelsea", "Bayern Munich", "Barcelona"]

for club in clubs:
    tid, canon = resolve_team_id(club)
    evid, title = last_event_id_for_team(tid)
    lu = lineup_for_event(evid)
    df = pd.DataFrame(lu)
    teams_in_lineup = sorted(set(df.get("strTeam", pd.Series(dtype=str)).dropna().tolist()))
    print(f"\n[{club}] → resolved_id={tid} | last_event={evid} «{title}»")
    print("Teams present in lineup payload:", teams_in_lineup[:10])
    print("Rows:", len(df))


[Arsenal] → resolved_id=133604 | last_event=2290379 «Arsenal vs Athletic Bilbao»
Teams present in lineup payload: ['Aston Villa', 'Liverpool']
Rows: 22

[Chelsea] → resolved_id=133610 | last_event=2267080 «Chelsea vs Crystal Palace»
Teams present in lineup payload: ['Aston Villa', 'Liverpool']
Rows: 22

[Bayern Munich] → resolved_id=133664 | last_event=2276638 «Bayern Munich vs RB Leipzig»
Teams present in lineup payload: ['Aston Villa', 'Liverpool']
Rows: 22

[Barcelona] → resolved_id=133739 | last_event=2307633 «Barcelona vs Como»
Teams present in lineup payload: ['Aston Villa', 'Liverpool']
Rows: 22


In [3]:
import hashlib, json

def raw_lineup(event_id: str) -> str:
    r = httpx.get(f"{API}/lookuplineup.php", params={"id": event_id, "ts": f"{time.time():.6f}"}, timeout=30)
    r.raise_for_status()
    return r.text

def sha1(s: str) -> str:
    return hashlib.sha1(s.encode("utf-8")).hexdigest()

for club in clubs:
    tid, _ = resolve_team_id(club)
    evid, title = last_event_id_for_team(tid)
    txt = raw_lineup(evid)
    print(f"{club:<12} event={evid} sha1={sha1(txt)} len={len(txt)} title={title}")

Arsenal      event=2290379 sha1=c18ef665489edcc9c2647d6bb2dfb23b339cc365 len=11057 title=Arsenal vs Athletic Bilbao
Chelsea      event=2267080 sha1=c18ef665489edcc9c2647d6bb2dfb23b339cc365 len=11057 title=Chelsea vs Crystal Palace
Bayern Munich event=2276638 sha1=c18ef665489edcc9c2647d6bb2dfb23b339cc365 len=11057 title=Bayern Munich vs RB Leipzig
Barcelona    event=2307633 sha1=c18ef665489edcc9c2647d6bb2dfb23b339cc365 len=11057 title=Barcelona vs Como


In [7]:
def squad_from_lineups(team_name: str, last_n: int = 6) -> pd.DataFrame:
    tid, canon = resolve_team_id(team_name)
    d = GET("eventslast.php", id=tid)
    evs = (d.get("results") or [])[:last_n]
    rows = []
    for ev in evs:
        evid = str(ev["idEvent"])
        lu = lineup_for_event(evid)
        rows.extend(lu)
    df = pd.DataFrame(rows)
    # keep key fields; drop foreign teams’ players if any weirdness
    keep = ["idPlayer","strPlayer","strPosition","strTeam","strHome","strSubstitute"]
    df = df[keep].dropna(subset=["idPlayer","strPlayer"]).drop_duplicates("idPlayer")
    # heuristic: keep only rows where strTeam equals the canon team name, if present
    if "strTeam" in df.columns and canon:
        df = df[df["strTeam"].fillna("").str.lower() == canon.lower()]
    return df.reset_index(drop=True)

for club in ["Arsenal","Chelsea","Bayern Munich","Barcelona"]:
    roster = squad_from_lineups(club, last_n=6)
    print(f"{club}: {len(roster)} players")
    display(roster.head(12))

Arsenal: 0 players


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute


Chelsea: 0 players


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute


Bayern Munich: 0 players


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute


Barcelona: 0 players


Unnamed: 0,idPlayer,strPlayer,strPosition,strTeam,strHome,strSubstitute


In [11]:
# Roster per club using the global index (no lineup endpoint)
def resolve_team_id(team_name: str) -> tuple[str, str]:
    d = GET("searchteams.php", t=team_name)
    teams = d.get("teams") or []
    if not teams:
        raise ValueError(f"Team not found: {team_name}")
    name_l = team_name.strip().lower()
    exact = [t for t in teams if (t.get("strTeam") or "").strip().lower() == name_l]
    pick = exact[0] if exact else teams[0]
    return str(pick["idTeam"]), (pick.get("strTeam") or team_name)

def roster_from_index(players_df: pd.DataFrame, team_query: str) -> pd.DataFrame:
    tid, canon = resolve_team_id(team_query)
    # Prefer idTeam filter when present
    df = players_df.copy()
    have_id = df.get("idTeam").notna() if "idTeam" in df.columns else None
    if have_id is not None and have_id.any():
        by_id = df[df["idTeam"].astype(str) == tid]
    else:
        by_id = pd.DataFrame(columns=df.columns)

    # Fallback: match strTeam exactly (some rows have strTeam but not idTeam)
    have_name = df.get("strTeam").notna() if "strTeam" in df.columns else None
    if have_name is not None and have_name.any():
        by_name = df[df["strTeam"].str.strip().str.lower() == canon.lower()]
    else:
        by_name = pd.DataFrame(columns=df.columns)

    out = pd.concat([by_id, by_name], ignore_index=True).drop_duplicates(subset=["idPlayer"])
    # select common fields for display
    keep = [c for c in ["idPlayer","strPlayer","strTeam","idTeam","strPosition","strNationality","dateBorn"] if c in out.columns]
    return out[keep].reset_index(drop=True)

for club in ["Arsenal", "Chelsea", "Bayern Munich", "Barcelona"]:
    roster = roster_from_index(players_df, club)
    print(f"{club}: {len(roster)} players from global index")
    display(roster.head(15))

NameError: name 'players_df' is not defined