In [18]:
import re
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

In [19]:
# Browser-like HTTP request headers, passed as ``headers=`` to the
# ``requests.get`` calls in this notebook. Long values are split across
# adjacent string literals purely for readability; the resulting strings are
# single header values.
HEADERS = dict(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36",
        ),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,"
            "image/avif,image/webp,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.9"),
        ("Referer", "https://en.wikipedia.org/"),
    ]
)

In [20]:
def fetch_html(url: str) -> str:
    """Download ``url`` and return the response body as text.

    The GET request is sent with the notebook-wide ``HEADERS`` and a
    30-second timeout. ``raise_for_status()`` is called on the response, so
    an HTTP error status raises instead of returning the error page.
    """
    response = requests.get(url, timeout=30, headers=HEADERS)
    response.raise_for_status()
    return response.text

In [21]:

def scrape_team_logo(team_name: str, size_px: int = 256) -> str | None:
    url = "https://en.wikipedia.org/wiki/" + team_name.replace(" ", "_")
    r = requests.get(url, headers=HEADERS, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    infobox = soup.find("table", class_="infobox")
    if not infobox:
        return None

    # âœ… target the dedicated logo cell
    logo_td = infobox.find("td", class_="infobox-image")
    if not logo_td:
        return None

    img = logo_td.find("img")
    if not img or not img.get("src"):
        return None

    src = img["src"]
    if src.startswith("//"):
        src = "https:" + src

    # upscale thumbnail when it's the standard /120px-... URL
    src = src.replace("/120px-", f"/{size_px}px-")
    return src

In [25]:
import json
import time
from pathlib import Path

# --- Team mapping (32) ---
# Keys are team abbreviations; values are the names handed to
# scrape_team_logo(), which turns them into Wikipedia page URLs.
# Keyword order is preserved, so iteration order matches the listing below.
TEAM_WIKI = dict(
    ARI="Arizona Cardinals",
    ATL="Atlanta Falcons",
    BAL="Baltimore Ravens",
    BUF="Buffalo Bills",
    CAR="Carolina Panthers",
    CHI="Chicago Bears",
    CIN="Cincinnati Bengals",
    CLE="Cleveland Browns",
    DAL="Dallas Cowboys",
    DEN="Denver Broncos",
    DET="Detroit Lions",
    GB="Green Bay Packers",
    HOU="Houston Texans",
    IND="Indianapolis Colts",
    JAX="Jacksonville Jaguars",
    KC="Kansas City Chiefs",
    LV="Las Vegas Raiders",
    LAC="Los Angeles Chargers",
    LAR="Los Angeles Rams",
    MIA="Miami Dolphins",
    MIN="Minnesota Vikings",
    NE="New England Patriots",
    NO="New Orleans Saints",
    NYG="New York Giants",
    NYJ="New York Jets",
    PHI="Philadelphia Eagles",
    PIT="Pittsburgh Steelers",
    SEA="Seattle Seahawks",
    SF="San Francisco 49ers",
    TB="Tampa Bay Buccaneers",
    TEN="Tennessee Titans",
    WAS="Washington Commanders",
)

# JSON file mapping team abbreviation -> logo URL (or null for a failed lookup).
CACHE_FILE = Path("team_logos.json")

# Load the existing cache if present. An unreadable or malformed file (for
# example one left behind by an interrupted write) is treated as an empty
# cache instead of aborting the cell, so the logos are simply re-fetched.
TEAM_LOGOS = {}
if CACHE_FILE.exists():
    try:
        cached = json.loads(CACHE_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:  # JSONDecodeError is a ValueError
        print(f"Ignoring unreadable cache {CACHE_FILE}: {e}")
    else:
        if isinstance(cached, dict):
            TEAM_LOGOS = cached

# Fetch only missing / None entries
updated = False
for abbr, title in TEAM_WIKI.items():
    if TEAM_LOGOS.get(abbr):
        continue

    try:
        url = scrape_team_logo(title, size_px=256)
    except Exception as e:
        # Best effort: one failing team must not stop the others. The entry is
        # stored as None so that the next run retries it.
        url = None
        print(f"{abbr}: ERROR -> {e}")
    else:
        print(f"{abbr}: {'OK' if url else 'None'}")

    TEAM_LOGOS[abbr] = url
    updated = True
    time.sleep(0.3)  # be polite to Wikipedia

# Write cache
if updated:
    CACHE_FILE.write_text(json.dumps(TEAM_LOGOS, indent=2), encoding="utf-8")
    print(f"\nSaved cache to {CACHE_FILE.resolve()}")
else:
    print("Cache already complete.")


ARI: OK
ATL: OK
BAL: OK
BUF: OK
CAR: OK
CHI: OK
CIN: OK
CLE: OK
DAL: OK
DEN: OK
DET: OK
GB: OK
HOU: OK
IND: OK
JAX: OK
KC: OK
LV: OK
LAC: OK
LAR: OK
MIA: OK
MIN: OK
NE: OK
NO: OK
NYG: OK
NYJ: OK
PHI: OK
PIT: OK
SEA: OK
SF: OK
TB: OK
TEN: OK
WAS: OK

Saved cache to /Users/lucasgarcia/Desktop/nfl-project/Dfs/team_logos.json
