In [1]:
import requests
import pandas as pd
import time
import random

TEAM_IDS = {
    "Arsenal": "18bb7c10",
    "Aston Villa": "8602292d",
    "Bournemouth": "4ba7cbea",
    "Brentford": "cd051869",
    "Brighton": "d07537b9",
    "Chelsea": "cff3d9bb",
    "Crystal Palace": "47c64c55",
    "Everton": "d3fd31cc",
    "Fulham": "fd962109",
    "Ipswich Town": "b74092de",
    "Leicester City": "a2d435b3",
    "Liverpool": "822bd0ba",
    "Manchester City": "b8fd03ef",
    "Manchester United": "19538871",
    "Newcastle Utd": "b2b47a98",
    "Nottingham Forest": "e4a775cb",
    "Southampton": "33c895d4",
    "Tottenham": "361ca564",
    "West Ham": "7c21e445",
    "Wolves": "8cec06e1"
}

HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

def get_team_form_and_goals(team_name, team_id):
    url = f"https://fbref.com/en/squads/{team_id}/{team_name.replace(' ', '-')}-Match-Logs"
    try:
        res = requests.get(url, headers=HEADERS)
        time.sleep(random.uniform(4, 10))
        table = pd.read_html(res.url, attrs={"id": "matchlogs_for"})[0]
        table = table.tail(5)

        # Drop NaNs, convert to strings, keep only valid chars
        results = table["Result"].dropna().astype(str).str.strip().str[0]
        form = "".join(results[results.isin(["W", "D", "L"])].head(5))

        goals_scored = pd.to_numeric(table["GF"], errors="coerce").fillna(0).astype(int).sum()
        goals_conceded = pd.to_numeric(table["GA"], errors="coerce").fillna(0).astype(int).sum()
        goal_difference = goals_scored - goals_conceded
        return form, goals_scored, goals_conceded, goal_difference
    except Exception as e:
        print(f"❌ Failed form/goals for {team_name}: {e}")
        return "", 0, 0, 0

def get_top_scorer_and_assister(team_name, team_id):
    url = f"https://fbref.com/en/squads/{team_id}/{team_name.replace(' ', '-')}-Stats"
    try:
        res = requests.get(url, headers=HEADERS)
        time.sleep(random.uniform(4, 10))
        tables = pd.read_html(res.text)

        # Find the correct table
        df = next((t for t in tables if {"Player", "Gls", "Ast"}.issubset(t.columns)), None)
        if df is None:
            raise ValueError("No valid player stats table found.")

        if "Pos" in df.columns:
            df = df[df["Pos"] != "GK"]

        top_scorer = df.sort_values(by="Gls", ascending=False).iloc[0]["Player"]
        top_assister = df.sort_values(by="Ast", ascending=False).iloc[0]["Player"]
        return top_scorer, top_assister
    except Exception as e:
        print(f"❌ Failed top scorer/assister for {team_name}: {e}")
        return "", ""

# Scrape all teams
team_data = []

for team, team_id in TEAM_IDS.items():
    print(f"🔍 Scraping: {team}")
    form, scored, conceded, diff = get_team_form_and_goals(team, team_id)
    scorer, assister = get_top_scorer_and_assister(team, team_id)
    team_data.append({
        "Team": team,
        "Form": form,
        "Goals Scored (Last 5)": scored,
        "Goals Conceded (Last 5)": conceded,
        "Goal Difference (Last 5)": diff,
        "Top Scorer": scorer,
        "Top Assister": assister
    })
    time.sleep(random.uniform(4, 10))

# Final table
final_df = pd.DataFrame(team_data)
final_df.head()

🔍 Scraping: Arsenal


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Arsenal: No tables found
🔍 Scraping: Aston Villa


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Aston Villa: No tables found
🔍 Scraping: Bournemouth


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Bournemouth: No tables found
🔍 Scraping: Brentford


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Brentford: No tables found
🔍 Scraping: Brighton


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Brighton: No tables found
🔍 Scraping: Chelsea


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Chelsea: No tables found
🔍 Scraping: Crystal Palace


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Crystal Palace: No tables found
🔍 Scraping: Everton


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Everton: No tables found
🔍 Scraping: Fulham


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Fulham: No tables found
🔍 Scraping: Ipswich Town


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Ipswich Town: No tables found
🔍 Scraping: Leicester City


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Leicester City: No tables found
🔍 Scraping: Liverpool


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Liverpool: No tables found
🔍 Scraping: Manchester City


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Manchester City: No tables found
🔍 Scraping: Manchester United


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Manchester United: No tables found
🔍 Scraping: Newcastle Utd


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Newcastle Utd: No tables found
🔍 Scraping: Nottingham Forest


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Nottingham Forest: No tables found
🔍 Scraping: Southampton


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Southampton: No tables found
🔍 Scraping: Tottenham


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Tottenham: No tables found
🔍 Scraping: West Ham


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for West Ham: No tables found
🔍 Scraping: Wolves


  tables = pd.read_html(res.text)


❌ Failed top scorer/assister for Wolves: No tables found


Unnamed: 0,Team,Form,Goals Scored (Last 5),Goals Conceded (Last 5),Goal Difference (Last 5),Top Scorer,Top Assister
0,Arsenal,LLD,4,6,-2,,
1,Aston Villa,LWW,2,3,-1,,
2,Bournemouth,DWL,3,3,0,,
3,Brentford,WWW,7,3,4,,
4,Brighton,WDW,6,3,3,,


In [None]:
final_df.to_csv("pl_recent_form_and_stars.csv", index=False)
print("✅ Saved to pl_recent_form_and_stars.csv")