# PL Top Goalscorer and Assister

In [1]:
import pandas as pd
import time
import random

# FBref squad-stats page for each team, keyed by the team's display name.
# Written as (team, url) pairs and converted to a dict so later cells can
# look up a URL by team name; insertion order follows the list below.
team_url_map = dict([
    ("Arsenal", "https://fbref.com/en/squads/18bb7c10/Arsenal-Stats"),
    ("Aston Villa", "https://fbref.com/en/squads/8602292d/Aston-Villa-Stats"),
    ("Bournemouth", "https://fbref.com/en/squads/4ba7cbea/Bournemouth-Stats"),
    ("Brentford", "https://fbref.com/en/squads/cd051869/Brentford-Stats"),
    ("Brighton & Hove Albion", "https://fbref.com/en/squads/d07537b9/Brighton-and-Hove-Albion-Stats"),
    ("Chelsea", "https://fbref.com/en/squads/cff3d9bb/Chelsea-Stats"),
    ("Crystal Palace", "https://fbref.com/en/squads/47c64c55/Crystal-Palace-Stats"),
    ("Everton", "https://fbref.com/en/squads/d3fd31cc/Everton-Stats"),
    ("Fulham", "https://fbref.com/en/squads/fd962109/Fulham-Stats"),
    ("Ipswich Town", "https://fbref.com/en/squads/b74092de/Ipswich-Town-Stats"),
    ("Leicester City", "https://fbref.com/en/squads/a2d435b3/Leicester-City-Stats"),
    ("Liverpool", "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats"),
    ("Manchester City", "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats"),
    ("Manchester United", "https://fbref.com/en/squads/19538871/Manchester-United-Stats"),
    ("Newcastle United", "https://fbref.com/en/squads/b2b47a98/Newcastle-United-Stats"),
    ("Nottingham Forest", "https://fbref.com/en/squads/e4a775cb/Nottingham-Forest-Stats"),
    ("Southampton", "https://fbref.com/en/squads/33c895d4/Southampton-Stats"),
    ("Tottenham Hotspur", "https://fbref.com/en/squads/361ca564/Tottenham-Hotspur-Stats"),
    ("West Ham United", "https://fbref.com/en/squads/7c21e445/West-Ham-United-Stats"),
    ("Wolverhampton Wanderers", "https://fbref.com/en/squads/8cec06e1/Wolverhampton-Wanderers-Stats"),
])

In [2]:
# Column layout of the summary table: one row per successfully scraped team.
RESULT_COLUMNS = ["Team", "Top Goalscorer", "Goals", "Top Assister", "Assists"]

# HTML id of the table read from every squad page.
# NOTE(review): the trailing "9" looks like a competition-specific suffix —
# confirm it still matches the pages before re-running.
STATS_TABLE_ID = "stats_standard_9"


def _leader(player_stats, column):
    """Return ``(player, value)`` for the row with the largest ``column`` value.

    Parameters
    ----------
    player_stats : pd.DataFrame
        Frame with a unique index, a ``"Player"`` column and a numeric
        ``column`` (missing values allowed).
    column : str
        Name of the numeric column to maximise.

    Raises
    ------
    ValueError
        If ``column`` holds no numeric value at all.
    """
    values = player_stats[column].dropna()
    if values.empty:
        raise ValueError(f"no numeric '{column}' values found")
    # idxmax returns the first label holding the maximum, so ties resolve to
    # the player listed first in the table.
    best = values.idxmax()
    return player_stats.at[best, "Player"], float(values.at[best])


def _top_performers(squad_table):
    """Extract the top goalscorer and top assister from one squad table.

    Parameters
    ----------
    squad_table : pd.DataFrame
        Table as returned by ``pd.read_html``; must expose ``"Player"``,
        ``"Gls"`` and ``"Ast"`` columns (possibly under a column MultiIndex).

    Returns
    -------
    dict
        Keys ``"Top Goalscorer"``, ``"Goals"``, ``"Top Assister"``,
        ``"Assists"``; the two counts are floats.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    ValueError
        If no row carries a numeric goal or assist value.
    """
    stats = squad_table.copy()  # never mutate the caller's frame

    # Flatten two-level headers down to the innermost label.
    if isinstance(stats.columns, pd.MultiIndex):
        stats.columns = stats.columns.get_level_values(-1)

    # After flattening, labels can repeat; keep the first occurrence of each.
    stats = stats.loc[:, ~stats.columns.duplicated()]

    # Remove rows like 'Squad Total', 'Opponent Total'.
    stats = stats[~stats["Player"].str.contains("Total", na=False)]

    # Coerce both counters; non-numeric cells become NaN instead of raising.
    # assign() returns a new frame, so no SettingWithCopyWarning is triggered.
    stats = stats.assign(
        Gls=pd.to_numeric(stats["Gls"], errors="coerce"),
        Ast=pd.to_numeric(stats["Ast"], errors="coerce"),
    )

    # Keep only rows with a numeric goal count; the fresh index guarantees
    # unique labels for the lookups in _leader.
    stats = stats[stats["Gls"].notna()].reset_index(drop=True)

    scorer, goals = _leader(stats, "Gls")
    assister, assists = _leader(stats, "Ast")
    return {
        "Top Goalscorer": scorer,
        "Goals": goals,
        "Top Assister": assister,
        "Assists": assists,
    }


# Loop over each team and scrape data
records = []
for position, (team, url) in enumerate(team_url_map.items()):
    # Pause before every request except the first. Doing it here rather than
    # after a successful scrape means a failed request is also followed by a
    # delay, so one error does not turn into a burst of back-to-back requests.
    if position:
        time.sleep(random.uniform(4, 8))

    try:
        squad_table = pd.read_html(url, attrs={"id": STATS_TABLE_ID})[0]
        records.append({"Team": team, **_top_performers(squad_table)})
        print(f"✔ Processed {team}")
    except Exception as e:
        # Best effort: report the team and carry on with the remaining ones.
        print(f"❌ Failed to process {team}: {e}")

# Build the result frame once from the collected rows; teams that failed are
# simply absent, and the index is contiguous.
df = pd.DataFrame(records, columns=RESULT_COLUMNS)

# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Arsenal
✔ Processed Aston Villa
✔ Processed Bournemouth
✔ Processed Brentford
✔ Processed Brighton & Hove Albion
✔ Processed Chelsea
✔ Processed Crystal Palace
✔ Processed Everton
✔ Processed Fulham
✔ Processed Ipswich Town
✔ Processed Leicester City
✔ Processed Liverpool
✔ Processed Manchester City
✔ Processed Manchester United
✔ Processed Newcastle United
✔ Processed Nottingham Forest
✔ Processed Southampton
✔ Processed Tottenham Hotspur
✔ Processed West Ham United
✔ Processed Wolverhampton Wanderers

✅ Final Result:
                     Team   Top Goalscorer Goals      Top Assister Assists
0                 Arsenal      Kai Havertz   9.0       Bukayo Saka    10.0
1             Aston Villa    Ollie Watkins  16.0     Morgan Rogers    10.0
2             Bournemouth  Justin Kluivert  12.0   Justin Kluivert     6.0
3               Brentford     Bryan Mbeumo  19.0  Mikkel Damsgaard    10.0
4  Brighton & Hove Albion    Danny Welbeck  10.0        João Pedro     6.0


In [None]:
# Persist the per-team summary as CSV; the row index is left out of the file.
output_csv = "../../../data/teams/raw/goals_assists/pl_top_scorers_assisters.csv"
df.to_csv(path_or_buf=output_csv, index=False)