# La Liga Top Goalscorer and Assister

In [1]:
import pandas as pd
import time
import random

# Lookup table: club display name -> FBref squad page URL.
# Insertion order is the order in which the clubs are scraped.
team_url_map = dict([
    ("Alaves", "https://fbref.com/en/squads/8d6fd021/Alaves-Stats"),
    ("Athletic Bilbao", "https://fbref.com/en/squads/2b390eca/Athletic-Club-Stats"),
    ("Atletico Madrid", "https://fbref.com/en/squads/db3b9613/Atletico-Madrid-Stats"),
    ("Barcelona", "https://fbref.com/en/squads/206d90db/Barcelona-Stats"),
    ("Celta Vigo", "https://fbref.com/en/squads/f25da7fb/Celta-Vigo-Stats"),
    ("Espanyol", "https://fbref.com/en/squads/a8661628/Espanyol-Stats"),
    ("Getafe", "https://fbref.com/en/squads/7848bd64/Getafe-Stats"),
    ("Girona", "https://fbref.com/en/squads/9024a00a/Girona-Stats"),
    ("Las Palmas", "https://fbref.com/en/squads/0049d422/Las-Palmas-Stats"),
    ("Leganes", "https://fbref.com/en/squads/7c6f2c78/Leganes-Stats"),
    ("Mallorca", "https://fbref.com/en/squads/2aa12281/Mallorca-Stats"),
    ("Osasuna", "https://fbref.com/en/squads/03c57e2b/Osasuna-Stats"),
    ("Rayo Vallecano", "https://fbref.com/en/squads/98e8af82/Rayo-Vallecano-Stats"),
    ("Real Betis", "https://fbref.com/en/squads/fc536746/Real-Betis-Stats"),
    ("Real Madrid", "https://fbref.com/en/squads/53a2f082/Real-Madrid-Stats"),
    ("Real Sociedad", "https://fbref.com/en/squads/e31d1cd9/Real-Sociedad-Stats"),
    ("Sevilla", "https://fbref.com/en/squads/ad2be733/Sevilla-Stats"),
    ("Valencia", "https://fbref.com/en/squads/dcc91a7b/Valencia-Stats"),
    ("Valladolid", "https://fbref.com/en/squads/17859612/Valladolid-Stats"),
    ("Villarreal", "https://fbref.com/en/squads/2a8183b3/Villarreal-Stats"),
])

In [2]:
# --- Scrape settings ---------------------------------------------------------
# HTML id of the stats table requested from each squad page.
# NOTE(review): the "_12" suffix is presumably FBref's competition id for
# La Liga -- confirm before reusing this cell for another league.
STATS_TABLE_ID = "stats_standard_12"
# Bounds (seconds) of the random pause between requests, to avoid getting blocked.
REQUEST_DELAY_RANGE = (4, 8)
RESULT_COLUMNS = ["Team", "Top Goalscorer", "Goals", "Top Assister", "Assists"]


def _extract_top_scorer_and_assister(squad_stats):
    """Return the leading goalscorer and assister from one squad stats table.

    Parameters
    ----------
    squad_stats : pd.DataFrame
        Table as returned by ``pd.read_html``. It must expose ``Player``,
        ``Gls`` and ``Ast`` columns, possibly under a MultiIndex header.
        The passed frame is not modified.

    Returns
    -------
    tuple
        ``(top_goalscorer, top_goals, top_assister, top_assists)``; the two
        counts are floats.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If no row with a numeric ``Gls`` value is left after filtering.
    """
    players = squad_stats.copy()

    # Handle multi-level columns: keep only the innermost header level
    if isinstance(players.columns, pd.MultiIndex):
        players.columns = players.columns.get_level_values(-1)

    # Drop duplicated columns (the first occurrence of each name is kept)
    players = players.loc[:, ~players.columns.duplicated()]

    # Remove rows like 'Squad Total', 'Opponent Total'
    players = players[~players["Player"].str.contains("Total", na=False)]

    # Keep only rows whose goal count is numeric. Both metrics are coerced the
    # same way, and .assign builds a new frame instead of writing into a
    # filtered slice (which triggers SettingWithCopyWarning).
    goals = pd.to_numeric(players["Gls"], errors="coerce")
    assists = pd.to_numeric(players["Ast"], errors="coerce")
    players = players.loc[goals.notna()].assign(
        Gls=goals.astype(float),
        Ast=assists.astype(float),
    )

    if players.empty:
        raise ValueError("no player rows with a numeric 'Gls' value")

    # Sort once per metric; the leading row carries both the name and the count.
    goal_leader = players.sort_values(by="Gls", ascending=False).iloc[0]
    assist_leader = players.sort_values(by="Ast", ascending=False).iloc[0]
    return (
        goal_leader["Player"],
        goal_leader["Gls"],
        assist_leader["Player"],
        assist_leader["Ast"],
    )


# Collect one record per successfully scraped team, then build the frame once.
records = []
row_labels = []  # position of each team in team_url_map, kept as the row label
last_idx = len(team_url_map) - 1

# Loop over each team and scrape data
for idx, (team, url) in enumerate(team_url_map.items()):
    try:
        squad_stats = pd.read_html(url, attrs={"id": STATS_TABLE_ID})[0]
        top_goalscorer, top_goals, top_assister, top_assists = (
            _extract_top_scorer_and_assister(squad_stats)
        )

        records.append(
            {
                "Team": team,
                "Top Goalscorer": top_goalscorer,
                "Goals": top_goals,
                "Top Assister": top_assister,
                "Assists": top_assists,
            }
        )
        row_labels.append(idx)

        print(f"✔ Processed {team}")

    except Exception as e:
        # Deliberate best-effort: report the failure and continue with the next team.
        print(f"❌ Failed to process {team}: {e}")

    # Pause after every attempt, including failed ones, so an error response is
    # not followed by an immediate new request. No pause is needed after the
    # final team.
    if idx < last_idx:
        time.sleep(random.uniform(*REQUEST_DELAY_RANGE))

# Teams that failed are absent from the frame; their row labels are skipped.
df = pd.DataFrame(records, index=row_labels, columns=RESULT_COLUMNS)

# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Alaves
✔ Processed Athletic Bilbao
✔ Processed Atletico Madrid
✔ Processed Barcelona
✔ Processed Celta Vigo
✔ Processed Espanyol
✔ Processed Getafe
✔ Processed Girona
✔ Processed Las Palmas
✔ Processed Leganes
✔ Processed Mallorca
✔ Processed Osasuna
✔ Processed Rayo Vallecano
✔ Processed Real Betis
✔ Processed Real Madrid
✔ Processed Real Sociedad
✔ Processed Sevilla
✔ Processed Valencia
✔ Processed Valladolid
✔ Processed Villarreal

✅ Final Result:
              Team      Top Goalscorer Goals       Top Assister Assists
0           Alaves                Kiké  13.0     Carlos Vicente     5.0
1  Athletic Bilbao        Oihan Sancet  15.0     Iñaki Williams     8.0
2  Atletico Madrid   Alexander Sørloth  17.0  Antoine Griezmann     7.0
3        Barcelona  Robert Lewandowski  25.0       Lamine Yamal    13.0
4       Celta Vigo      Borja Iglesias  10.0     Óscar Mingueza     6.0


In [None]:
# Persist the per-team leaders as CSV; the DataFrame index is not written.
# The path is relative, so it resolves against the kernel's working directory.
output_csv_path = "../../../data/teams/raw/goals_assists/la_liga_top_scorers_assisters.csv"
df.to_csv(output_csv_path, index=False)