# Ligue 1: Top Goalscorer and Top Assister per Team

In [1]:
import pandas as pd
import time
import random

# FBref squad pages for each club, listed as (club name, squad URL) pairs.
# The pairs are turned into a dict below; insertion order follows this list.
_FBREF_SQUAD_PAGES = [
    ("Angers SCO", "https://fbref.com/en/squads/69236f98/Angers-Stats"),
    ("AJ Auxerre", "https://fbref.com/en/squads/5ae09109/Auxerre-Stats"),
    ("Stade Brestois 29", "https://fbref.com/en/squads/fb08dbb3/Brest-Stats"),
    ("Le Havre AC", "https://fbref.com/en/squads/5c2737db/Le-Havre-Stats"),
    ("RC Lens", "https://fbref.com/en/squads/fd4e0f7d/Lens-Stats"),
    ("LOSC Lille", "https://fbref.com/en/squads/cb188c0c/Lille-Stats"),
    ("Olympique Lyonnais", "https://fbref.com/en/squads/d53c0b06/Lyon-Stats"),
    ("Olympique de Marseille", "https://fbref.com/en/squads/5725cc7b/Marseille-Stats"),
    ("AS Monaco", "https://fbref.com/en/squads/fd6114db/Monaco-Stats"),
    ("Montpellier HSC", "https://fbref.com/en/squads/281b0e73/Montpellier-Stats"),
    ("FC Nantes", "https://fbref.com/en/squads/d7a486cd/Nantes-Stats"),
    ("OGC Nice", "https://fbref.com/en/squads/132ebc33/Nice-Stats"),
    ("Paris Saint-Germain", "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats"),
    ("Stade de Reims", "https://fbref.com/en/squads/7fdd64e0/Reims-Stats"),
    ("Stade Rennais FC", "https://fbref.com/en/squads/b3072e00/Rennes-Stats"),
    ("AS Saint-Etienne", "https://fbref.com/en/squads/d298ef2c/Saint-Etienne-Stats"),
    ("RC Strasbourg Alsace", "https://fbref.com/en/squads/c0d3eab4/Strasbourg-Stats"),
    ("Toulouse FC", "https://fbref.com/en/squads/3f8c4b5f/Toulouse-Stats"),
]

# Lookup table used by the scraping cell: club name -> FBref squad URL.
team_url_map = dict(_FBREF_SQUAD_PAGES)

In [2]:
# HTML id of the table read from each squad page.
STANDARD_STATS_TABLE_ID = "stats_standard_13"
RESULT_COLUMNS = ["Team", "Top Goalscorer", "Goals", "Top Assister", "Assists"]


def _top_scorer_and_assister(stats):
    """Return the leading scorer and assister found in a squad stats table.

    Args:
        stats: DataFrame as returned by ``pd.read_html`` for a squad page. It
            must expose ``Player``, ``Gls`` and ``Ast`` columns, either
            directly or as the last level of a column MultiIndex.

    Returns:
        Tuple ``(top_goalscorer, goals, top_assister, assists)`` where the
        counts are floats. When several players share the maximum, the one
        listed first in the table is returned.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if no row has a numeric ``Gls`` value, or no remaining row
            has a numeric ``Ast`` value.
    """
    # Work on a copy so the caller's frame is never modified.
    table = stats.copy()

    # Handle multi-level columns: keep only the innermost header level.
    if isinstance(table.columns, pd.MultiIndex):
        table.columns = table.columns.get_level_values(-1)

    # Flattening can yield repeated names; keep the first of each.
    table = table.loc[:, ~table.columns.duplicated()]

    # Remove rows like 'Squad Total', 'Opponent Total'.
    table = table[~table["Player"].str.contains("Total", na=False)]

    # Coerce both stat columns; `assign` returns a new frame, so nothing is
    # written into a filtered slice.
    table = table.assign(
        Gls=pd.to_numeric(table["Gls"], errors="coerce"),
        Ast=pd.to_numeric(table["Ast"], errors="coerce"),
    )

    # Keep only rows with a valid numeric goal count; the fresh index makes
    # the idxmax labels below unambiguous.
    table = table[table["Gls"].notna()].reset_index(drop=True)

    if table.empty:
        raise ValueError("no player rows with a numeric 'Gls' value")
    if table["Ast"].isna().all():
        raise ValueError("no player rows with a numeric 'Ast' value")

    # idxmax replaces four full sorts and skips NaN assist values.
    scorer = table.loc[table["Gls"].idxmax()]
    assister = table.loc[table["Ast"].idxmax()]
    return (
        scorer["Player"],
        float(scorer["Gls"]),
        assister["Player"],
        float(assister["Ast"]),
    )


# One record per successfully scraped team, in the order of team_url_map.
records = []

# Loop over each team and scrape data
for position, (team, url) in enumerate(team_url_map.items()):
    # Pause between requests to avoid getting blocked. Sleeping before every
    # request except the first also throttles after a failed request, and
    # skips a pointless wait after the last team.
    if position:
        time.sleep(random.uniform(4, 8))

    try:
        squad_stats = pd.read_html(url, attrs={"id": STANDARD_STATS_TABLE_ID})[0]
        top_goalscorer, top_goals, top_assister, top_assists = (
            _top_scorer_and_assister(squad_stats)
        )
    except Exception as e:
        # Best-effort scraping: report the failure and move on to the next
        # team instead of aborting the whole run.
        print(f"❌ Failed to process {team}: {e}")
        continue

    records.append(
        {
            "Team": team,
            "Top Goalscorer": top_goalscorer,
            "Goals": top_goals,
            "Top Assister": top_assister,
            "Assists": top_assists,
        }
    )
    print(f"✔ Processed {team}")

# Build the result once; passing `columns` keeps the layout even if every
# team failed and `records` is empty.
df = pd.DataFrame(records, columns=RESULT_COLUMNS)

# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Angers SCO
✔ Processed AJ Auxerre
✔ Processed Stade Brestois 29
✔ Processed Le Havre AC
✔ Processed RC Lens
✔ Processed LOSC Lille
✔ Processed Olympique Lyonnais
✔ Processed Olympique de Marseille
✔ Processed AS Monaco
✔ Processed Montpellier HSC
✔ Processed FC Nantes
✔ Processed OGC Nice
✔ Processed Paris Saint-Germain
✔ Processed Stade de Reims
✔ Processed Stade Rennais FC
✔ Processed AS Saint-Etienne
✔ Processed RC Strasbourg Alsace
✔ Processed Toulouse FC

✅ Final Result:
                Team   Top Goalscorer Goals          Top Assister Assists
0         Angers SCO   Esteban Lepaul   9.0       Lilian Raolisoa     3.0
1         AJ Auxerre    Gaëtan Perrin  10.0         Gaëtan Perrin    11.0
2  Stade Brestois 29  Ludovic Ajorque  13.0  Mathias Pereira Lage     7.0
3        Le Havre AC  Abdoulaye Touré  10.0         Josué Casimir     4.0
4            RC Lens  Neil El Aynaoui   8.0      Adrien Thomasson     7.0


In [None]:
# Persist the scraped table as CSV; the row index is not written.
output_csv = "../../../data/teams/raw/goals_assists/ligue_1_top_scorers_assisters.csv"
df.to_csv(output_csv, index=False)