# Bundesliga Top Goalscorer and Assister

In [None]:
import pandas as pd
import time
import random

# FBref squad pages for every club covered by this notebook, keyed by the
# display name used in the output table. Every URL has the shape
# https://fbref.com/en/squads/<squad id>/<slug>-Stats, so only the squad id
# and the slug are spelled out per club.
team_url_map = {
    club: f"https://fbref.com/en/squads/{squad_id}/{slug}-Stats"
    for club, squad_id, slug in (
        ("FC Augsburg", "0cdc4311", "FC-Augsburg"),
        ("Union Berlin", "7a41008f", "Union-Berlin"),
        ("VfL Bochum", "b42c6323", "VfL-Bochum"),
        ("Werder Bremen", "62add3bf", "Werder-Bremen"),
        ("Borussia Dortmund", "add600ae", "Borussia-Dortmund"),
        ("Eintracht Frankfurt", "f0ac8ee6", "Eintracht-Frankfurt"),
        ("SC Freiburg", "a486e511", "SC-Freiburg"),
        ("1. FC Heidenheim", "18d9d2a7", "Heidenheim"),
        ("TSG Hoffenheim", "033ea6b8", "TSG-Hoffenheim"),
        ("Holstein Kiel", "2ac661d9", "Holstein-Kiel"),
        ("RB Leipzig", "acbb6a5b", "RB-Leipzig"),
        ("Bayer Leverkusen", "c7a9f859", "Bayer-Leverkusen"),
        ("Mainz 05", "a224b06a", "Mainz"),
        ("Borussia Monchengladbach", "32f3ee20", "Borussia-Monchengladbach"),
        ("Bayern Munich", "054efa67", "Bayern-Munich"),
        ("FC St. Pauli", "54864664", "St-Pauli"),
        ("VfB Stuttgart", "598bc722", "VfB-Stuttgart"),
        ("VfL Wolfsburg", "4eaa11d7", "VfL-Wolfsburg"),
    )
}

In [2]:
def _top_player(table, stat):
    """Return ``(player, value)`` for the row with the highest ``stat``.

    ``table`` must have a "Player" column and a numeric ``stat`` column.
    Missing values in ``stat`` are ignored; ties resolve to the first row.

    Raises:
        ValueError: if ``stat`` holds no numeric value at all.
    """
    values = table[stat].dropna()
    if values.empty:
        raise ValueError(f"no numeric '{stat}' values found")
    best = values.idxmax()
    return table.at[best, "Player"], float(values.at[best])


# One dict per successfully scraped team; turned into `df` after the loop.
rows = []

# Loop over each team and scrape data
for idx, (team, url) in enumerate(team_url_map.items()):
    # Pause to avoid getting blocked. Sleeping *before* every request except
    # the first keeps the delay in place after a failed request as well (a
    # failure is often a rate-limit response) and avoids a useless pause
    # after the last team.
    if idx:
        time.sleep(random.uniform(4, 8))

    # Best-effort per team: any scraping/parsing problem is reported and the
    # loop moves on, so one bad page does not abort the whole run.
    try:
        data = pd.read_html(url, attrs={"id": "stats_standard_20"})[0]

        # Handle multi-level columns: keep only the innermost header level
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(-1)

        # Drop duplicated columns (the first occurrence of each name is kept)
        data = data.loc[:, ~data.columns.duplicated()]

        # Remove rows like 'Squad Total', 'Opponent Total'. The explicit copy
        # makes the column assignments below operate on an independent frame
        # instead of a slice of the scraped table.
        data = data[~data["Player"].str.contains("Total", na=False)].copy()

        # Keep only rows with a numeric goal count; non-numeric cells in
        # either column become NaN instead of raising.
        data["Gls"] = pd.to_numeric(data["Gls"], errors="coerce")
        data["Ast"] = pd.to_numeric(data["Ast"], errors="coerce")
        data = data.dropna(subset=["Gls"])

        # Extract top scorer and assister
        top_goalscorer, top_goals = _top_player(data, "Gls")
        top_assister, top_assists = _top_player(data, "Ast")
    except Exception as e:
        print(f"❌ Failed to process {team}: {e}")
        continue

    # Add to result rows
    rows.append(
        {
            "Team": team,
            "Top Goalscorer": top_goalscorer,
            "Goals": top_goals,
            "Top Assister": top_assister,
            "Assists": top_assists,
        }
    )
    print(f"✔ Processed {team}")

# Passing `columns` keeps the expected header even when every team failed.
df = pd.DataFrame(
    rows,
    columns=["Team", "Top Goalscorer", "Goals", "Top Assister", "Assists"],
)

# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed FC Augsburg
✔ Processed Union Berlin
✔ Processed VfL Bochum
✔ Processed Werder Bremen
✔ Processed Borussia Dortmund
✔ Processed Eintracht Frankfurt
✔ Processed SC Freiburg
✔ Processed 1. FC Heidenheim
✔ Processed TSG Hoffenheim
✔ Processed Holstein Kiel
✔ Processed RB Leipzig
✔ Processed Bayer Leverkusen
✔ Processed Mainz 05
✔ Processed Borussia Monchengladbach
✔ Processed Bayern Munich
✔ Processed FC St. Pauli
✔ Processed VfB Stuttgart
✔ Processed VfL Wolfsburg

✅ Final Result:
                Team         Top Goalscorer Goals         Top Assister Assists
0        FC Augsburg  Alexis Claude-Maurice   9.0   Jeffrey Gouweleeuw     4.0
1       Union Berlin    Benedict Hollerbach   9.0  Christopher Trimmel     3.0
2         VfL Bochum            Myron Boadu   9.0       Felix Passlack     6.0
3      Werder Bremen             Jens Stage  10.0      Mitchell Weiser     9.0
4  Borussia Dortmund        Serhou Guirassy  21.0        Julian Brandt    10.0


In [None]:
# Persist the scraped table as CSV; the DataFrame index is left out of the
# file. The path is relative to the notebook's working directory.
df.to_csv(
    path_or_buf="../../../data/teams/raw/goals_assists/bundesliga_top_scorers_assisters.csv",
    index=False,
)