# Bundesliga Recent Form

In [1]:
import pandas as pd
import time
import random

# Dictionary mapping team names to their FBref squad URLs
team_url_map = {
    "FC Augsburg": "https://fbref.com/en/squads/0cdc4311/FC-Augsburg-Stats",
    "Union Berlin": "https://fbref.com/en/squads/7a41008f/Union-Berlin-Stats",
    "VfL Bochum": "https://fbref.com/en/squads/b42c6323/VfL-Bochum-Stats",
    "Werder Bremen": "https://fbref.com/en/squads/62add3bf/Werder-Bremen-Stats",
    "Borussia Dortmund": "https://fbref.com/en/squads/add600ae/Borussia-Dortmund-Stats",
    "Eintracht Frankfurt": "https://fbref.com/en/squads/f0ac8ee6/Eintracht-Frankfurt-Stats",
    "SC Freiburg": "https://fbref.com/en/squads/a486e511/SC-Freiburg-Stats",
    "1. FC Heidenheim": "https://fbref.com/en/squads/18d9d2a7/Heidenheim-Stats",
    "TSG Hoffenheim": "https://fbref.com/en/squads/033ea6b8/TSG-Hoffenheim-Stats",
    "Holstein Kiel": "https://fbref.com/en/squads/2ac661d9/Holstein-Kiel-Stats",
    "RB Leipzig": "https://fbref.com/en/squads/acbb6a5b/RB-Leipzig-Stats",
    "Bayer Leverkusen": "https://fbref.com/en/squads/c7a9f859/Bayer-Leverkusen-Stats",
    "Mainz 05": "https://fbref.com/en/squads/a224b06a/Mainz-Stats",
    "Borussia Monchengladbach": "https://fbref.com/en/squads/32f3ee20/Borussia-Monchengladbach-Stats",
    "Bayern Munich": "https://fbref.com/en/squads/054efa67/Bayern-Munich-Stats",
    "FC St. Pauli": "https://fbref.com/en/squads/54864664/St-Pauli-Stats",
    "VfB Stuttgart": "https://fbref.com/en/squads/598bc722/VfB-Stuttgart-Stats",
    "VfL Wolfsburg": "https://fbref.com/en/squads/4eaa11d7/VfL-Wolfsburg-Stats"
}

In [2]:
df = pd.DataFrame(columns=["Team", "Recent Form"])

for idx, (team, url) in enumerate(team_url_map.items()):
    try:
        # Read the match logs table (no filtering by competition)
        tables = pd.read_html(url, attrs={"id": "matchlogs_for"})
        matchlogs = tables[0]

        # Flatten multi-index if it exists
        if isinstance(matchlogs.columns, pd.MultiIndex):
            matchlogs.columns = matchlogs.columns.get_level_values(-1)

        # Remove rows with no result (i.e., future games)
        matchlogs = matchlogs.dropna(subset=["Result"])

        # Get the last 5 results
        recent_results = matchlogs.head(5)["Result"].tolist()

        # Join the results into a single string
        form_str = "".join(recent_results)

        df.at[idx, "Team"] = team
        df.at[idx, "Recent Form"] = form_str

        print(f"✔ Processed {team} - Form: {form_str}")
        time.sleep(random.uniform(4, 8))

    except Exception as e:
        print(f"❌ Failed to process {team}: {e}")


# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed FC Augsburg - Form: WDLWL
✔ Processed Union Berlin - Form: WDWDW
✔ Processed VfL Bochum - Form: LLLLD
✔ Processed Werder Bremen - Form: WDDWL
✔ Processed Borussia Dortmund - Form: WWDWW
✔ Processed Eintracht Frankfurt - Form: WLWWW
✔ Processed SC Freiburg - Form: WWLWW
✔ Processed 1. FC Heidenheim - Form: WWWWW
✔ Processed TSG Hoffenheim - Form: DWLLL
✔ Processed Holstein Kiel - Form: WLLLD
✔ Processed RB Leipzig - Form: WWWDL
✔ Processed Bayer Leverkusen - Form: DWWLW
✔ Processed Mainz 05 - Form: WDDLW
✔ Processed Borussia Monchengladbach - Form: WLWLL
✔ Processed Bayern Munich - Form: WWWWW
✔ Processed FC St. Pauli - Form: WLLLD
✔ Processed VfB Stuttgart - Form: DLWDW
✔ Processed VfL Wolfsburg - Form: WLWLL

✅ Final Result:
                Team Recent Form
0        FC Augsburg       WDLWL
1       Union Berlin       WDWDW
2         VfL Bochum       LLLLD
3      Werder Bremen       WDDWL
4  Borussia Dortmund       WWDWW


In [3]:
df.to_csv("../../../data/teams/raw/recent_form/bundesliga_recent_form.csv", index=False)