# La Liga Recent Form

In [1]:
import pandas as pd
import time
import random

# Dictionary mapping team names to their FBref squad URLs
team_url_map = {
    "Alaves": "https://fbref.com/en/squads/8d6fd021/Alaves-Stats",
    "Athletic Bilbao": "https://fbref.com/en/squads/2b390eca/Athletic-Club-Stats",
    "Atletico Madrid": "https://fbref.com/en/squads/db3b9613/Atletico-Madrid-Stats",
    "Barcelona": "https://fbref.com/en/squads/206d90db/Barcelona-Stats",
    "Celta Vigo": "https://fbref.com/en/squads/f25da7fb/Celta-Vigo-Stats",
    "Espanyol": "https://fbref.com/en/squads/a8661628/Espanyol-Stats",
    "Getafe": "https://fbref.com/en/squads/7848bd64/Getafe-Stats",
    "Girona": "https://fbref.com/en/squads/9024a00a/Girona-Stats",
    "Las Palmas": "https://fbref.com/en/squads/0049d422/Las-Palmas-Stats",
    "Leganes": "https://fbref.com/en/squads/7c6f2c78/Leganes-Stats",
    "Mallorca": "https://fbref.com/en/squads/2aa12281/Mallorca-Stats",
    "Osasuna": "https://fbref.com/en/squads/03c57e2b/Osasuna-Stats",
    "Rayo Vallecano": "https://fbref.com/en/squads/98e8af82/Rayo-Vallecano-Stats",
    "Real Betis": "https://fbref.com/en/squads/fc536746/Real-Betis-Stats",
    "Real Madrid": "https://fbref.com/en/squads/53a2f082/Real-Madrid-Stats",
    "Real Sociedad": "https://fbref.com/en/squads/e31d1cd9/Real-Sociedad-Stats",
    "Sevilla": "https://fbref.com/en/squads/ad2be733/Sevilla-Stats",
    "Valencia": "https://fbref.com/en/squads/dcc91a7b/Valencia-Stats",
    "Valladolid": "https://fbref.com/en/squads/17859612/Valladolid-Stats",
    "Villarreal": "https://fbref.com/en/squads/2a8183b3/Villarreal-Stats"
}

In [2]:
df = pd.DataFrame(columns=["Team", "Recent Form"])

for idx, (team, url) in enumerate(team_url_map.items()):
    try:
        # Read the match logs table (no filtering by competition)
        tables = pd.read_html(url, attrs={"id": "matchlogs_for"})
        matchlogs = tables[0]

        # Flatten multi-index if it exists
        if isinstance(matchlogs.columns, pd.MultiIndex):
            matchlogs.columns = matchlogs.columns.get_level_values(-1)

        # Remove rows with no result (i.e., future games)
        matchlogs = matchlogs.dropna(subset=["Result"])

        # Get the last 5 results
        recent_results = matchlogs.head(5)["Result"].tolist()

        # Join the results into a single string
        form_str = "".join(recent_results)

        df.at[idx, "Team"] = team
        df.at[idx, "Recent Form"] = form_str

        print(f"✔ Processed {team} - Form: {form_str}")
        time.sleep(random.uniform(4, 8))

    except Exception as e:
        print(f"❌ Failed to process {team}: {e}")


# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Alaves - Form: LDWWL
✔ Processed Athletic Bilbao - Form: DLWLW
✔ Processed Atletico Madrid - Form: DWDWW
✔ Processed Barcelona - Form: WWWWW
✔ Processed Celta Vigo - Form: WWLLW
✔ Processed Espanyol - Form: LLDWW
✔ Processed Getafe - Form: DDDLL
✔ Processed Girona - Form: DLWWL
✔ Processed Las Palmas - Form: DLDLL
✔ Processed Leganes - Form: DWDLL
✔ Processed Mallorca - Form: DLDWL
✔ Processed Osasuna - Form: DWLWL
✔ Processed Rayo Vallecano - Form: WDLLW
✔ Processed Real Betis - Form: DWDWL
✔ Processed Real Madrid - Form: WDWDW
✔ Processed Real Sociedad - Form: LWLDL
✔ Processed Sevilla - Form: DLDLW
✔ Processed Valencia - Form: LLLDL
✔ Processed Valladolid - Form: WLDLL
✔ Processed Villarreal - Form: DWWDW

✅ Final Result:
              Team Recent Form
0           Alaves       LDWWL
1  Athletic Bilbao       DLWLW
2  Atletico Madrid       DWDWW
3        Barcelona       WWWWW
4       Celta Vigo       WWLLW


In [3]:
df.to_csv("../../../data/teams/raw/recent_form/la_liga_recent_form.csv", index=False)