# Serie A Recent Form

In [1]:
import pandas as pd
import time
import random

# Dictionary mapping team names to their FBref squad URLs
team_url_map = {
    "Atalanta": "https://fbref.com/en/squads/922493f3/Atalanta-Stats",
    "Bologna": "https://fbref.com/en/squads/1d8099f8/Bologna-Stats",
    "Cagliari": "https://fbref.com/en/squads/c4260e09/Cagliari-Stats",
    "Como": "https://fbref.com/en/squads/28c9c3cd/Como-Stats",
    "Empoli": "https://fbref.com/en/squads/a3d88bd8/Empoli-Stats",
    "Fiorentina": "https://fbref.com/en/squads/421387cf/Fiorentina-Stats",
    "Genoa": "https://fbref.com/en/squads/658bf2de/Genoa-Stats",
    "Hellas Verona": "https://fbref.com/en/squads/0e72edf2/Hellas-Verona-Stats",
    "Inter Milan": "https://fbref.com/en/squads/d609edc0/Internazionale-Stats",
    "Juventus": "https://fbref.com/en/squads/e0652b02/Juventus-Stats",
    "Lazio": "https://fbref.com/en/squads/7213da33/Lazio-Stats",
    "Lecce": "https://fbref.com/en/squads/ffcbe334/Lecce-Stats",
    "AC Milan": "https://fbref.com/en/squads/dc56fe14/AC-Milan-Stats",
    "Monza": "https://fbref.com/en/squads/21680aa4/Monza-Stats",
    "Napoli": "https://fbref.com/en/squads/d48ad4ff/Napoli-Stats",
    "Parma": "https://fbref.com/en/squads/eab4234c/Parma-Stats",
    "Roma": "https://fbref.com/en/squads/cf74a709/Roma-Stats",
    "Torino": "https://fbref.com/en/squads/105360fe/Torino-Stats",
    "Udinese": "https://fbref.com/en/squads/04eea015/Udinese-Stats",
    "Venezia": "https://fbref.com/en/squads/af5d5982/Venezia-Stats"
}

In [2]:
df = pd.DataFrame(columns=["Team", "Recent Form"])

for idx, (team, url) in enumerate(team_url_map.items()):
    try:
        # Read the match logs table (no filtering by competition)
        tables = pd.read_html(url, attrs={"id": "matchlogs_for"})
        matchlogs = tables[0]

        # Flatten multi-index if it exists
        if isinstance(matchlogs.columns, pd.MultiIndex):
            matchlogs.columns = matchlogs.columns.get_level_values(-1)

        # Remove rows with no result (i.e., future games)
        matchlogs = matchlogs.dropna(subset=["Result"])

        # Get the last 5 results
        recent_results = matchlogs.head(5)["Result"].tolist()

        # Join the results into a single string
        form_str = "".join(recent_results)

        df.at[idx, "Team"] = team
        df.at[idx, "Recent Form"] = form_str

        print(f"✔ Processed {team} - Form: {form_str}")
        time.sleep(random.uniform(4, 8))

    except Exception as e:
        print(f"❌ Failed to process {team}: {e}")


# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Atalanta - Form: LWLLW
✔ Processed Bologna - Form: DLDDD
✔ Processed Cagliari - Form: WDDLL
✔ Processed Como - Form: DLDLD
✔ Processed Empoli - Form: WDWDD
✔ Processed Fiorentina - Form: DDDDD
✔ Processed Genoa - Form: WDWLD
✔ Processed Hellas Verona - Form: LWLWL
✔ Processed Inter Milan - Form: DWWDD
✔ Processed Juventus - Form: WWDDW
✔ Processed Lazio - Form: WLDWL
✔ Processed Lecce - Form: WLLWD
✔ Processed AC Milan - Form: DLDWL
✔ Processed Monza - Form: DDLDD
✔ Processed Napoli - Form: DLWWW
✔ Processed Parma - Form: LDWLL
✔ Processed Roma - Form: DLDDW
✔ Processed Torino - Form: WDWWD
✔ Processed Udinese - Form: WDWWW
✔ Processed Venezia - Form: LLDLL

✅ Final Result:
       Team Recent Form
0  Atalanta       LWLLW
1   Bologna       DLDDD
2  Cagliari       WDDLL
3      Como       DLDLD
4    Empoli       WDWDD


In [3]:
df.to_csv("../../../data/teams/raw/recent_form/serie_a_recent_form.csv", index=False)