# PL Recent Form

In [2]:
import pandas as pd
import time
import random

# Dictionary mapping team names to their FBref squad URLs
team_url_map = {
    "Arsenal": "https://fbref.com/en/squads/18bb7c10/Arsenal-Stats",
    "Aston Villa": "https://fbref.com/en/squads/8602292d/Aston-Villa-Stats",
    "Bournemouth": "https://fbref.com/en/squads/4ba7cbea/Bournemouth-Stats",
    "Brentford": "https://fbref.com/en/squads/cd051869/Brentford-Stats",
    "Brighton & Hove Albion": "https://fbref.com/en/squads/d07537b9/Brighton-and-Hove-Albion-Stats",
    "Chelsea": "https://fbref.com/en/squads/cff3d9bb/Chelsea-Stats",
    "Crystal Palace": "https://fbref.com/en/squads/47c64c55/Crystal-Palace-Stats",
    "Everton": "https://fbref.com/en/squads/d3fd31cc/Everton-Stats",
    "Fulham": "https://fbref.com/en/squads/fd962109/Fulham-Stats",
    "Ipswich Town": "https://fbref.com/en/squads/b74092de/Ipswich-Town-Stats",
    "Leicester City": "https://fbref.com/en/squads/a2d435b3/Leicester-City-Stats",
    "Liverpool": "https://fbref.com/en/squads/822bd0ba/Liverpool-Stats",
    "Manchester City": "https://fbref.com/en/squads/b8fd03ef/Manchester-City-Stats",
    "Manchester United": "https://fbref.com/en/squads/19538871/Manchester-United-Stats",
    "Newcastle United": "https://fbref.com/en/squads/b2b47a98/Newcastle-United-Stats",
    "Nottingham Forest": "https://fbref.com/en/squads/e4a775cb/Nottingham-Forest-Stats",
    "Southampton": "https://fbref.com/en/squads/33c895d4/Southampton-Stats",
    "Tottenham Hotspur": "https://fbref.com/en/squads/361ca564/Tottenham-Hotspur-Stats",
    "West Ham United": "https://fbref.com/en/squads/7c21e445/West-Ham-United-Stats",
    "Wolverhampton Wanderers": "https://fbref.com/en/squads/8cec06e1/Wolverhampton-Wanderers-Stats"
}

In [3]:
df = pd.DataFrame(columns=["Team", "Recent Form"])

for idx, (team, url) in enumerate(team_url_map.items()):
    try:
        # Read the match logs table (no filtering by competition)
        tables = pd.read_html(url, attrs={"id": "matchlogs_for"})
        matchlogs = tables[0]

        # Flatten multi-index if it exists
        if isinstance(matchlogs.columns, pd.MultiIndex):
            matchlogs.columns = matchlogs.columns.get_level_values(-1)

        # Remove rows with no result (i.e., future games)
        matchlogs = matchlogs.dropna(subset=["Result"])

        # Get the last 5 results
        recent_results = matchlogs.head(5)["Result"].tolist()

        # Join the results into a single string
        form_str = "".join(recent_results)

        df.at[idx, "Team"] = team
        df.at[idx, "Recent Form"] = form_str

        print(f"✔ Processed {team} - Form: {form_str}")
        time.sleep(random.uniform(4, 8))

    except Exception as e:
        print(f"❌ Failed to process {team}: {e}")


# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Arsenal - Form: WWDWD
✔ Processed Aston Villa - Form: WLWWW
✔ Processed Bournemouth - Form: DDLWL
✔ Processed Brentford - Form: WLWWL
✔ Processed Brighton & Hove Albion - Form: WWWDD
✔ Processed Chelsea - Form: LWWLD
✔ Processed Crystal Palace - Form: LLWDD
✔ Processed Everton - Form: LLWLL
✔ Processed Fulham - Form: LWWDD
✔ Processed Ipswich Town - Form: LLDDD
✔ Processed Leicester City - Form: DLWLD
✔ Processed Liverpool - Form: WWWLW
✔ Processed Manchester City - Form: DWWWW
✔ Processed Manchester United - Form: DWLLW
✔ Processed Newcastle United - Form: WDDWW
✔ Processed Nottingham Forest - Form: DWDDW
✔ Processed Southampton - Form: LLWLL
✔ Processed Tottenham Hotspur - Form: DWLLW
✔ Processed West Ham United - Form: LWWLD
✔ Processed Wolverhampton Wanderers - Form: LLWDL

✅ Final Result:
                     Team Recent Form
0                 Arsenal       WWDWD
1             Aston Villa       WLWWW
2             Bournemouth       DDLWL
3               Brentford      

In [4]:
df.to_csv("../../../data/teams/raw/recent_form/pl_recent_form.csv", index=False)