# Ligue 1 Recent Form

In [1]:
import pandas as pd
import time
import random

# Dictionary mapping team names to their FBref squad URLs
team_url_map = {
    "Angers SCO": "https://fbref.com/en/squads/69236f98/Angers-Stats",
    "AJ Auxerre": "https://fbref.com/en/squads/5ae09109/Auxerre-Stats",
    "Stade Brestois 29": "https://fbref.com/en/squads/fb08dbb3/Brest-Stats",
    "Le Havre AC": "https://fbref.com/en/squads/5c2737db/Le-Havre-Stats",
    "RC Lens": "https://fbref.com/en/squads/fd4e0f7d/Lens-Stats",
    "LOSC Lille": "https://fbref.com/en/squads/cb188c0c/Lille-Stats",
    "Olympique Lyonnais": "https://fbref.com/en/squads/d53c0b06/Lyon-Stats",
    "Olympique de Marseille": "https://fbref.com/en/squads/5725cc7b/Marseille-Stats",
    "AS Monaco": "https://fbref.com/en/squads/fd6114db/Monaco-Stats",
    "Montpellier HSC": "https://fbref.com/en/squads/281b0e73/Montpellier-Stats",
    "FC Nantes": "https://fbref.com/en/squads/d7a486cd/Nantes-Stats",
    "OGC Nice": "https://fbref.com/en/squads/132ebc33/Nice-Stats",
    "Paris Saint-Germain": "https://fbref.com/en/squads/e2d8892c/Paris-Saint-Germain-Stats",
    "Stade de Reims": "https://fbref.com/en/squads/7fdd64e0/Reims-Stats",
    "Stade Rennais FC": "https://fbref.com/en/squads/b3072e00/Rennes-Stats",
    "AS Saint-Etienne": "https://fbref.com/en/squads/d298ef2c/Saint-Etienne-Stats",
    "RC Strasbourg Alsace": "https://fbref.com/en/squads/c0d3eab4/Strasbourg-Stats",
    "Toulouse FC": "https://fbref.com/en/squads/3f8c4b5f/Toulouse-Stats"
}

In [2]:
df = pd.DataFrame(columns=["Team", "Recent Form"])

for idx, (team, url) in enumerate(team_url_map.items()):
    try:
        # Read the match logs table (no filtering by competition)
        tables = pd.read_html(url, attrs={"id": "matchlogs_for"})
        matchlogs = tables[0]

        # Flatten multi-index if it exists
        if isinstance(matchlogs.columns, pd.MultiIndex):
            matchlogs.columns = matchlogs.columns.get_level_values(-1)

        # Remove rows with no result (i.e., future games)
        matchlogs = matchlogs.dropna(subset=["Result"])

        # Get the last 5 results
        recent_results = matchlogs.head(5)["Result"].tolist()

        # Join the results into a single string
        form_str = "".join(recent_results)

        df.at[idx, "Team"] = team
        df.at[idx, "Recent Form"] = form_str

        print(f"✔ Processed {team} - Form: {form_str}")
        time.sleep(random.uniform(4, 8))

    except Exception as e:
        print(f"❌ Failed to process {team}: {e}")


# Show results
print("\n✅ Final Result:")
print(df.head())

✔ Processed Angers SCO - Form: LLLDD
✔ Processed AJ Auxerre - Form: WLLLL
✔ Processed Stade Brestois 29 - Form: LLWLW
✔ Processed Le Havre AC - Form: LWWLL
✔ Processed RC Lens - Form: WWWLD
✔ Processed LOSC Lille - Form: WDWWW
✔ Processed Olympique Lyonnais - Form: LLWDL
✔ Processed Olympique de Marseille - Form: WDWWW
✔ Processed AS Monaco - Form: WWDWW
✔ Processed Montpellier HSC - Form: DLLLW
✔ Processed FC Nantes - Form: DWWLD
✔ Processed OGC Nice - Form: LDWLW
✔ Processed Paris Saint-Germain - Form: WWWWW
✔ Processed Stade de Reims - Form: LDWWD
✔ Processed Stade Rennais FC - Form: WLLWD
✔ Processed AS Saint-Etienne - Form: LLLWL
✔ Processed RC Strasbourg Alsace - Form: DWLDD
✔ Processed Toulouse FC - Form: DDLWL

✅ Final Result:
                Team Recent Form
0         Angers SCO       LLLDD
1         AJ Auxerre       WLLLL
2  Stade Brestois 29       LLWLW
3        Le Havre AC       LWWLL
4            RC Lens       WWWLD


In [4]:
df.to_csv("../../../data/teams/raw/recent_form/ligue_1_recent_form.csv", index=False)