In [None]:
import pandas as pd
import glob
import os

# Standings rules: calculate points and goal difference
def compute_standings(df):
    """Rank the teams of one season from its match results.

    Points come from the ``FTR`` column: ``'H'`` gives the home team 3
    points, ``'A'`` gives the away team 3 points, and ``'D'`` gives both
    teams 1 point. Any other ``FTR`` value awards no points, but the goals
    of that match still count.

    Teams are ordered by points, then goal difference, then goals scored
    (all descending). Teams still level after that keep the order in which
    they first appear in ``df``.

    Args:
        df: DataFrame with one row per match and the columns ``HomeTeam``,
            ``AwayTeam``, ``FTHG`` (home goals), ``FTAG`` (away goals) and
            ``FTR`` (full-time result).

    Returns:
        dict mapping each team name to its 1-based rank. Empty when ``df``
        contains no usable match rows.
    """
    # Rows lacking a team name or a score (e.g. blank padding lines or
    # fixtures without a result) cannot be scored. Left in, they would
    # create a bogus NaN/None "team" and turn goal differences into NaN,
    # which breaks the sort below.
    played = df.dropna(subset=['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'])

    teams = {}
    columns = ['HomeTeam', 'AwayTeam', 'FTR', 'FTHG', 'FTAG']
    matches = played[columns].itertuples(index=False, name=None)
    for home, away, result, home_goals, away_goals in matches:
        home_stats = teams.setdefault(home, {'Points': 0, 'GD': 0, 'GF': 0})
        away_stats = teams.setdefault(away, {'Points': 0, 'GD': 0, 'GF': 0})

        # Assign points
        if result == 'H':
            home_stats['Points'] += 3
        elif result == 'D':
            home_stats['Points'] += 1
            away_stats['Points'] += 1
        elif result == 'A':
            away_stats['Points'] += 3

        # Update goal difference and goals scored
        home_stats['GD'] += home_goals - away_goals
        away_stats['GD'] += away_goals - home_goals
        home_stats['GF'] += home_goals
        away_stats['GF'] += away_goals

    # sorted() is stable, so teams level on all three criteria keep their
    # first-appearance order.
    sorted_teams = sorted(
        teams.items(),
        key=lambda item: (
            -item[1]['Points'],
            -item[1]['GD'],
            -item[1]['GF'],
        ),
    )
    # Return a dict mapping team → rank
    return {team: rank for rank, (team, _) in enumerate(sorted_teams, start=1)}


# Folder containing the per-season CSV files, named by season
# (e.g. "2023-24.csv")
path = "dataset"

# Compute one rank Series per season, named by the FULL season string so the
# columns can be ordered chronologically before the labels are shortened.
season_columns = []
for file in sorted(glob.glob(os.path.join(path, "*.csv")), reverse=True):
    # Extract season from filename, e.g. "2023-24"
    season = os.path.splitext(os.path.basename(file))[0]
    df = pd.read_csv(file, usecols=['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR'])
    standings = compute_standings(df)
    season_columns.append(pd.Series(standings, name=season))

# Concatenate once (one column per season); teams absent from a season get
# NaN. pd.concat rejects an empty list, so fall back to an empty frame when
# no CSV files were found.
if season_columns:
    ranking_df = pd.concat(season_columns, axis=1)
else:
    ranking_df = pd.DataFrame()

# Sort teams alphabetically and seasons chronologically (newest first).
# Columns are ordered by full season name: sorting the two-digit labels as
# strings would put "99" (1999) ahead of "23" (2023).
ranking_df.index.name = "Team"
ranking_df = ranking_df.sort_index()
ranking_df = ranking_df.sort_index(axis=1, ascending=False)

# Use the last two digits of the starting year as column name, e.g. "23"
ranking_df.columns = [season[:4][-2:] for season in ranking_df.columns]

# Save to CSV
ranking_df.to_csv("team_season_rankings.csv")
print("✅ Team season rankings saved to: team_season_rankings.csv")
