<a href="https://colab.research.google.com/github/007Anmol/BoostingPLStats/blob/main/BoostingPLStats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# STEP 1: IMPORTS AND UPLOAD
import pandas as pd
from google.colab import files

# Prompt for the dataset through Colab's upload helper.
print("📁 Upload your EPL CSV dataset...")
uploaded = files.upload()

# `uploaded` is keyed by file name; only the first entry is used downstream.
uploaded_names = list(uploaded)
filename = uploaded_names[0]
print(f"✅ File uploaded: {filename}")

# STEP 2: LOAD & SORT DATA
# Read the uploaded CSV, turn 'Date' into datetimes, and order the matches
# chronologically with a fresh 0..n-1 index.
# NOTE(review): no explicit date format is passed to pd.to_datetime, so the
# day/month order is whatever pandas infers — confirm against the CSV.
df = (
    pd.read_csv(filename)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(by='Date')
    .reset_index(drop=True)
)

# Calendar year of the match date (used later to filter the output).
df['SeasonYear'] = df['Date'].dt.year

# STEP 3: DEFINE RELEVANT STATS COLUMNS
# Feature name -> (column to read when the team plays at home,
#                  column to read when the team plays away).
# 'GoalsConceded' deliberately uses the opponent's goal columns.
stats_columns = {
    feature: (home_col, away_col)
    for feature, home_col, away_col in (
        ('GoalsScored', 'FTHG', 'FTAG'),
        ('GoalsConceded', 'FTAG', 'FTHG'),
        ('Shots', 'HS', 'AS'),
        ('ShotsOnTarget', 'HST', 'AST'),
        ('Fouls', 'HF', 'AF'),
        ('Corners', 'HC', 'AC'),
        ('YellowCards', 'HY', 'AY'),
        ('RedCards', 'HR', 'AR'),
    )
}

# STEP 4: GENERATE ROLLING FEATURES
def generate_team_features(df, windows=(5, 15, 38), stats=None):
    """Append per-team rolling-sum features to a match table.

    For every team, statistic and window, a column named
    ``{team}_{stat}_Last{window}`` is added. On each match the team played,
    it holds the sum of that statistic over the team's previous ``window``
    matches (fewer if the team has not played that many yet). The current
    match is never included, so a team's first match gets 0. On matches the
    team did not play, its columns are NaN.

    Args:
        df: Match table with ``Date``, ``HomeTeam`` and ``AwayTeam`` columns
            plus every home/away stat column named in ``stats``. The index
            must be unique because features are aligned back on it.
        windows: Look-back sizes, counted in matches.
        stats: Mapping ``feature name -> (home_col, away_col)``; the first
            column is read when the team is the home side, the second when
            it is the away side. Defaults to the module-level
            ``stats_columns``.

    Returns:
        A new DataFrame with the rows of ``df`` (one per match, original
        order) followed by the feature columns. ``df`` is not modified.
    """
    if stats is None:
        stats = stats_columns

    # Take teams from both sides so a team that has only played away so far
    # still gets its feature columns.
    teams = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()

    feature_frames = []
    for team in teams:
        involved = (df['HomeTeam'] == team) | (df['AwayTeam'] == team)
        team_matches = df[involved].sort_values(by='Date')
        is_home = team_matches['HomeTeam'] == team

        team_columns = {}
        for stat_name, (home_col, away_col) in stats.items():
            # The team's own value in each of its matches, whichever side it
            # was on. Computed once and reused for every window.
            per_match = (
                team_matches[home_col]
                .where(is_home, team_matches[away_col])
                .to_numpy()
            )
            for window in windows:
                # Slice ends at i, so the current match is excluded; an empty
                # slice sums to 0 and a NaN inside the slice propagates.
                team_columns[f'{team}_{stat_name}_Last{window}'] = [
                    per_match[max(0, i - window):i].sum()
                    for i in range(len(per_match))
                ]

        feature_frames.append(
            pd.DataFrame(team_columns, index=team_matches.index)
        )

    if not feature_frames:
        # No teams (empty input): nothing to add.
        return df.copy()

    # Place the per-team frames side by side, aligned on the match index, so
    # the home and away teams' features share one row. Stacking them row-wise
    # would give each match index twice and make the join duplicate every
    # match.
    all_features = pd.concat(feature_frames, axis=1)
    return df.join(all_features)

# STEP 5: APPLY FEATURE GENERATION
# Build the rolling features over the full history loaded above.
print("⏳ Generating rolling features...")
df_with_features = generate_team_features(df)

# STEP 6: KEEP ONLY MATCHES FROM 2006 ONWARD
# Filtering happens after feature generation, so rows kept here can still
# draw on matches from before 2006 in their look-back windows.
df_final = df_with_features.loc[
    lambda frame: frame['SeasonYear'] >= 2006
].copy()

# STEP 7: SAVE & DOWNLOAD
# Write the result without the index column, then hand it to the Colab
# download helper.
output_filename = "EPL_RollingFeatures_2006_Onward.csv"
df_final.to_csv(output_filename, index=False)
print(f"✅ Final dataset saved as {output_filename}")

files.download(output_filename)


📁 Upload your EPL CSV dataset...


Saving Football Data Test Task-DESKTOP-HKE84D6(Raw Data).csv to Football Data Test Task-DESKTOP-HKE84D6(Raw Data).csv
✅ File uploaded: Football Data Test Task-DESKTOP-HKE84D6(Raw Data).csv
⏳ Generating rolling features...
✅ Final dataset saved as EPL_RollingFeatures_2006_Onward.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>