In [1]:
import os
import pandas as pd

In [27]:
# This notebook lets us get csv with the data we want to predict.
# This cell writes filtered_fixtures.csv: the unfinished fixtures of the next
# few gameweeks, with both teams' short names and venue-specific strengths.
data_directory = "Fantasy-Premier-League/data/2024-25"
fixtures_file = os.path.join(data_directory, "fixtures.csv")
teams_file = os.path.join(data_directory, "teams.csv")
gameweeks_ahead = 6  # how many upcoming gameweeks to export

fixtures = pd.read_csv(fixtures_file)
teams = pd.read_csv(teams_file)

# Compare against True rather than applying a bare `~`, so the mask is boolean
# even when `finished` is not loaded with bool dtype. Rows whose value is not
# True (including blanks) are treated as unfinished.
unfinished_fixtures = fixtures[~fixtures["finished"].eq(True)]

# Sort the distinct gameweek numbers before slicing: taking the first values in
# file order only yields the earliest gameweeks if fixtures.csv happens to be
# ordered by event. Fixtures without a gameweek (NaN event) are ignored.
next_6_gameweeks = (
    unfinished_fixtures["event"]
    .dropna()
    .drop_duplicates()
    .sort_values()
    .head(gameweeks_ahead)
    .to_numpy()
)
filtered_fixtures = unfinished_fixtures[unfinished_fixtures["event"].isin(next_6_gameweeks)]

team_columns = [
    "id", "short_name", "strength_attack_home", "strength_attack_away",
    "strength_defence_home", "strength_defence_away"
]
team_data = teams[team_columns]

# Build one lookup frame per side, already carrying the final column names and
# keyed by the fixture column it joins on. This avoids the clash between the
# fixture `id` and the team `id` and leaves no unused strength columns behind.
away_team_data = team_data[
    ["id", "short_name", "strength_attack_away", "strength_defence_away"]
].rename(columns={
    "id": "team_a",
    "short_name": "short_name_a",
    "strength_attack_away": "strength_attack_a",
    "strength_defence_away": "strength_defense_a"
})
home_team_data = team_data[
    ["id", "short_name", "strength_attack_home", "strength_defence_home"]
].rename(columns={
    "id": "team_h",
    "short_name": "short_name_h",
    "strength_attack_home": "strength_attack_h",
    "strength_defence_home": "strength_defense_h"
})

# Left joins keep every selected fixture even if a team id has no match.
filtered_fixtures = (
    filtered_fixtures
    .merge(away_team_data, on="team_a", how="left")
    .merge(home_team_data, on="team_h", how="left")
)

columns_to_keep = [
    "team_a", "team_h", "strength_attack_h", "strength_attack_a",
    "strength_defense_h", "strength_defense_a", "short_name_h",
    "short_name_a", "event"
]
final_data = filtered_fixtures[columns_to_keep].rename(columns={"event": "gameweek"})

output_file = os.path.join(data_directory, "filtered_fixtures.csv")
final_data.to_csv(output_file, index=False)
print(f"Filtered fixtures saved to {output_file}")

Filtered fixtures saved to Fantasy-Premier-League/data/2024-25/filtered_fixtures.csv


In [28]:
# Once we have that we can make players_with_clubs.csv, where each processed
# player gets their current club id (`team_id`) and price (`value`).
data_directory = "Fantasy-Premier-League/data/2024-25/"
processed_players_file = os.path.join(data_directory, "processed_players.csv")
players_raw_file = os.path.join(data_directory, "players_raw.csv")

processed_players = pd.read_csv(processed_players_file)
players_raw = pd.read_csv(players_raw_file)

# Only the id, price and club columns are taken from players_raw. The left join
# keeps every processed player, even one with no match in the raw file.
club_and_cost = players_raw[['id', 'now_cost', 'team']]
merged_data = (
    processed_players
    .merge(club_and_cost, on='id', how='left')
    .rename(columns={'now_cost': 'value', 'team': 'team_id'})
)

output_file = os.path.join(data_directory, "players_with_clubs.csv")
merged_data.to_csv(output_file, index=False)

print(f"Merged data saved to {output_file}")

Merged data saved to Fantasy-Premier-League/data/2024-25/players_with_clubs.csv


In [29]:
# Let's split the data into positions now because it will make using different
# models easier for us later. Each position gets its own sub-folder holding a
# <POSITION>_with_clubs.csv file with that position's rows.

data_directory = "Fantasy-Premier-League/data/2024-25/"
merged_file = os.path.join(data_directory, "players_with_clubs.csv")

merged_data = pd.read_csv(merged_file)
position_folders = ["GK", "DEF", "MID", "FWD"]

for position in position_folders:
    # Make sure the destination folder exists before writing into it.
    folder_path = os.path.join(data_directory, position)
    os.makedirs(folder_path, exist_ok=True)

    output_file = os.path.join(folder_path, f"{position}_with_clubs.csv")
    is_position = merged_data['position'] == position
    merged_data[is_position].to_csv(output_file, index=False)
    print(f"Saved {position} data to {output_file}")

Saved GK data to Fantasy-Premier-League/data/2024-25/GK/GK_with_clubs.csv
Saved DEF data to Fantasy-Premier-League/data/2024-25/DEF/DEF_with_clubs.csv
Saved MID data to Fantasy-Premier-League/data/2024-25/MID/MID_with_clubs.csv
Saved FWD data to Fantasy-Premier-League/data/2024-25/FWD/FWD_with_clubs.csv


In [30]:
def _player_key(player_id):
    """Return the text used to look a player id up in the folder index."""
    try:
        # Normalise integral values such as 5.0 or numpy integers to "5".
        return str(int(player_id))
    except (TypeError, ValueError, OverflowError):
        return str(player_id)


def _player_folder_index(players_dir):
    """Map the text after the last underscore of each entry in players_dir to its path."""
    index = {}
    for entry in os.listdir(players_dir):
        _, separator, suffix = entry.rpartition("_")
        if separator:
            # Keep the first entry seen for a given suffix.
            index.setdefault(suffix, os.path.join(players_dir, entry))
    return index


def _latest_mean_xg(gw_data, window=5, min_minutes=45):
    """Return the rolling mean of expected_goals at the latest qualifying appearance.

    Only rows with more than ``min_minutes`` minutes are considered; the mean
    covers up to ``window`` such rows, ordered by season and gameweek. Returns
    0.0 when no row qualifies or the mean is undefined.
    """
    ordered = gw_data.sort_values(by=["season", "gameweek"])
    played_xg = ordered.loc[ordered["minutes"] > min_minutes, "expected_goals"]
    if played_xg.empty:
        return 0.0
    latest = played_xg.rolling(window=window, min_periods=1).mean().iloc[-1]
    return 0.0 if pd.isna(latest) else float(latest)


def add_more_features(data_directory, positions):
    """Add a ``mean_xg_5`` column to each ``<position>_with_clubs.csv`` file.

    For every position, the file ``<data_directory>/<position>/<position>_with_clubs.csv``
    is read, each player's ``gw.csv`` is located under ``<data_directory>/players``
    (in the folder whose name ends with ``_<id>``), and the file is rewritten in
    place with the extra column.

    ``mean_xg_5`` is the mean of ``expected_goals`` over the player's last five
    appearances of more than 45 minutes. It is taken at the most recent such
    appearance, so a short or missed latest game no longer resets it to 0.

    Players without a folder or without a ``gw.csv`` are reported and left out
    of the rewritten file. Positions whose file is missing are reported and
    skipped.

    Args:
        data_directory: Season data folder containing the position folders and
            the ``players`` folder.
        positions: Iterable of position folder names, e.g. ``["GK", "DEF"]``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    players_dir = os.path.join(data_directory, "players")
    folder_index = None  # built on first use, then shared by all positions

    for position in positions:
        folder_path = os.path.join(data_directory, position)
        position_file = os.path.join(folder_path, f"{position}_with_clubs.csv")

        if not os.path.exists(position_file):
            print(f"{position_file} not found. Skipping.")
            continue

        # Load the position-specific data
        players_with_clubs = pd.read_csv(position_file)

        if folder_index is None:
            # List the players directory once instead of once per player.
            folder_index = _player_folder_index(players_dir)

        kept_rows = []
        mean_xg_values = []
        for row_label, player_id in players_with_clubs["id"].items():
            player_folder = folder_index.get(_player_key(player_id))
            if player_folder is None:
                print(f"Folder for player ID {player_id} not found. Skipping.")
                continue

            gw_file = os.path.join(player_folder, "gw.csv")
            if not os.path.exists(gw_file):
                print(f"GW file not found for player ID {player_id}. Skipping.")
                continue

            kept_rows.append(row_label)
            mean_xg_values.append(_latest_mean_xg(pd.read_csv(gw_file)))

        # Filtering the original frame keeps column names and dtypes even when
        # no player could be processed.
        updated_data_df = players_with_clubs.loc[kept_rows].copy()
        updated_data_df["mean_xg_5"] = mean_xg_values

        # Save the updated data for the position
        output_file = os.path.join(folder_path, f"{position}_with_clubs.csv")
        updated_data_df.to_csv(output_file, index=False)
        print(f"Updated {position} data saved to {output_file}")


# Add the mean_xg_5 feature to the file of every position.
data_directory = "Fantasy-Premier-League/data/2024-25"
positions = [
    "GK",
    "DEF",
    "MID",
    "FWD",
]
add_more_features(data_directory=data_directory, positions=positions)

Updated GK data saved to Fantasy-Premier-League/data/2024-25/GK/GK_with_clubs.csv
Updated DEF data saved to Fantasy-Premier-League/data/2024-25/DEF/DEF_with_clubs.csv
Updated MID data saved to Fantasy-Premier-League/data/2024-25/MID/MID_with_clubs.csv
Updated FWD data saved to Fantasy-Premier-League/data/2024-25/FWD/FWD_with_clubs.csv


In [31]:
# Let's merge with fixtures now: each player is joined to every fixture in
# filtered_fixtures.csv that involves their club, once as the home side and
# once as the away side, with columns relabelled as own/opponent.
data_directory = "Fantasy-Premier-League/data/2024-25/"
fixtures_file = os.path.join(data_directory, "filtered_fixtures.csv")
positions = ["GK", "DEF", "MID", "FWD"]

fixtures = pd.read_csv(fixtures_file)
position_mapping = {"GK": 1, "DEF": 2, "MID": 3, "FWD": 4}

# Renames applied when the player's club is the home team of the fixture.
home_renames = {
    "team_h": "own_team",
    "team_a": "opponent_team",
    "short_name_h": "own_short_name",
    "short_name_a": "opponent_short_name",
    "strength_attack_h": "own_attack",
    "strength_defense_h": "own_defense",
    "strength_attack_a": "opponent_attack",
    "strength_defense_a": "opponent_defense"
}
# Renames applied when the player's club is the away team of the fixture.
away_renames = {
    "team_a": "own_team",
    "team_h": "opponent_team",
    "short_name_a": "own_short_name",
    "short_name_h": "opponent_short_name",
    "strength_attack_a": "own_attack",
    "strength_defense_a": "own_defense",
    "strength_attack_h": "opponent_attack",
    "strength_defense_h": "opponent_defense"
}
columns_to_keep = [
    "id", "first_name", "second_name", "own_team", "opponent_team",
    "own_short_name", "opponent_short_name", "own_attack", "opponent_attack",
    "own_defense", "opponent_defense", "was_home", "crowds", "season", "POS",
    "gameweek", "value", "mean_xg_5"
]


def _players_on_side(players, fixtures, team_column, was_home, renames):
    """Inner-join players to fixtures on their club and relabel to own/opponent columns."""
    paired = players.merge(
        fixtures,
        left_on="team_id",
        right_on=team_column,
        how="inner"
    )
    paired["was_home"] = was_home
    return paired.rename(columns=renames)


for position in positions:
    folder_path = os.path.join(data_directory, position)
    position_file = os.path.join(folder_path, f"{position}_with_clubs.csv")

    if not os.path.exists(position_file):
        print(f"{position_file} not found. Skipping.")
        continue

    # Load player data
    players = pd.read_csv(position_file)
    home_merge = _players_on_side(players, fixtures, "team_h", 1, home_renames)
    away_merge = _players_on_side(players, fixtures, "team_a", 0, away_renames)

    # Stack home rows above away rows and add the constant columns.
    # NOTE(review): crowds=1 and season=24 are fixed values; presumably they
    # match what the downstream models expect - confirm.
    merged = pd.concat([home_merge, away_merge], ignore_index=True).assign(
        crowds=1,
        season=24,
        POS=position_mapping[position],
    )
    final_data = merged[columns_to_keep]

    output_file = os.path.join(folder_path, f"{position}_merged_with_fixtures.csv")
    final_data.to_csv(output_file, index=False)
    print(f"Saved merged data for {position} to {output_file}")

Saved merged data for GK to Fantasy-Premier-League/data/2024-25/GK/GK_merged_with_fixtures.csv
Saved merged data for DEF to Fantasy-Premier-League/data/2024-25/DEF/DEF_merged_with_fixtures.csv
Saved merged data for MID to Fantasy-Premier-League/data/2024-25/MID/MID_merged_with_fixtures.csv
Saved merged data for FWD to Fantasy-Premier-League/data/2024-25/FWD/FWD_merged_with_fixtures.csv
