In [1]:
import os
import pandas as pd
import re
from helpers import load_csv, save_csv, log, check_file_exists

In [3]:
# Compute each player's final predicted points per gameweek:
# 1. Add the points from each model and the bonus-point estimate
# 2. Multiply that sum by pred_minutes / 90
# 3. Add the points awarded for minutes played
data_directory = "Fantasy-Premier-League/data/2024-25/"
positions = ["GK", "DEF", "MID", "FWD"]

# Step 1: Collect the per-position "<POS>_with_features.csv" frames.
all_data = []

for position in positions:
    position_file = os.path.join(data_directory, "processed_data", position, f"{position}_with_features.csv")
    if not check_file_exists(position_file):
        log(f"File not found for position {position}. Skipping.", level="WARNING")
        continue

    position_data = load_csv(position_file)
    if position_data is None:
        # A None result is treated as a failed load. Report it instead of
        # skipping silently, mirroring how the points files are handled below.
        log(f"Failed to load data for position {position}. Skipping.", level="WARNING")
        continue

    all_data.append(position_data)
    log(f"Loaded data for position {position}.")

if not all_data:
    log("No data to merge from players_with_features. Exiting.", level="ERROR")
    # Stop with a non-zero status: the interactive exit() helper is not meant
    # for program code and would report success.
    raise SystemExit(1)


# Stack the per-position frames into one player table.
combined_players = pd.concat(all_data, ignore_index=True)
points_files_dir = "predictions"
# os.listdir returns entries in arbitrary order; sort so the merge order (and
# therefore the resulting column order) is the same on every run.
points_files = sorted(f for f in os.listdir(points_files_dir) if f.endswith("points.csv"))

if not points_files:
    log("No points files found for merging. Exiting.", level="ERROR")
    # Stop with a non-zero status: the interactive exit() helper is not meant
    # for program code and would report success.
    raise SystemExit(1)

selected_columns = ['id', 'pred_minutes', 'season_mean_bonus', 'first_name', 'second_name']
# Explicit copy so later modifications never touch (or warn about) combined_players.
merged_data = combined_players[selected_columns].copy()

for points_file in points_files:
    points_file_path = os.path.join(points_files_dir, points_file)
    points_data = load_csv(points_file_path)
    if points_data is not None:
        # Left join: every player row is kept even without a prediction row.
        # NOTE(review): non-key columns present on both sides get pandas'
        # default "_x"/"_y" suffixes; later code reads the "_x" name columns,
        # so the suffix behaviour is intentionally left unchanged here.
        merged_data = pd.merge(merged_data, points_data, left_on="id", right_on="unique_id", how="left")
        log(f"Merged with points file: {points_file}.")
    else:
        log(f"Failed to load {points_file}. Skipping.", level="WARNING")

# Players missing from a points file have NaN predictions; count them as 0.
merged_data = merged_data.fillna(0)

# Now, for each player and each gameweek, we:
# - sum the points predicted by the individual models
# - add the bonus-point prediction (season_mean_bonus)
# - multiply that sum by pred_minutes / 90
# - add the points awarded for pred_minutes to obtain the final score

def extract_gameweek(col_name):
    """Return the gameweek number encoded in a prediction column name.

    Column names of the form ``gw_<N>_<anything>_points`` yield ``N`` as an
    int; any other name yields ``None``.
    """
    found = re.match(r'^gw_(\d+)_.*_points$', col_name)
    return int(found.group(1)) if found else None

# Group the prediction columns by the gameweek encoded in their names
# (columns that extract_gameweek does not recognise are ignored).
gameweek_to_columns = {}
for col in merged_data.columns:
    gw = extract_gameweek(col)
    if gw is not None:
        gameweek_to_columns.setdefault(gw, []).append(col)

summed_gameweek_points = pd.DataFrame()
summed_gameweek_points['id'] = merged_data['id']

# One "gw_<N>_total_points" column per gameweek: the row-wise sum of every
# prediction column belonging to that gameweek.
for gw, cols in gameweek_to_columns.items():
    gw_sum_col = f'gw_{gw}_total_points'
    summed_gameweek_points[gw_sum_col] = merged_data[cols].sum(axis=1)
    log(f"Summed points for Gameweek {gw} into column '{gw_sum_col}'.")

summed_gameweek_points['pred_minutes'] = merged_data['pred_minutes']
summed_gameweek_points['season_mean_bonus'] = merged_data['season_mean_bonus']

# The name columns only carry an "_x" suffix when a merged points file also
# had columns with the same names. Prefer the suffixed column (the original
# behaviour) and fall back to the plain one instead of raising KeyError.
for name_col in ('first_name', 'second_name'):
    suffixed_col = f'{name_col}_x'
    source_col = suffixed_col if suffixed_col in merged_data.columns else name_col
    summed_gameweek_points[name_col] = merged_data[source_col]

# These terms do not depend on the gameweek, so compute them once up front
# instead of re-running a row-wise apply for every gameweek.
pred_minutes = summed_gameweek_points['pred_minutes']
season_bonus = summed_gameweek_points['season_mean_bonus']
minutes_share = pred_minutes / 90
# Points for minutes: 2 when pred_minutes >= 60, 1 when 0 < pred_minutes < 60,
# otherwise 0 (the sum of the two boolean tests gives exactly that).
minutes_points = (pred_minutes > 0).astype(int) + (pred_minutes >= 60).astype(int)

for gw in sorted(gameweek_to_columns.keys()):
    gw_sum_col = f'gw_{gw}_total_points'
    final_col = f'gw_{gw}_final_points'

    # (model points + bonus) scaled by the share of a 90-minute match,
    # plus the points for minutes, rounded to two decimals.
    summed_gameweek_points[final_col] = (
        (summed_gameweek_points[gw_sum_col] + season_bonus) * minutes_share
        + minutes_points
    ).round(2)

    log(f"Calculated '{final_col}' for Gameweek {gw}.")

# Output layout: identifying columns first, then one final-points column per
# gameweek in ascending gameweek order.
sorted_gameweeks = sorted(gameweek_to_columns)
sort_columns = [f'gw_{week}_final_points' for week in sorted_gameweeks]
final_columns = ['id', 'pred_minutes', 'first_name', 'second_name', *sort_columns]

# Rank players from highest to lowest score, using the earliest gameweek as
# the primary key and each later gameweek as the next tie-breaker.
final_output = (
    summed_gameweek_points[final_columns]
    .sort_values(by=sort_columns, ascending=False)
    .reset_index(drop=True)
)

final_output_file = "predictions/final.csv"
save_csv(final_output, final_output_file)
log(f"Final merged data saved to {final_output_file}.")

INFO: Loaded data for position GK.
INFO: Loaded data for position DEF.
INFO: Loaded data for position MID.
INFO: Loaded data for position FWD.
INFO: Merged with points file: goal_points.csv.
INFO: Merged with points file: assist_points.csv.
INFO: Summed points for Gameweek 14 into column 'gw_14_total_points'.
INFO: Summed points for Gameweek 15 into column 'gw_15_total_points'.
INFO: Summed points for Gameweek 16 into column 'gw_16_total_points'.
INFO: Summed points for Gameweek 17 into column 'gw_17_total_points'.
INFO: Summed points for Gameweek 18 into column 'gw_18_total_points'.
INFO: Summed points for Gameweek 19 into column 'gw_19_total_points'.
INFO: Calculated 'gw_14_final_points' for Gameweek 14.
INFO: Calculated 'gw_15_final_points' for Gameweek 15.
INFO: Calculated 'gw_16_final_points' for Gameweek 16.
INFO: Calculated 'gw_17_final_points' for Gameweek 17.
INFO: Calculated 'gw_18_final_points' for Gameweek 18.
INFO: Calculated 'gw_19_final_points' for Gameweek 19.
INFO: Sav