In [1]:
import pandas as pd
import numpy as np
import os
import xgboost as xgb
from helpers import load_csv, save_csv, log, check_file_exists, load_and_merge_prediction_data, preprocess_prediction_data, make_predictions, save_predictions

In [2]:
def calculate_blended_rate(data, cumulative_total_col, cumulative_metric_col, smoothing_factor=5):
    """
    Calculate a blended rate using cumulative metrics as a proxy for player efficiency.

    Args:
        data (pd.DataFrame): The input data containing 'id', cumulative totals, and cumulative metrics.
        cumulative_total_col (str): The column name for the cumulative total metric (e.g., 'cumulative_goals').
        cumulative_metric_col (str): The column name for the cumulative metric (e.g., 'cumulative_xg').
        smoothing_factor (float): Smoothing factor for blending.

    Returns:
        pd.DataFrame: DataFrame with 'id' and the blended rate column.
    """
    # Group by player (id) and calculate cumulative metrics
    grouped = data.groupby("id").agg(
        cumulative_total=(cumulative_total_col, "max"),
        cumulative_metric=(cumulative_metric_col, "max")
    ).reset_index()

    # Default blended_rate to 1 for all players
    grouped["blended_rate"] = 1

    # Identify players with at least 3 cumulative goals or assists
    valid_players = grouped["cumulative_total"] >= 3

    # Calculate player-level rate for valid players
    grouped.loc[valid_players, "player_rate"] = (
        grouped.loc[valid_players, "cumulative_total"] /
        grouped.loc[valid_players, "cumulative_metric"]
    )
    grouped["player_rate"] = grouped["player_rate"].fillna(0)  # Handle division by zero

    # Calculate league-wide rate
    total_sum = grouped.loc[valid_players, "cumulative_total"].sum()
    metric_sum = grouped.loc[valid_players, "cumulative_metric"].sum()
    league_rate = total_sum / metric_sum if metric_sum > 0 else 1

    # Blend the rate using cumulative metrics as weight
    grouped.loc[valid_players, "weight"] = (
        grouped.loc[valid_players, "cumulative_metric"] /
        (grouped.loc[valid_players, "cumulative_metric"] + smoothing_factor)
    )
    grouped.loc[valid_players, "blended_rate"] = (
        grouped.loc[valid_players, "weight"] * grouped.loc[valid_players, "player_rate"] +
        (1 - grouped.loc[valid_players, "weight"]) * league_rate
    )

    return grouped[["id", "blended_rate"]]

def add_blended_conversion_rate(data, metric_type):
    """
    Add blended conversion rate to the dataset for the specified metric.

    Args:
        data (pd.DataFrame): The input dataset containing player stats.
        metric_type (str): The metric type ('goals' or 'assists').

    Returns:
        pd.DataFrame: The dataset with the blended conversion rate added.
    """
    if metric_type == "goals":
        blended_rate_df = calculate_blended_rate(
            data, cumulative_total_col="cumulative_npg", cumulative_metric_col="cumulative_npxg"
        )
        blended_rate_df.rename(columns={"blended_rate": "blended_goal_conversion_rate"}, inplace=True)
    elif metric_type == "assists":
        blended_rate_df = calculate_blended_rate(
            data, cumulative_total_col="cumulative_assists", cumulative_metric_col="cumulative_xa"
        )
        blended_rate_df.rename(columns={"blended_rate": "blended_assist_conversion_rate"}, inplace=True)
    else:
        raise ValueError("Invalid metric type. Use 'goals' or 'assists'.")

    # Merge blended rate back into the original dataset
    data = data.merge(blended_rate_df, on="id", how="left")
    return data

In [3]:
def calculate_points_for_goals(data, prediction_column, output_column):
    """
    Multiply predicted goals by points based on player position.

    Args:
        data (pd.DataFrame): Dataset containing 'POS' and predicted goals.
        prediction_column (str): Column with predicted goals (e.g., 'predicted_xG').
        output_column (str): Column to store calculated points.

    Returns:
        pd.DataFrame: Updated dataset with calculated points.
    """
    # Define points for each position
    position_points = {2: 6, 3: 5, 4: 4}  # 2 -> Defender, 3 -> Midfielder, 4 -> Forward

    # Map points to positions and calculate final points
    data[output_column] = data["POS"].map(position_points) * data[prediction_column]
    return data

In [4]:
data_directory = "Fantasy-Premier-League/data/2024-25"
positions = ["DEF", "MID", "FWD"]
model_path_xg = "models/xgboost_xg_prediction_model.json"
model_path_xa = "models/xgboost_xa_prediction_model.json"
output_file_xg = "predictions/xG_predictions.csv"
output_file_goals = "predictions/goal_predictions.csv"
output_file_goalpoints = "predictions/goal_points.csv"
output_file_xa = "predictions/xA_predictions.csv"
output_file_assists = "predictions/assist_predictions.csv"
output_file_assistpoints = "predictions/assist_points.csv"

# Load and merge data
merged_data = load_and_merge_prediction_data(data_directory, positions)

# Add blended conversion rates for goals and assists
merged_data = add_blended_conversion_rate(merged_data, metric_type="goals")
merged_data = add_blended_conversion_rate(merged_data, metric_type="assists")

# Preprocess the data
preprocessed_data = preprocess_prediction_data(merged_data)

# Make predictions for xG
predicted_data_xg = make_predictions(preprocessed_data, model_path_xg, "predicted_xG")
predicted_data_xg["predicted_goals"] = (
    predicted_data_xg["predicted_xG"] * predicted_data_xg["blended_goal_conversion_rate"]
)
predicted_data_xg = calculate_points_for_goals(
    predicted_data_xg, 
    prediction_column="predicted_goals", 
    output_column="predicted_goal_points"
)

# Save xG-related predictions
save_predictions(predicted_data_xg, ["unique_id", "first_name", "second_name", "gameweek"], "predicted_xG", output_file_xg)
save_predictions(predicted_data_xg, ["unique_id", "first_name", "second_name", "gameweek"], "predicted_goals", output_file_goals)
save_predictions(predicted_data_xg, ["unique_id", "first_name", "second_name", "gameweek"], "predicted_goal_points", output_file_goalpoints)

# Make predictions for xA
predicted_data_xa = make_predictions(preprocessed_data, model_path_xa, "predicted_xA")
predicted_data_xa["predicted_assists"] = (
    predicted_data_xa["predicted_xA"] * predicted_data_xa["blended_assist_conversion_rate"]
)
predicted_data_xa["predicted_assist_points"] = predicted_data_xa["predicted_assists"] * 3

# Save xA-related predictions
save_predictions(predicted_data_xa, ["unique_id", "first_name", "second_name", "gameweek"], "predicted_xA", output_file_xa)
save_predictions(predicted_data_xa, ["unique_id", "first_name", "second_name", "gameweek"], "predicted_assists", output_file_assists)
save_predictions(predicted_data_xa, ["unique_id", "first_name", "second_name", "gameweek"], "predicted_assist_points", output_file_assistpoints)

Merged Fantasy-Premier-League/data/2024-25/processed_data/DEF/DEF_final.csv
Merged Fantasy-Premier-League/data/2024-25/processed_data/MID/MID_final.csv
Merged Fantasy-Premier-League/data/2024-25/processed_data/FWD/FWD_final.csv
Loaded model from models/xgboost_xg_prediction_model.json


 0.95138276 1.0762922  1.11307344 0.7777539  0.87604248 0.80227219
 1.53673557 1.02040957 1.08415961 1.12812693 1.22939423 0.85593729
 1.04655269 0.89886346 1.05766151 1.1662056  0.84498799 1.34953999
 1.33330855 0.78891121 1.40438894 1.08734033 1.09373055 1.16729411
 1.15476458 1.19470478 0.90646374 1.41250597 0.8678786  1.34885769
 0.96656923 1.32144632 0.98953603 1.23141846 0.93121061 1.15603477
 1.22149119 1.02230155 1.18204618 1.45277202 1.2482455  1.26233017
 0.96735808 0.86876025]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  grouped.loc[valid_players, "blended_rate"] = (
 1.34218746 1.94351202 1.26740564 1.4941298  1.55819042 1.75774262
 1.49043826 1.18304399 1.33188672 1.6077803  1.59786919 1.13339498
 1.30566585 1.5263779  1.24949464 1.09568733 1.64860014 1.53297073
 1.08051382 1.70864679 1.37755196 1.44455289 1.1572013  1.62379677
 1.40732654 1.76297083 1.5586905  1.41694646 1.37022659 1.10513308
 1.39399888 1.21168984 1.44067348 1

Predictions saved to predictions/xG_predictions.csv
Predictions saved to predictions/goal_predictions.csv
Predictions saved to predictions/goal_points.csv
Loaded model from models/xgboost_xa_prediction_model.json
Predictions saved to predictions/xA_predictions.csv
Predictions saved to predictions/assist_predictions.csv
Predictions saved to predictions/assist_points.csv
