In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import os

# --- This script assumes your trained 'model' and 'ucl_model' are in memory ---

def _clean_key_columns(frames):
    """Strip whitespace from headers and from the join-key columns, in place."""
    for frame in frames:
        frame.columns = frame.columns.str.strip()
        for key in ('Player', 'Squad'):
            if key in frame.columns:
                frame[key] = frame[key].str.strip()
        if 'Season' in frame.columns:
            # Cast first so numeric seasons from one file match text seasons from another.
            frame['Season'] = frame['Season'].astype(str).str.strip()


def _zero_fill_missing_features(frame, feature_names, label):
    """Add absent feature columns as 0 and report which ones were absent."""
    missing = [name for name in feature_names if name not in frame.columns]
    for name in missing:
        frame[name] = 0
    if missing:
        # Zero-filling keeps the model callable, but it must not go unnoticed:
        # a misspelled or unsuffixed column would otherwise feed constant zeros.
        print(f"WARNING: {label} features missing from the data and filled with 0: {missing}")
    return missing


def _render(frame):
    """Show a frame with the notebook's display() when present, else print()."""
    try:
        show = display  # injected by IPython/Jupyter; undefined in plain Python
    except NameError:
        show = print
    show(frame)


def make_live_predictions(data_path='../data/', bdo_model=None, ucl_winner_model=None):
    """
    Load the five 2026 season CSV files, merge them into one player-level
    dataset, and rank Ballon d'Or candidates and UCL winner candidates with
    pre-trained classifiers.

    Args:
        data_path: Directory containing the five ``*_2026.csv`` files.
        bdo_model: Classifier exposing ``predict_proba`` for the Ballon d'Or
            ranking. When None, the notebook-level global ``model`` is used.
        ucl_winner_model: Classifier exposing ``predict_proba`` for the UCL
            winner ranking. When None, the global ``ucl_model`` is used.

    Returns:
        None. Results are printed/displayed. If a required CSV is missing, an
        error message is printed and the function returns early.
    """
    print("--- Starting Live Prediction Process for 2025-2026 Season ---")

    try:
        # --- 1. Load the 2026 season data files ---
        df_league_standings = pd.read_csv(os.path.join(data_path, 'combined_league_standings_2026.csv'))
        df_league_players = pd.read_csv(os.path.join(data_path, 'combined_player_stats_2026.csv'))
        df_ucl_players = pd.read_csv(os.path.join(data_path, 'ucl_player_stats_2026.csv'))
        # Loaded (and therefore required to exist) but not merged below.
        df_ucl_teams = pd.read_csv(os.path.join(data_path, 'ucl_league_table_2026.csv'))
        df_ucl_progress = pd.read_csv(os.path.join(data_path, 'ucl_team_progress_2026.csv'))
    except FileNotFoundError as e:
        print(f"\n--- ERROR --- \nCould not find a required file for the 2026 season. Missing file: {e.filename}")
        return
    else:
        print("\n‚úÖ All 2025-2026 data files loaded successfully.")

    # --- 2. Merge the 2026 data ---
    print("\nMerging live data...")
    _clean_key_columns(
        [df_league_standings, df_league_players, df_ucl_players, df_ucl_teams, df_ucl_progress]
    )

    master_df_2026 = pd.merge(
        df_league_players, df_league_standings,
        on=['Squad', 'League', 'Season'], how='left', suffixes=('_player', '_team'),
    )
    ucl_stats_to_add = df_ucl_players[['Player', 'Squad', 'Gls', 'Ast', 'Min', 'Season']]
    master_df_2026 = pd.merge(
        master_df_2026, ucl_stats_to_add,
        on=['Player', 'Squad', 'Season'], how='left', suffixes=('_league', '_ucl'),
    )
    master_df_2026 = pd.merge(master_df_2026, df_ucl_progress, on=['Squad', 'Season'], how='left')

    # Assign the filled column back instead of calling fillna(inplace=True) on
    # df[col]: the chained in-place form does not update the frame under
    # pandas Copy-on-Write.
    for col in ('Gls_ucl', 'Ast_ucl', 'Min_ucl'):
        if col in master_df_2026.columns:
            master_df_2026[col] = master_df_2026[col].fillna(0)
    master_df_2026['UCL_progress'] = master_df_2026['UCL_progress'].fillna('Did Not Qualify')
    master_df_2026 = master_df_2026.loc[:, ~master_df_2026.columns.duplicated()].copy()
    print("2026 master dataset created successfully.")

    # ==============================================================================
    # --- 3. Predict the 2026 Ballon d'Or Winner ---
    # ==============================================================================
    print("\n\n--- Predicting 2026 Ballon d'Or Candidates ---")

    # Lower rank = deeper run; unknown labels fall back to 7 (did not qualify).
    progress_mapping = {'W': 1, 'F': 2, 'SF': 3, 'QF': 4, 'R16': 5, 'GR': 6, 'League Phase': 6, 'Did Not Qualify': 7}
    master_df_2026['UCL_Progress_Rank'] = (
        master_df_2026['UCL_progress'].str.strip().map(progress_mapping).fillna(7)
    )

    # Column order must match the order the model was trained with.
    features_to_use_bdo = [
        'Age', 'Min_league', 'Gls_league', 'Ast_league', 'xG_player', 'xAG_player',
        'Gls_ucl', 'Ast_ucl', 'Min_ucl', 'League_Rk', 'League_Pts', 'UCL_Progress_Rank'
    ]
    _zero_fill_missing_features(master_df_2026, features_to_use_bdo, "Ballon d'Or")
    X_live_bdo = master_df_2026[features_to_use_bdo].fillna(0)

    if bdo_model is None:
        bdo_model = model  # pre-trained notebook global
    master_df_2026['Contender_Score'] = bdo_model.predict_proba(X_live_bdo)[:, 1]

    bdo_predictions = master_df_2026.sort_values(by='Contender_Score', ascending=False)
    print("\nüèÜ Top 20 Ballon d'Or Predictions (Live):")
    _render(bdo_predictions[['Player', 'Squad', 'Gls_league', 'Gls_ucl', 'Contender_Score']].head(20))

    # ==============================================================================
    # --- 4. Predict the 2026 UCL Winner ---
    # ==============================================================================
    print("\n\n--- Predicting 2026 UCL Winner ---")

    team_features_to_use = [
        'MP_team', 'W_team', 'D_team', 'L_team', 'GF_team', 'GA_team', 'GD_team',
        'League_Pts', 'xG_team', 'xGA_team', 'xGD_team'
    ]
    squad_total_cols = ['Gls_league', 'Ast_league', 'xG_player']
    player_agg_2026 = (
        master_df_2026.groupby(['Squad', 'Season'])[squad_total_cols].sum().reset_index()
    )

    # Drop the per-player versions before merging the squad totals. Otherwise
    # the merge emits *_x / *_y columns, the unsuffixed names go missing, and
    # the zero-fill below would replace the totals with constant zeros.
    # NOTE(review): assumes ucl_model was trained on squad totals for these
    # three columns - confirm against the training script.
    ucl_teams_df_2026 = (
        master_df_2026[master_df_2026['UCL_progress'] != 'Did Not Qualify']
        .drop(columns=squad_total_cols)
    )
    ucl_teams_df_2026 = pd.merge(ucl_teams_df_2026, player_agg_2026, on=['Squad', 'Season'], how='left')

    if ucl_teams_df_2026.empty:
        print("No squads with UCL progress found in the 2026 data; skipping UCL prediction.")
        return

    final_features_ucl = team_features_to_use + squad_total_cols
    _zero_fill_missing_features(ucl_teams_df_2026, final_features_ucl, "UCL winner")
    X_live_ucl = ucl_teams_df_2026[final_features_ucl].fillna(0)

    if ucl_winner_model is None:
        ucl_winner_model = ucl_model  # pre-trained notebook global
    ucl_teams_df_2026['Win_Probability'] = ucl_winner_model.predict_proba(X_live_ucl)[:, 1]

    # Rows are still per player, so collapse to one line per squad for display.
    ucl_predictions = ucl_teams_df_2026.sort_values(by='Win_Probability', ascending=False)
    print("\nüéØ Top 10 UCL Winner Predictions (Live):")
    _render(ucl_predictions[['Squad', 'League', 'Win_Probability']].drop_duplicates(subset=['Squad']).head(10))


# --- Main Execution ---
# Both trained estimators must already be defined in the notebook namespace:
# 'model' (Ballon d'Or) and 'ucl_model' (UCL winner). If either is absent,
# explain what to run first instead of failing inside the prediction routine.
if 'model' not in locals() or 'ucl_model' not in locals():
    print("\n--- ERROR ---")
    print("Please make sure you have run the training scripts for the Ballon d'Or model ('model')")
    print("and the UCL Winner model ('ucl_model') in your notebook before running this script.")
else:
    make_live_predictions()


--- ERROR ---
Please make sure you have run the training scripts for the Ballon d'Or model ('model')
and the UCL Winner model ('ucl_model') in your notebook before running this script.
