In [10]:
import pandas as pd
from typing import List, Tuple
from fastapi import HTTPException

from training_utils import prepare_match_data
from features_utils import add_rolling_features


def _sql_string_literal(value) -> str:
    """Render ``value`` as a single-quoted SQL string literal.

    Backslashes and single quotes are backslash-escaped so that a name such as
    "N'Golo Kanté" cannot terminate the literal early and corrupt the query.
    """
    # assumes the engine behind ``fs.sql`` accepts C-style backslash escapes
    # — TODO confirm. If the engine rejects the query, predict_match falls back
    # to ``fg.read()``, so a wrong assumption degrades to the slow path only.
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def predict_match(
    team_a_players: List[str],
    team_b_players: List[str],
    fg,
    model
) -> Tuple[int, int]:
    """Predict the score of an upcoming match between two line-ups.

    The function fetches the historical rows of every listed player from the
    feature group, appends one placeholder row per player for a synthetic
    ``"FUTURE_MATCH"``, recomputes rolling features over history + placeholder,
    pivots the placeholder rows to one team-level feature row and feeds it to
    ``model``.

    Args:
        team_a_players: Player names of the first team (labelled ``"TeamA"``).
        team_b_players: Player names of the second team (labelled ``"TeamB"``).
        fg: Feature group object; must expose ``feature_store``, ``name``,
            ``version`` and ``read()``.
        model: Fitted estimator with ``predict``. Either a wrapper exposing
            ``estimators_`` (the first estimator's booster supplies the
            feature names) or an object exposing ``get_booster()`` directly.

    Returns:
        ``(score_a, score_b)``: the first two outputs of the first prediction
        row, each rounded to the nearest integer.

    Raises:
        ValueError: If both line-ups are empty, or no historical rows exist
            for any of the players.
        KeyError: If a feature expected by the model is missing from the
            prepared feature row.
    """
    all_players = list(team_a_players) + list(team_b_players)
    if not all_players:
        # Fail before any I/O: an empty IN () list is invalid SQL and the
        # fallback would read the whole feature group only to find nothing.
        raise ValueError("No players were provided for either team.")

    fs = fg.feature_store

    # 1. Fetch historical data
    players_str = ", ".join(_sql_string_literal(p) for p in all_players)
    sql_query = f"SELECT * FROM `{fg.name}_{fg.version}` WHERE player IN ({players_str})"

    try:
        historical_df = fs.sql(sql_query)
    except Exception:
        # Deliberate best-effort: if the SQL path is unavailable or rejects
        # the query, read the full feature group and filter client-side.
        historical_df = fg.read()
        historical_df = historical_df[historical_df["player"].isin(all_players)]

    if historical_df.empty:
        raise ValueError("No historical data found for these players.")

    # 2. Create upcoming match rows
    # Position is taken from the first row returned for each player.
    # NOTE(review): no explicit ordering is applied to historical_df, so this
    # is not guaranteed to be the player's most recent record.
    first_rows = historical_df.drop_duplicates(subset="player", keep="first")
    pos_by_player = dict(zip(first_rows["player"], first_rows["pos"]))
    team_a_lookup = set(team_a_players)

    upcoming_df = pd.DataFrame([
        {
            "player": p,
            "team": "TeamA" if p in team_a_lookup else "TeamB",
            "match_id": "FUTURE_MATCH",
            # Players without any history default to forward so the request
            # still yields a prediction instead of failing.
            "pos": pos_by_player.get(p, "FW"),
            # Placeholder value. NOTE(review): presumably matches the string
            # age format of the historical rows — confirm.
            "age": "99:300",
        }
        for p in all_players
    ])

    # 3. Combine history + upcoming
    # Stored rolling averages are dropped so they are recomputed below with
    # the placeholder rows included.
    raw_cols = [c for c in historical_df.columns if not c.startswith("rolling_avg_")]
    combined_df = pd.concat([historical_df[raw_cols], upcoming_df], ignore_index=True)

    # 4. Rolling features
    enriched_df = add_rolling_features(combined_df, window_size=6)

    # 5. Keep only the placeholder rows of the upcoming match
    inference_data = enriched_df[enriched_df["match_id"] == "FUTURE_MATCH"]

    # 6. Team-level pivot (no targets at inference time, second value unused)
    X, _ = prepare_match_data(
        inference_data,
        positions=["FW", "MF", "DF"],
        target_cols=[]
    )
    print("Columns:", list(X.columns))

    # Add a match_id column to match the training layout; the string value
    # becomes NaN in the numeric coercion that follows.
    X["match_id"] = "FUTURE_MATCH"
    X = X.apply(pd.to_numeric, errors='coerce')

    # 7. Feature alignment: select and order columns as the booster expects
    if hasattr(model, "estimators_"):
        expected_features = model.estimators_[0].get_booster().feature_names
    else:
        expected_features = model.get_booster().feature_names

    X = X[expected_features]

    # 8. Predict
    prediction = model.predict(X)

    score_a = int(round(prediction[0][0]))
    score_b = int(round(prediction[0][1]))

    return score_a, score_b


In [2]:
import os
import joblib
import hopsworks
from dotenv import load_dotenv

# Load variables from a .env file into the process environment; the login cell
# below reads HOPSWORKS_API_KEY from os.environ.
load_dotenv()

True

In [3]:
# Authenticate against Hopsworks; the API key comes from the environment so it
# is never stored in the notebook.
project = hopsworks.login(
    api_key_value=os.environ["HOPSWORKS_API_KEY"],
    project="player_stat_prediction",
    host="eu-west.cloud.hopsworks.ai"
)

# Feature group handle that is later passed to predict_match as `fg`.
fs = project.get_feature_store()
fg = fs.get_feature_group(name="player_stats_rolling", version=1)

# Download the registered model artifact and deserialize it.
mr = project.get_model_registry()
retrieved_model = mr.get_model("xgboost_goals_2teams", version=1)
model_dir = retrieved_model.download()
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a registry you trust.
model = joblib.load(os.path.join(model_dir, "model.pkl"))


2026-01-10 23:14:48,815 INFO: Initializing external client
2026-01-10 23:14:48,816 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-10 23:14:50,357 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3195


Downloading: 100.000%|██████████| 375631/375631 elapsed<00:00 remaining<00:00

Downloading model artifact (0 dirs, 1 files)... DONE




In [4]:
# Line-ups passed to predict_match: eleven player names per side.
team_a_players = [
    "Alisson Becker",
    "Virgil van Dijk",
    "Michael Carrick",
    "Damien Delaney",
    "Andrew Robertson",
    "Alexis Mac Allister",
    "Dominik Szoboszlai",
    "Ryan Gravenberch",
    "Mohamed Salah",
    "Luis Díaz",
    "Darwin Núñez",
]

team_b_players = [
    "Ederson",
    "Ruben Dias",
    "Manuel Akanji",
    "Kyle Walker",
    "Josko Gvardiol",
    "Rodri",
    "Kevin De Bruyne",
    "Bernardo Silva",
    "Phil Foden",
    "Jeremy Doku",
    "Erling Haaland",
]

In [11]:
# Run the prediction for the two line-ups defined above, using the feature
# group and model loaded in the login cell.
# NOTE(review): predict_match returns integer-rounded scores
# (int(round(...))), so these values are not fractional xG and the
# round(..., 2) calls below are no-ops. Either the "xG" naming/labels or the
# rounding inside predict_match should be changed — confirm which is intended.
team_a_xg, team_b_xg = predict_match(
    team_a_players,
    team_b_players,
    fg,
    model
)

print("Team A xG:", round(team_a_xg, 2))
print("Team B xG:", round(team_b_xg, 2))

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (6.36s) 
Columns: ['TeamA_FW_rolling_avg_min', 'TeamA_FW_rolling_avg_gls', 'TeamA_FW_rolling_avg_ast', 'TeamA_FW_rolling_avg_pk', 'TeamA_FW_rolling_avg_pkatt', 'TeamA_FW_rolling_avg_sh', 'TeamA_FW_rolling_avg_sot', 'TeamA_FW_rolling_avg_crdy', 'TeamA_FW_rolling_avg_crdr', 'TeamA_FW_rolling_avg_touches', 'TeamA_FW_rolling_avg_tkl', 'TeamA_FW_rolling_avg_int', 'TeamA_FW_rolling_avg_blocks', 'TeamA_FW_rolling_avg_xg', 'TeamA_FW_rolling_avg_npxg', 'TeamA_FW_rolling_avg_xag', 'TeamA_FW_rolling_avg_sca', 'TeamA_FW_rolling_avg_gca', 'TeamA_FW_rolling_avg_cmp', 'TeamA_FW_rolling_avg_att', 'TeamA_FW_rolling_avg_cmp_perc', 'TeamA_FW_rolling_avg_prgp', 'TeamA_FW_rolling_avg_carries', 'TeamA_FW_rolling_avg_prgc', 'TeamA_FW_rolling_avg_att_1', 'TeamA_FW_rolling_avg_succ', 'TeamA_MF_rolling_avg_player_number', 'TeamA_MF_rolling_avg_min', 'TeamA_MF_rolling_avg_gls', 'TeamA_MF_rolling_avg_ast', 'TeamA_MF_rolling_avg_pk', 'Tea