In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split

In [None]:
# Read the two pre-processed inputs from the clean-data folder:
#   X   - contents of X_outfield.csv (used by later cells as the model feature table)
#   all - contents of outfield_engineered.csv (later cells read the target columns from it)
# NOTE(review): the name `all` shadows Python's builtin all(); it is kept unchanged
# because other cells in this notebook refer to the frame by this name.
X = pd.read_csv(filepath_or_buffer='../data/clean/X_outfield.csv')
all = pd.read_csv(filepath_or_buffer='../data/clean/outfield_engineered.csv')


In [None]:
# Train the regressor that predicts a player's rating change.
# Target: the 'rating_change' column of the engineered table.
y_rating = all['rating_change']

# Keep only rows whose target is present. The mask is derived from `all` and then
# applied to `X`, so this assumes both frames describe the same rows in the same
# order — TODO confirm against the preprocessing step.
valid_mask = y_rating.notna()
X_clean = X[valid_mask]
y_clean = y_rating[valid_mask]

# 80/20 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)
ratingModel = xgb.XGBRegressor(n_estimators=200, max_depth=3, min_child_weight=5, learning_rate=0.1, random_state=42)
ratingModel.fit(X_train, y_train)

# Preview only the first rows instead of echoing the whole engineered frame.
all.head()

In [None]:
# Test on specific players: predict the rating change for one named player and
# apply a rule-based correction for extreme breakout seasons.
df_full = pd.read_csv('../data/clean/current_players_2425.csv')

# Tunables for the breakout override below.
MOMENTUM_OVERRIDE_THRESHOLD = 14   # rating_momentum above this is treated as an extreme breakout
MOMENTUM_OVERRIDE_WEIGHT = 0.55    # share of the momentum added back onto a negative prediction

# Example: Test on a specific player
player_name = 'Phil Foden'
test_player = df_full[df_full['player'] == player_name]
if test_player.empty:
    # Fail with a readable message instead of an opaque error from predict()/indexing.
    raise ValueError(f"Player {player_name!r} not found in current_players_2425.csv")

# Ensure only feature columns are used, in the same order as the training frame.
test_player_features = test_player[X.columns]
# If several rows match the name, the first one is the one reported.
predicted_change = ratingModel.predict(test_player_features)[0]

# Confidence flag for outlier growth patterns
rating_momentum = test_player_features['rating_momentum'].iloc[0]
confidence = "HIGH"
warning = ""
adjustment = ""

# Rule-based override: Prevent regression after extreme breakouts.
# A negative prediction is lifted by a fraction of the momentum and floored at 0.
if rating_momentum > MOMENTUM_OVERRIDE_THRESHOLD and predicted_change < 0:
    original_pred = predicted_change
    predicted_change = max(0, predicted_change + (rating_momentum * MOMENTUM_OVERRIDE_WEIGHT))  # Partial adjustment
    adjustment = f"📈 Adjusted from {original_pred:+.1f} to {predicted_change:+.1f} (breakout momentum override)"
    confidence = "MEDIUM"

# Confidence warnings: none are generated yet, so `warning` stays empty here;
# `confidence` is assigned above but not printed by this cell.

print(f"Predicted rating change for {player_name}: {predicted_change:+.1f}")
if adjustment:
    print(adjustment)
if warning:
    print(warning)


In [None]:
# Model for next season's goals per 90 minutes.
# NOTE(review): these imports live in this cell rather than the top import cell;
# a later cell uses r2_score / mean_absolute_error from here.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Target column taken from the engineered table.
y = all['next_Per 90 Minutes_Gls']

# Restrict features and target to the rows where the target is known.
valid_mask = y.notna()
X_clean, y_clean = X[valid_mask], y[valid_mask]

# Reproducible 80/20 split, then fit the gradient-boosted regressor.
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    test_size=0.2,
    random_state=42,
)
valModel = xgb.XGBRegressor(
    n_estimators=200,
    max_depth=3,
    min_child_weight=5,
    learning_rate=0.1,
    random_state=42,
)
valModel.fit(X_train, y_train)

In [None]:
# Goals-per-90 forecast for the selected player, using the feature row built earlier
# (test_player_features rather than the unfiltered test_player row).
# NOTE(review): the name `predicted_change` is reused here for a goals/90 prediction.
predicted_change = valModel.predict(test_player_features)

# Current-season minutes serve as the proxy for next season's playing time.
is_selected_player = df_full['player'] == player_name
player_mins = df_full[is_selected_player]['Playing Time_Min'].iloc[0]

# Scale the per-90 rate to a season total: (goals/90) * (total_minutes / 90)
total_goals = predicted_change[0] * (player_mins / 90)

print(
    f"Predicted next season for {player_name}:",
    f"  Goals/90: {predicted_change[0]:.2f}",
    f"  Total goals (based on {player_mins:.0f} mins): {total_goals:.1f}",
    sep="\n",
)


In [None]:
# Train separate models for each FIFA face stat and keep their hold-out metrics.
fifa_stats = ['next_pace', 'next_shooting', 'next_passing', 'next_dribbling', 'next_defending', 'next_physic']
models = {}         # stat name -> fitted XGBRegressor
model_metrics = {}  # stat name -> {'r2': ..., 'mae': ...} measured on the 20% test split

for stat in fifa_stats:
    # Target column for this stat; rows with a missing target are dropped.
    y = all[stat]
    valid_mask = y.notna()
    X_clean = X[valid_mask]
    y_clean = y[valid_mask]

    # Train model
    X_train, X_test, y_train, y_test = train_test_split(X_clean, y_clean, test_size=0.2, random_state=42)
    model_temp = xgb.XGBRegressor(n_estimators=250, max_depth=3, min_child_weight=5, learning_rate=0.1, random_state=42)
    model_temp.fit(X_train, y_train)

    # Evaluate on the held-out split (r2_score / mean_absolute_error are imported in an earlier cell).
    y_pred_test = model_temp.predict(X_test)
    test_r2 = r2_score(y_test, y_pred_test)
    test_mae = mean_absolute_error(y_test, y_pred_test)

    # Record and report the metrics so they are not lost on the next iteration.
    model_metrics[stat] = {'r2': test_r2, 'mae': test_mae}
    print(f"{stat:15} R2: {test_r2:.3f}  MAE: {test_mae:.2f}")

    # Save model
    models[stat] = model_temp

In [None]:
# Report the selected player's forecast for every trained FIFA face-stat model.
print(f"Predicted next season FIFA stats for {player_name}:")
print("-" * 40)

for stat in models:
    model = models[stat]
    # Predict from the prepared feature row and take the first result.
    prediction = model.predict(test_player_features)[0]
    # Build the display label: drop 'next_' and capitalise what remains.
    stat_name = stat.replace('next_', '')
    stat_name = stat_name.capitalize()
    print(f"{stat_name:12}: {prediction:.0f}")


In [None]:
# Model for next season's market value.
# NOTE(review): these imports repeat ones made in an earlier cell of this notebook.
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Target column taken from the engineered table.
y = all['next_value_eur']

# Restrict features and target to the rows where the target is known.
valid_mask = y.notna()
X_clean, y_clean = X[valid_mask], y[valid_mask]

# Reproducible 80/20 split, then fit the gradient-boosted regressor.
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    test_size=0.2,
    random_state=42,
)
val2Model = xgb.XGBRegressor(
    n_estimators=200,
    max_depth=3,
    min_child_weight=5,
    learning_rate=0.1,
    random_state=42,
)
val2Model.fit(X_train, y_train)

In [None]:
# Predict next season's market value for the selected player from the prepared feature row
# (test_player_features, not the unfiltered test_player row).
# NOTE(review): `predicted_change` holds a predicted value here, not a change; the name is
# kept because it is the variable this notebook reuses for predictions.
predicted_change = val2Model.predict(test_player_features)
# The model was trained on 'next_value_eur', so the amount is reported in euros.
print(f"Predicted next value for {player_name}: €{predicted_change[0]:.0f}")