In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# --- 1. Load and Prepare Data (Same as before) ---
try:
    df_perf = pd.read_csv('Full Dataset (league standings with players stats).csv')
    df_perf = df_perf.loc[:, ~df_perf.columns.str.contains('^Unnamed')]
    df_perf['fullname'] = df_perf['firstname'] + ' ' + df_perf['lastname']
    player_name = "James Philip Milner"
    df_player = df_perf[df_perf['fullname'] == player_name].copy()
    df_player = df_player.sort_values('season').dropna(subset=['totalGoals', 'goalAssists', 'totalShots'])
    print(f"✅ Data prepared for {player_name}")
except FileNotFoundError:
    print("❌ File not found.")
    exit()

# --- 2. Create Time-Series Sequences ---
features = ['totalGoals', 'goalAssists', 'totalShots']
target = 'totalGoals'
lookback = 2
X, y = [], []
for i in range(lookback, len(df_player)):
    X.append(df_player[features].iloc[i-lookback:i].values)
    y.append(df_player[target].iloc[i])
X, y = np.array(X), np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# --- 3. Train Both Models ---
# Train LSTM
scaler_X = MinMaxScaler(feature_range=(0, 1)); scaler_y = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test_scaled = scaler_X.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))

lstm_model = Sequential([LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])), LSTM(units=50), Dense(units=1)])
lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=1, verbose=0)
print("✅ LSTM Model Trained.")

# Train XGBoost
X_train_xgb = X_train.reshape(X_train.shape[0], -1)
X_test_xgb = X_test.reshape(X_test.shape[0], -1)
xgbr = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgbr.fit(X_train_xgb, y_train)
print("✅ XGBoost Model Trained.")

# --- 4. Make Predictions with Both Models ---
predictions_scaled_lstm = lstm_model.predict(X_test_scaled)
predictions_lstm = scaler_y.inverse_transform(predictions_scaled_lstm)
predictions_xgb = xgbr.predict(X_test_xgb).reshape(-1, 1) # Reshape for consistency

# --- 5. Create the Ensemble Prediction ---
ensemble_predictions = (predictions_lstm + predictions_xgb) / 2
print("✅ Ensemble Predictions Created.")

# --- 6. Evaluate All Models ---
rmse_lstm = np.sqrt(mean_squared_error(y_test, predictions_lstm))
rmse_xgb = np.sqrt(mean_squared_error(y_test, predictions_xgb))
rmse_ensemble = np.sqrt(mean_squared_error(y_test, ensemble_predictions))

# --- 7. Final Comparison ---
print("\n--- 🚀 Final Model Performance 🚀 ---")
print(f"Expert 1 - LSTM Model RMSE: {rmse_lstm:.2f} goals")
print(f"Expert 2 - XGBoost Model RMSE: {rmse_xgb:.2f} goals")
print(f"Team     - Ensemble Model RMSE: {rmse_ensemble:.2f} goals")

✅ Data prepared for James Philip Milner


  super().__init__(**kwargs)


✅ LSTM Model Trained.
✅ XGBoost Model Trained.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
✅ Ensemble Predictions Created.

--- 🚀 Final Model Performance 🚀 ---
Expert 1 - LSTM Model RMSE: 2.99 goals
Expert 2 - XGBoost Model RMSE: 3.53 goals
Team     - Ensemble Model RMSE: 3.12 goals
