In [1]:
### Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score
import pickle
from fastapi import FastAPI, HTTPException
from datetime import datetime

In [2]:
# Load dataset
# Read the player/game statistics CSV (path is relative to the notebook's
# working directory) and show the column index as a quick schema check.
stats_csv = '../../data/player_game_statistics.csv'
df = pd.read_csv(stats_csv)
display(df.columns)

Index(['stat_id', 'player_id', 'player_name', 'age', 'gender', 'country',
       'game_id', 'game_name', 'total_games_played', 'total_wins',
       'total_losses', 'total_moves', 'total_time_played_minutes', 'win_ratio',
       'rating', 'last_played'],
      dtype='object')

In [3]:
# Encode game_name
# Fit a label encoder on the game names, then store the resulting integer
# codes in a new `game_encoded` column. The fitted encoder is kept in
# `game_encoder` so the same mapping can be reapplied later.
game_encoder = LabelEncoder().fit(df['game_name'])
df['game_encoded'] = game_encoder.transform(df['game_name'])

In [4]:
# Engineer features
# Every ratio below divides by `total_games_played`. A row with zero games
# would produce NaN (0 / 0) or +/-inf (x / 0), and such values then flow into
# the scaler and the models. Zero-game rows therefore get 0.0 for each ratio;
# rows with at least one game are computed exactly as before.
games_played = df['total_games_played']
no_games = games_played.eq(0)


def _per_game(total_column):
    """Return df[total_column] divided by games played, with 0.0 for zero-game rows."""
    return df[total_column].div(games_played).mask(no_games, 0.0)


df['avg_session_duration'] = _per_game('total_time_played_minutes')
df['historical_win_rate'] = _per_game('total_wins')
df['avg_moves_per_game'] = _per_game('total_moves')
# log1p(n) = log(1 + n), so this is defined for zero games as well.
df['games_experience'] = np.log1p(games_played)

# Select features for model
features = ['avg_session_duration', 'historical_win_rate', 'avg_moves_per_game',
           'games_experience', 'age']
X = df[features]

# Create target variable: 1 when a player's historical win rate is above the
# mean win rate, else 0. The mean is taken over players with at least one
# game so that the 0.0 placeholders above do not shift the threshold.
#
# NOTE(review): this label is a deterministic function of
# `historical_win_rate`, which is also in `features`, so a model can recover
# the label from that single column (label leakage). It is also not an
# observed "next game" outcome. Evaluation scores obtained with this target
# say nothing about real predictive power -- TODO: replace with a genuine
# next-game result, or drop `historical_win_rate` from `features`.
win_rate_threshold = df.loc[~no_games, 'historical_win_rate'].mean()
df['next_game_win'] = (df['historical_win_rate'] > win_rate_threshold).astype(int)
y = df['next_game_win']

In [5]:
### Split Data
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [6]:
### Scale Features
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test split never influences the statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
### Train and Evaluate Multiple Models
# Candidate classifiers, keyed by the display name used in the printed report.
# Each one is fitted on the scaled training split and scored on the scaled
# test split; the candidate with the highest ROC AUC is remembered.
# NOTE(review): the same test split is used both to pick the winner and to
# report its score, so the reported best AUC is not an independent estimate.
models = dict([
    ("Gradient Boosting", GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=42)),
    ("Logistic Regression", LogisticRegression(random_state=42, max_iter=1000)),
    ("Support Vector Machine", SVC(probability=True, random_state=42)),
])

# Running winner. A candidate replaces it only with a strictly higher AUC,
# so the earliest candidate is kept on ties.
best_model, best_auc, best_model_name = None, 0, ""

for model_name in models:
    model = models[model_name]
    print(f"Training {model_name}...")
    model.fit(X_train_scaled, y_train)

    # Hard labels feed the classification report; the probability in
    # column 1 of predict_proba feeds the ROC AUC computation.
    y_pred = model.predict(X_test_scaled)
    y_prob = model.predict_proba(X_test_scaled)[:, 1]
    auc = roc_auc_score(y_test, y_prob)

    print(f"{model_name} Performance:")
    print(classification_report(y_test, y_pred))
    print(f"ROC AUC Score: {auc}\n")

    if not auc > best_auc:
        continue
    best_model_name, best_model, best_auc = model_name, model, auc

print(f"Best Model: {best_model_name} with AUC: {best_auc}")


Training Gradient Boosting...
Gradient Boosting Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       102
           1       1.00      1.00      1.00        98

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

ROC AUC Score: 1.0

Training Random Forest...
Random Forest Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       102
           1       1.00      1.00      1.00        98

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

ROC AUC Score: 1.0

Training Logistic Regression...
Logistic Regression Performance:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       102
           1      

In [8]:
# ### Save Best Model and Scaler
# with open('win_probability_model.pkl', 'wb') as f:
#     pickle.dump(best_model, f)
# 
# with open('win_probability_scaler.pkl', 'wb') as f:
#     pickle.dump(scaler, f)