In [28]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score

In [29]:
fighters_stats = pd.read_csv('data/fighter_stats_3.csv')

In [30]:
fighters_stats

Unnamed: 0,nb_fights,L,W,D,NC,knockdowns,significant_strikes_landed,significant_strikes_attempted,significant_strikes_rate,total_strikes_landed,...,body_attempted_1,leg_landed_1,leg_attempted_1,distance_landed_1,distance_attempted_1,clinch_landed_1,clinch_attempted_1,ground_landed_1,ground_attempted_1,winner
0,8.0,2.0,6.0,0.0,0.0,0.0,26.0,49.5,0.510,30.0,...,7.0,3.0,5.0,16.0,36.0,0.0,0.0,0.0,0.0,1
1,8.0,2.0,6.0,0.0,0.0,0.0,26.0,49.5,0.510,30.0,...,5.0,1.0,1.5,6.5,15.0,3.0,5.0,1.0,1.0,0
2,8.0,2.0,6.0,0.0,0.0,0.0,26.0,49.5,0.510,30.0,...,6.0,2.0,3.0,15.0,34.0,1.0,2.0,0.0,0.0,0
3,8.0,2.0,6.0,0.0,0.0,0.0,26.0,49.5,0.510,30.0,...,3.0,0.0,0.5,11.5,33.5,0.0,0.5,0.0,0.0,1
4,8.0,2.0,6.0,0.0,0.0,0.0,26.0,49.5,0.510,30.0,...,4.0,1.0,2.0,15.0,46.0,0.0,1.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14349,1.0,1.0,0.0,0.0,0.0,0.0,1.0,7.0,0.140,1.0,...,0.5,0.0,0.5,1.5,2.5,0.0,0.0,0.0,0.0,1
14350,2.0,1.0,1.0,0.0,0.0,1.0,7.5,15.0,0.275,19.0,...,0.0,0.0,0.0,1.0,3.0,0.0,0.0,2.0,2.0,1
14351,2.0,1.0,1.0,0.0,0.0,1.0,7.5,15.0,0.275,19.0,...,3.5,1.0,1.5,0.5,4.5,5.0,9.5,1.0,1.0,0
14352,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000,0.0,...,0.0,0.0,1.5,0.0,2.0,0.0,0.0,0.0,0.0,1


## Prepare Training and Test set

In [31]:
X, y = fighters_stats.drop('winner', axis=1), fighters_stats[['winner']]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [32]:
X_test.shape

(3589, 56)

## Cross Validation hyperparameters search

In [6]:
# Paramètres à tester pour le réglage des hyperparamètres
param_grid = {
    'learning_rate': [0.05, 0.1, 0.20],
    'max_depth': [3, 6, 9,],
    'min_child_weight': [1, 10, 100],
    'n_estimators': [100, 400, 800],
}

In [None]:
# Initialize XGBoost model
xgb = XGBClassifier(random_state=42, objective='binary:logistic')

# Create grid search instance
grid_search = GridSearchCV(xgb, param_grid, cv=5, verbose=2)

# Fit grid search
grid_search.fit(X_train, y_train)

# Print best results
print("Best hyperparameters : ", grid_search.best_params_)

# Save best hyperparameters
xgb_best_param = grid_search.best_params_

In [33]:
parameters = {'learning_rate': 0.2, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 100}

## Train model with best hyperparameters

In [34]:
# Instanciate XGBoost model with best hyperparameters
xgb_optimized = XGBClassifier(**parameters, objective= 'binary:logistic',)

# Train model using best hyperparameters
xgb_optimized.fit(X_train, y_train)

# Run prediction on test set
y_pred = xgb_optimized.predict(X_test)

# Evaluate model

In [35]:
# evaluate predictions
rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f"RMSE of the base model: {rmse:.3f}")

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

RMSE of the base model: 0.466
Accuracy: 78.24%


## Save model

In [36]:
xgb_optimized.save_model('model.json')