# GradientBoostingClassifier

In [1]:
# increase the width of the notebook
from IPython.display import display, HTML, Markdown
display(HTML("<style>.container { width:90% !important; }</style>"))

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import GradientBoostingClassifier
import joblib

In [2]:
# Load data
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

y_train = train["Score"]
y_test = test["Score"]

X_train = train.drop("Score", axis=1)
X_test = test.drop("Score", axis=1)

## Transformations

In [3]:
# Preprocessing
numeric_features = ["WhiteElo", "EloDif"]
categorical_features = ["Opening_name", "Time_format", "Increment_binary"]

numeric_transformer = Pipeline([
    ("scaler", StandardScaler())
])
categorical_transformer = Pipeline([
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features)
])

## Full pipeline with Gradient Boosting Classifier

In [4]:
gbc = GradientBoostingClassifier(random_state=42)

pipe_gbc = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", gbc)
])

# Hyperparameter grid
param_grid_gbc = {
    'classifier__n_estimators': [100, 200],
    'classifier__learning_rate': [0.03, 0.1, 0.2],
    'classifier__max_depth': [3, 5, 8],
    'classifier__subsample': [0.6, 1.0],
}

# Grid Search
grid_search_gbc = GridSearchCV(
    pipe_gbc,
    param_grid=param_grid_gbc,
    cv=3,
    scoring='accuracy',
    n_jobs=1,
)

grid_search_gbc.fit(X_train, y_train)

# Report results
print("Best GBC parameters:", grid_search_gbc.best_params_)
print("Best GBC CV accuracy: {:.3f}".format(grid_search_gbc.best_score_))

y_pred_gbc = grid_search_gbc.predict(X_test)
print("Test set accuracy (GBC): {:.3f}".format(accuracy_score(y_test, y_pred_gbc)))
print("\nClassification Report (GBC):\n", classification_report(y_test, y_pred_gbc))

Best GBC parameters: {'classifier__learning_rate': 0.03, 'classifier__max_depth': 3, 'classifier__n_estimators': 100, 'classifier__subsample': 1.0}
Best GBC CV accuracy: 0.548
Test set accuracy (GBC): 0.543

Classification Report (GBC):
               precision    recall  f1-score   support

   Black Win       0.54      0.43      0.48      4524
        Draw       0.00      0.00      0.00       566
   White Win       0.54      0.71      0.62      4910

    accuracy                           0.54     10000
   macro avg       0.36      0.38      0.37     10000
weighted avg       0.51      0.54      0.52     10000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [5]:
joblib.dump(grid_search_gbc.best_estimator_, 'best_gradient_boosting.joblib')

['best_gradient_boosting.joblib']

### Gradient Boosting Classifier accuracy is also average at 0.543.