# GradientBoostingClassifier

In [1]:
# Widen the notebook container to 90% of the browser window so wide outputs fit.
from IPython.display import display, HTML, Markdown

notebook_width_css = "<style>.container { width:90% !important; }</style>"
display(HTML(notebook_width_css))

In [2]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
# Read the pre-split train/test CSV files, then separate the target column
# from the feature columns for each split.
target_column = "Score"

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

X_train, y_train = train.drop(columns=target_column), train[target_column]
X_test, y_test = test.drop(columns=target_column), test[target_column]

## Transformations

In [4]:
from sklearn.pipeline import Pipeline

In [5]:
# Column groups routed to each preprocessing branch
numeric_features = ["WhiteElo", "EloDif"]
categorical_features = ["Opening_name", "Time_format", "Increment_binary"]

# Numeric branch: standardize the columns with StandardScaler
numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

# Categorical branch: one-hot encode; handle_unknown="ignore" keeps transform()
# from raising on categories that were not present when the encoder was fitted
categorical_transformer = Pipeline(
    steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Apply each branch to its own column group and concatenate the results
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [6]:
# Learn the scaling/encoding from the training features only, then reuse the
# fitted preprocessor on the test features.
# NOTE(review): the preprocessor is fitted on all of X_train before the grid
# search splits it into CV folds, so every validation fold has already
# contributed to the scaling/encoding statistics. Putting the preprocessor and
# the classifier in one Pipeline would avoid this — confirm whether it matters.
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

## Train

In [7]:
# Base estimator handed to the grid search; the seed is fixed (random_state=42)
# so the stochastic parts of boosting (e.g. subsampling) are repeatable.
gbc = GradientBoostingClassifier(random_state=42)

In [8]:
# Hyperparameter grid for the search: 2 * 3 * 3 * 2 = 36 candidate settings
param_grid_gbc = dict(
    n_estimators=[100, 200],
    learning_rate=[0.03, 0.1, 0.2],
    max_depth=[3, 5, 8],
    subsample=[0.6, 1.0],
)

In [9]:
# Exhaustive search over param_grid_gbc, ranking candidates by their
# 3-fold cross-validated accuracy; fits run in a single process (n_jobs=1).
grid_search_gbc = GridSearchCV(
    estimator=gbc,
    param_grid=param_grid_gbc,
    scoring="accuracy",
    cv=3,
    n_jobs=1,
)

In [10]:
# Fit on the transformed training set. With 36 candidate settings and cv=3 this
# trains 108 models sequentially (n_jobs=1), so expect this cell to be the slow one.
grid_search_gbc.fit(X_train_transformed, y_train)

GridSearchCV(cv=3, estimator=GradientBoostingClassifier(random_state=42),
             n_jobs=1,
             param_grid={'learning_rate': [0.03, 0.1, 0.2],
                         'max_depth': [3, 5, 8], 'n_estimators': [100, 200],
                         'subsample': [0.6, 1.0]},
             scoring='accuracy')

In [11]:
# Show the winning hyperparameter combination and its mean cross-validated accuracy
best_params_gbc = grid_search_gbc.best_params_
best_cv_accuracy_gbc = grid_search_gbc.best_score_

print(f"Best GBC parameters: {best_params_gbc}")
print(f"Best GBC CV accuracy: {best_cv_accuracy_gbc:.3f}")

Best GBC parameters: {'learning_rate': 0.03, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.6}
Best GBC CV accuracy: 0.548


In [12]:
# Evaluate the tuned model on the held-out test set.
y_pred_gbc = grid_search_gbc.predict(X_test_transformed)
print("Test set accuracy (GBC): {:.3f}".format(accuracy_score(y_test, y_pred_gbc)))

# zero_division=0: precision is undefined for a class the model never predicts.
# State explicitly that it is reported as 0.0, instead of relying on the default
# "warn" mode, which reports the same 0.0 but emits an UndefinedMetricWarning
# for each affected metric. The numbers in the report are unchanged.
print(
    "\nClassification Report (GBC):\n",
    classification_report(y_test, y_pred_gbc, zero_division=0),
)

Test set accuracy (GBC): 0.543

Classification Report (GBC):
               precision    recall  f1-score   support

   Black Win       0.54      0.44      0.48      4524
        Draw       0.00      0.00      0.00       566
   White Win       0.54      0.71      0.61      4910

    accuracy                           0.54     10000
   macro avg       0.36      0.38      0.37     10000
weighted avg       0.51      0.54      0.52     10000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
import joblib

# Persist the refit best classifier. Only the estimator is written: the fitted
# `preprocessor` is not part of this artifact, so whoever loads it has to apply
# the same transformation to raw features before calling predict().
model_path = 'best_gradient_boosting.joblib'
joblib.dump(grid_search_gbc.best_estimator_, model_path)

['best_gradient_boosting.joblib']

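### Gradient Boosting Classifier accuracy is also average at 0.543.

The test accuracy (0.543) is in line with the cross-validated accuracy (0.548), but it is only about five points above always predicting the majority class (White Win: 4910 / 10000 = 0.491). Precision and recall for the Draw class are both 0.00, i.e. none of the 566 drawn games in the test set is identified.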