# AdaBoostClassifier

In [1]:
# Increase the width of the notebook: inject a CSS rule that sets elements with
# the `.container` class to 90% width (marked !important so it overrides the
# page's own rule).
# NOTE(review): `Markdown` is imported here but not used in the visible cells.
from IPython.display import display, HTML, Markdown
display(HTML("<style>.container { width:90% !important; }</style>"))

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import uniform, randint
import joblib

import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation warnings, undefined-metric
# warnings and failed-fit warnings raised by later cells are hidden as well —
# consider restricting it to specific warning categories.
warnings.filterwarnings('ignore')

In [2]:
# Load data: read the training and test tables from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Separate the "Score" target from the feature columns; the original frames
# are left untouched (drop returns a new DataFrame).
X_train, y_train = train.drop(columns="Score"), train["Score"]
X_test, y_test = test.drop(columns="Score"), test["Score"]

## Transformations

In [3]:
# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones, then combine both branches in a single ColumnTransformer.
numeric_features = ["WhiteElo", "EloDif"]
categorical_features = ["Opening_name", "Time_format", "Increment_binary"]

numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

categorical_transformer = Pipeline(
    steps=[
        # handle_unknown="ignore": categories unseen during fit do not raise
        # at transform time.
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [4]:
# Base model: AdaBoost with a fixed seed so repeated runs give the same result.
adb = AdaBoostClassifier(random_state=42)

# Preprocessing and classifier are chained in one pipeline, so the scaler and
# encoder are re-fit on the training part of every CV fold.
pipe_ab = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", adb)
])

# Grid Search: 3 x 3 = 9 candidates. The "classifier__" prefix routes each
# parameter to the pipeline step named "classifier".
param_grid_ab = {
    'classifier__n_estimators': [100, 200, 250],
    'classifier__learning_rate': [0.01, 0.4, 1.0],
}

# 3-fold cross-validation, candidates ranked by accuracy, fits run sequentially.
grid_search_ab = GridSearchCV(
    pipe_ab,
    param_grid=param_grid_ab,
    cv=3,
    scoring='accuracy',
    n_jobs=1,
)

grid_search_ab.fit(X_train, y_train)

print("Best AdaBoost parameters:", grid_search_ab.best_params_)
print("Best AdaBoost CV accuracy: {:.3f}".format(grid_search_ab.best_score_))

# Evaluate the selected pipeline on the held-out test set.
y_pred_ab = grid_search_ab.predict(X_test)
print("Test set accuracy (AdaBoost): {:.3f}".format(accuracy_score(y_test, y_pred_ab)))
# zero_division=0 states explicitly how a class that is never predicted is
# reported (precision 0.00) instead of depending on the global warning filter
# to hide the undefined-metric warning; the printed values are the same.
print(
    "\nClassification Report (AdaBoost):\n",
    classification_report(y_test, y_pred_ab, zero_division=0),
)

Best AdaBoost parameters: {'classifier__learning_rate': 0.01, 'classifier__n_estimators': 100}
Best AdaBoost CV accuracy: 0.543
Test set accuracy (AdaBoost): 0.539

Classification Report (AdaBoost):
               precision    recall  f1-score   support

   Black Win       0.54      0.39      0.45      4524
        Draw       0.00      0.00      0.00       566
   White Win       0.54      0.74      0.62      4910

    accuracy                           0.54     10000
   macro avg       0.36      0.38      0.36     10000
weighted avg       0.51      0.54      0.51     10000



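## The AdaBoost model achieved a test accuracy of 0.539, which is close to the 0.548 obtained by the Random Forest.

Note that the model essentially never predicts the "Draw" class (its precision and recall are both 0.00), so this accuracy comes almost entirely from the two win classes.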

In [5]:
# Persist the best pipeline found by the grid search (preprocessing + classifier).
# The call is the last expression of the cell, so the list of written file
# names is shown as the cell output.
joblib.dump(
    value=grid_search_ab.best_estimator_,
    filename='best_adaboost.joblib',
)

['best_adaboost.joblib']

## Random Search

In [6]:
# Search space for the randomized search.
# - randint(225, 325) draws integers from 225 to 324 (upper bound exclusive).
# - uniform(0.1, 1.9) is scipy's uniform(loc, scale): values lie in
#   [loc, loc + scale] = [0.1, 2.0], not [0.1, 1.9].
# NOTE(review): 'SAMME.R' is deprecated/removed in recent scikit-learn releases
# (deprecated in 1.4, removed in 1.6 — verify against the installed version).
# On such versions the candidates that sample it fail to fit, and the blanket
# warning filter set at the top of the notebook would hide the failure warning.
param_distributions_ab = {
    'classifier__n_estimators': randint(225, 325),
    'classifier__learning_rate': uniform(0.1, 1.9),
    'classifier__algorithm': ['SAMME', 'SAMME.R']
}

# 10 sampled candidates x 3 folds = 30 fits, run in parallel on all cores;
# random_state fixes which candidates are drawn.
random_search_ab = RandomizedSearchCV(
    pipe_ab,
    param_distributions=param_distributions_ab,
    n_iter=10,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42
)

random_search_ab.fit(X_train, y_train)

print("Best AdaBoost parameters (Random Search):", random_search_ab.best_params_)
print("Best AdaBoost CV accuracy (Random Search): {:.3f}".format(random_search_ab.best_score_))

# Evaluate the selected pipeline on the held-out test set.
# Note: this reassigns y_pred_ab, replacing the grid-search predictions.
y_pred_ab = random_search_ab.predict(X_test)
print("Test set accuracy (AdaBoost): {:.3f}".format(accuracy_score(y_test, y_pred_ab)))
# zero_division=0 states explicitly how a class that is never predicted is
# reported (precision 0.00) instead of depending on the global warning filter
# to hide the undefined-metric warning; the printed values are the same.
print(
    "\nClassification Report (AdaBoost):\n",
    classification_report(y_test, y_pred_ab, zero_division=0),
)

Best AdaBoost parameters (Random Search): {'classifier__algorithm': 'SAMME', 'classifier__learning_rate': 1.9428287191077893, 'classifier__n_estimators': 254}
Best AdaBoost CV accuracy (Random Search): 0.541
Test set accuracy (AdaBoost): 0.545

Classification Report (AdaBoost):
               precision    recall  f1-score   support

   Black Win       0.55      0.41      0.47      4524
        Draw       0.00      0.00      0.00       566
   White Win       0.54      0.73      0.62      4910

    accuracy                           0.55     10000
   macro avg       0.36      0.38      0.36     10000
weighted avg       0.52      0.55      0.52     10000



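### We got a small improvement on the test set (accuracy 0.539 → 0.545), although the cross-validation accuracy is marginally lower (0.541 vs. 0.543), so the gain may be within noise.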