# AdaBoostClassifier

In [1]:
# Increase the width of the notebook: inject a CSS rule that sets the
# `.container` element to 90% of the page width.
# NOTE(review): the `.container` selector presumably targets the classic
# Notebook layout -- confirm it has an effect in the front end you use.
from IPython.display import display, HTML, Markdown

# Rendering the HTML object is what applies the <style> override to the page.
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import AdaBoostClassifier

In [3]:
# Read the training and test CSV files from the current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# "Score" is the target column; every other column is kept as a feature.
X_train, y_train = train.drop(columns="Score"), train["Score"]
X_test, y_test = test.drop(columns="Score"), test["Score"]

## Transformations

In [4]:
from sklearn.pipeline import Pipeline

In [5]:
# Preprocessing: numeric columns are standardised, categorical columns are
# one-hot encoded. Columns not named in either list are not passed through
# (ColumnTransformer's default `remainder` drops them).
numeric_features = ["WhiteElo", "EloDif"]
categorical_features = ["Opening_name", "Time_format", "Increment_binary"]

numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

# handle_unknown="ignore": a category first seen at transform time is encoded
# as all zeros instead of raising an error.
categorical_transformer = Pipeline(
    steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [6]:
# Fit the preprocessor on the training features only, then apply that same
# fitted transformation to the test features (the test set is never used for
# fitting).
# NOTE(review): because the preprocessor is fitted here on the full training
# set, the cross-validation folds used by the searches below all share its
# scaling statistics and category vocabulary. Wrapping `preprocessor` and the
# classifier in a single Pipeline would refit it per fold.
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed  = preprocessor.transform(X_test)

## Train

In [7]:
# Base AdaBoost classifier used as the estimator for the hyper-parameter
# searches in this notebook. Only random_state is set (fixed seed); every
# other argument is left at the library default.
adb = AdaBoostClassifier(random_state=42)

In [8]:
# Parameter grid for the exhaustive search: 3 values x 3 values = 9 settings.
param_grid_ab = dict(
    n_estimators=[100, 200, 250],
    learning_rate=[0.01, 0.4, 1.0],
)


In [9]:
# Exhaustive search over param_grid_ab: every setting is scored by accuracy
# with 3-fold cross-validation, in a single process (n_jobs=1).
grid_search_ab = GridSearchCV(
    estimator=adb,
    param_grid=param_grid_ab,
    scoring='accuracy',
    cv=3,
    n_jobs=1,
)

In [10]:
# Run the grid search on the transformed training set. As the last expression
# of the cell, the fitted search object is also displayed as the cell output.
grid_search_ab.fit(X_train_transformed, y_train)

GridSearchCV(cv=3, estimator=AdaBoostClassifier(random_state=42), n_jobs=1,
             param_grid={'learning_rate': [0.01, 0.4, 1.0],
                         'n_estimators': [100, 200, 250]},
             scoring='accuracy')

In [11]:
# Report the winning setting and its mean cross-validated accuracy.
print("Best AdaBoost parameters:", grid_search_ab.best_params_)
print(f"Best AdaBoost CV accuracy: {grid_search_ab.best_score_:.3f}")

Best AdaBoost parameters: {'learning_rate': 1.0, 'n_estimators': 250}
Best AdaBoost CV accuracy: 0.544


In [12]:
# Evaluate the best estimator found by the grid search on the test set.
y_pred_ab = grid_search_ab.predict(X_test_transformed)
print("Test set accuracy (AdaBoost): {:.3f}".format(accuracy_score(y_test, y_pred_ab)))
# zero_division=0: if a class is never predicted its precision is undefined.
# Report that case as 0 explicitly instead of relying on the default, which
# prints the same 0 but emits an UndefinedMetricWarning.
print(
    "\nClassification Report (AdaBoost):\n",
    classification_report(y_test, y_pred_ab, zero_division=0),
)

Test set accuracy (AdaBoost): 0.547

Classification Report (AdaBoost):
               precision    recall  f1-score   support

   Black Win       0.54      0.46      0.50      4524
        Draw       0.00      0.00      0.00       566
   White Win       0.55      0.69      0.61      4910

    accuracy                           0.55     10000
   macro avg       0.36      0.38      0.37     10000
weighted avg       0.52      0.55      0.53     10000



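## The AdaBoost model achieved a test-set accuracy of 0.547, which is very similar to the 0.548 obtained by the Random Forest. However, it never correctly identifies a Draw (precision and recall are both 0.00 for that class).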

In [13]:
# Persist the tuned model: only the best estimator is written, not the whole
# search object.
# NOTE(review): the fitted `preprocessor` is not saved here. The estimator was
# trained on X_train_transformed, so anything that loads this file must feed
# it data transformed the same way.
import joblib
joblib.dump(value=grid_search_ab.best_estimator_, filename='best_adaboost.joblib')

['best_adaboost.joblib']

## Random Search

In [14]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

In [15]:
# Sampling distributions for the randomized search.
param_distributions_ab = dict(
    # randint(low, high): integers in [225, 324] -- the upper bound is exclusive.
    n_estimators=randint(225, 325),
    # uniform(loc, scale): floats in [loc, loc + scale] = [0.1, 2.0].
    learning_rate=uniform(0.1, 1.9),
    # Also search over the boosting algorithm variant.
    # NOTE(review): recent scikit-learn releases deprecate 'SAMME.R' --
    # confirm the installed version still accepts it.
    algorithm=['SAMME', 'SAMME.R'],
)

In [16]:
# Randomized search: sample 10 settings from param_distributions_ab and score
# each one by accuracy with 3-fold cross-validation.
random_search_ab = RandomizedSearchCV(
    estimator=adb,
    param_distributions=param_distributions_ab,
    n_iter=10,        # number of parameter settings that are sampled
    scoring='accuracy',
    cv=3,
    n_jobs=-1,        # use all available processors
    random_state=42,  # fixed seed so the sampled settings are reproducible
)

# Run the search on the transformed training set.
random_search_ab.fit(X_train_transformed, y_train)

In [18]:
# Report the best sampled setting and its mean cross-validated accuracy.
print("Best AdaBoost parameters (Random Search):", random_search_ab.best_params_)
print(f"Best AdaBoost CV accuracy (Random Search): {random_search_ab.best_score_:.3f}")

Best AdaBoost parameters (Random Search): {'algorithm': 'SAMME.R', 'learning_rate': 1.4453378978124864, 'n_estimators': 246}
Best AdaBoost CV accuracy (Random Search): 0.544


In [19]:
# Evaluate the best estimator found by the randomized search on the test set.
# NOTE: this rebinds y_pred_ab, replacing the grid-search predictions that
# were stored under the same name earlier in the notebook.
y_pred_ab = random_search_ab.predict(X_test_transformed)
print("Test set accuracy (AdaBoost): {:.3f}".format(accuracy_score(y_test, y_pred_ab)))
# zero_division=0: if a class is never predicted its precision is undefined.
# Report that case as 0 explicitly instead of relying on the default, which
# prints the same 0 but emits an UndefinedMetricWarning.
print(
    "\nClassification Report (AdaBoost):\n",
    classification_report(y_test, y_pred_ab, zero_division=0),
)

Test set accuracy (AdaBoost): 0.548

Classification Report (AdaBoost):
               precision    recall  f1-score   support

   Black Win       0.54      0.49      0.51      4524
        Draw       0.00      0.00      0.00       566
   White Win       0.55      0.67      0.61      4910

    accuracy                           0.55     10000
   macro avg       0.36      0.39      0.37     10000
weighted avg       0.52      0.55      0.53     10000



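### Not much of an improvement: the random search reaches a test accuracy of 0.548 versus 0.547 for the grid search, with the same CV accuracy (0.544).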