# Setup

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from joblib import dump

In [3]:
X_train_all_indices = np.load('../data/X_train_all_indices.npy')
y_train = np.load('../data/y_train.npy')

# Model Training

In [5]:
model = AdaBoostRegressor()

param_grid = {
    'n_estimators': [10, 25, 50, 100, 200, 300],
    'learning_rate': [0.0001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0],
    'loss': ['linear', 'square', 'exponential']
}

grid_search = GridSearchCV(model, param_grid, cv=3, n_jobs=-1, verbose=1, scoring='neg_mean_absolute_error')
grid_search.fit(X_train_all_indices, y_train)
print(f"Best parameters: {grid_search.best_params_}")

Fitting 3 folds for each of 126 candidates, totalling 378 fits
Best parameters: {'learning_rate': 0.0001, 'loss': 'square', 'n_estimators': 300}


In [7]:
model = AdaBoostRegressor()

param_grid = {
    'n_estimators': [300, 500, 1000],
    'learning_rate': [0.00001, 0.00005, 0.0001],
    'loss': ['linear', 'square', 'exponential']
}

grid_search = GridSearchCV(model, param_grid, cv=3, n_jobs=-1, verbose=1, scoring='neg_mean_absolute_error')
grid_search.fit(X_train_all_indices, y_train)
print(f"Best parameters: {grid_search.best_params_}")

Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best parameters: {'learning_rate': 1e-05, 'loss': 'exponential', 'n_estimators': 500}


In [8]:
model_all_indices = AdaBoostRegressor(n_estimators=500, learning_rate=1e-05, loss='exponential')
model_all_indices.fit(X_train_all_indices, y_train)
dump(model_all_indices, '../models/adaboost.joblib')

['../models/adaboost.joblib']