<a href="https://colab.research.google.com/github/AravaChoudhary/ML-Pipeline-/blob/main/Adaboost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=1)

In [8]:
from sklearn.ensemble import AdaBoostClassifier

cl = AdaBoostClassifier()
cl.fit(X_train,y_train)

In [9]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

y_pred = cl.predict(X_test)
print('Current Model Performance : ')
print(f'Accuracy : {accuracy_score(y_test,y_pred)} ')
print('Classification Report : ')
print(classification_report(y_test,y_pred))
print('Confusion Matrix : ')
print(confusion_matrix(y_test,y_pred))

Current Model Performance : 
Accuracy : 0.85 
Classification Report : 
              precision    recall  f1-score   support

           0       0.84      0.82      0.83        90
           1       0.86      0.87      0.86       110

    accuracy                           0.85       200
   macro avg       0.85      0.85      0.85       200
weighted avg       0.85      0.85      0.85       200

Confusion Matrix : 
[[74 16]
 [14 96]]


In [15]:
# Adaboost Classifier
from sklearn.model_selection import GridSearchCV

params = {
    'n_estimators' : [50,100,200],
    'learning_rate' : [0.01,0.1,1.0,1.5,2.0],
    'algorithm' : ['SAMME','SAMME.R']
}

ada = AdaBoostClassifier()
clf=GridSearchCV(estimator=ada,param_grid = params,cv=5,verbose=3,n_jobs=-1) # if n_jobs=-1 , we will not get fitting information

clf.fit(X_train,y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


In [11]:
clf.best_params_

{'algorithm': 'SAMME.R', 'learning_rate': 0.1, 'n_estimators': 200}

In [13]:
best_model = clf.best_estimator_

In [14]:
y_pred_tuned = best_model.predict(X_test)
print('Current Model Performance : ')
print(f'Accuracy : {accuracy_score(y_test,y_pred_tuned)} ')
print('Classification Report : ')
print(classification_report(y_test,y_pred_tuned))
print('Confusion Matrix : ')
print(confusion_matrix(y_test,y_pred_tuned))

Current Model Performance : 
Accuracy : 0.88 
Classification Report : 
              precision    recall  f1-score   support

           0       0.88      0.84      0.86        90
           1       0.88      0.91      0.89       110

    accuracy                           0.88       200
   macro avg       0.88      0.88      0.88       200
weighted avg       0.88      0.88      0.88       200

Confusion Matrix : 
[[ 76  14]
 [ 10 100]]


In [21]:
# Adaboost Regressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.33, random_state=1)

In [22]:
from sklearn.ensemble import AdaBoostRegressor
rg = AdaBoostRegressor()
rg.fit(X_train,y_train)

In [23]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

y_predr = rg.predict(X_test)
print('Current Model Performance : ')
print(f'R2 Score : {r2_score(y_test,y_predr)} ')
print('MAE : ')
print(mean_absolute_error(y_test,y_predr))
print('MSE : ')
print(mean_squared_error(y_test,y_predr))

Current Model Performance : 
R2 Score : 0.9529407733610273 
MAE : 
15.211290741893523
MSE : 
392.8196536143664


In [25]:
# Adaboost Regressor
from sklearn.model_selection import GridSearchCV

params = {
    'n_estimators' : [50,100,200],
    'learning_rate' : [0.01,0.1,1.0,1.5,2.0],
    'loss' : ['linear','square','exponential']
}

adar = AdaBoostRegressor()
clfr=GridSearchCV(estimator=adar,param_grid = params,cv=5,verbose=3,n_jobs=-1)

clfr.fit(X_train,y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits


In [26]:
clfr.best_params_

{'learning_rate': 1.5, 'loss': 'square', 'n_estimators': 200}

In [27]:
best_modelr = clfr.best_estimator_

y_predr = best_modelr.predict(X_test)
print('Current Model Performance : ')
print(f'R2 Score : {r2_score(y_test,y_predr)} ')
print('MAE : ')
print(mean_absolute_error(y_test,y_predr))
print('MSE : ')
print(mean_squared_error(y_test,y_predr))

Current Model Performance : 
R2 Score : 0.9634280556936999 
MAE : 
13.020858435954265
MSE : 
305.27867796508036
