In [None]:
'''
AdaBoost
    - Also known as 'Adaptive Boosting'

In [None]:
'''

How AdaBoost Works
AdaBoost is a boosting ensemble method that works especially well with decision trees.
The key idea of boosting is learning from previous mistakes: each new weak learner is trained with
increased weight on the data points that the previous learners misclassified.


![15%20AdaBoost.png](attachment:15%20AdaBoost.png)

### AdaBoost_Formula

![15%20AdaBoost_Formula.png](attachment:15%20AdaBoost_Formula.png)

### 01 AdaBoost (Classifier) using scikit-learn

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Fixed seed so the train/test split and the boosted model are reproducible
# across "Restart Kernel -> Run All". The value 7 matches the seed used by
# the other examples in this notebook.
seed = 7

# Load the iris data set: X is the feature matrix, y the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=seed
)

# AdaBoostClassifier parameters:
#   estimator     - the weak learner to boost (this argument was named
#                   `base_estimator` before scikit-learn 1.2). When it is
#                   omitted, a decision tree is used as the weak learner.
#                   Other learners can be supplied instead.
#   n_estimators  - number of weak learners to train iteratively.
#   learning_rate - scales the contribution (weight) of each weak learner;
#                   the default value is 1.
#   random_state  - seeds the estimator so repeated runs give the same model.
abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=seed)

# Train the AdaBoost classifier. fit() returns the fitted estimator, so
# `model` and `abc` refer to the same object.
model = abc.fit(X_train, y_train)

# Predict the response for the held-out test set.
y_pred = model.predict(X_test)

# Model accuracy: how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9777777777777777


### 02 AdaBoost (Classifier) with Cross-Validation

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import KFold, cross_val_score

# Column labels are supplied explicitly, so pandas reads every row of the
# file as data rather than treating the first row as a header.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pd.read_csv("pima-indians-diabetes.csv", names=names)

# The first eight columns are the input features; the ninth ('class') is
# the target label.
array = data.values
X = array[:, 0:8]
Y = array[:, 8]

seed = 7
num_trees = 30

# 10-fold cross-validation. `shuffle=True` is required whenever
# `random_state` is given: recent scikit-learn releases raise a ValueError
# for a seeded but unshuffled KFold, because the seed would have no effect.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

# Boosted ensemble of `num_trees` weak learners, seeded for reproducibility.
model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)

# One score per fold; report the mean across the ten folds.
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())


### 03 AdaBoost(Regression)

In [4]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import explained_variance_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils import Bunch, shuffle

# Original source file of the Boston housing data, used only when the
# scikit-learn loader is unavailable (see load_boston_housing below).
BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
BOSTON_FEATURE_NAMES = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]


def load_boston_housing():
    """Return the Boston housing data with `data`, `target` and `feature_names`.

    Uses `datasets.load_boston()` when the installed scikit-learn still
    ships it. The loader was removed in scikit-learn 1.2, so on newer
    releases the same arrays are rebuilt from the original source file,
    which requires network access.

    NOTE(review): scikit-learn dropped this data set because of ethical
    concerns about one of its features; consider switching the example to
    `datasets.fetch_california_housing()` in a follow-up.
    """
    try:
        return datasets.load_boston()
    except (ImportError, AttributeError):
        # In the raw file each record spans two physical lines after a
        # 22-line preamble: the even rows carry the first 11 features, the
        # odd rows carry the last 2 features followed by the target.
        raw = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None).values
        return Bunch(
            data=np.hstack([raw[::2, :], raw[1::2, :2]]),
            target=raw[1::2, 2],
            feature_names=np.array(BOSTON_FEATURE_NAMES),
        )


housing_data = load_boston_housing()

# Labelled view of the features for inspection; the model below is trained
# on the raw arrays.
df = pd.DataFrame(housing_data.data, columns=housing_data.feature_names)

# Shuffle features and targets together, then hold out 20% for testing.
# Both steps are seeded so the split is reproducible.
X, y = shuffle(housing_data.data, housing_data.target, random_state=7)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7
)

# Boost 400 depth-4 regression trees. The weak learner is passed
# positionally because its keyword name changed from `base_estimator` to
# `estimator` in scikit-learn 1.2.
ab_regressor = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4), n_estimators=400, random_state=7
)
ab_regressor.fit(X_train, y_train)
y_pred = ab_regressor.predict(X_test)

print("MSE:", mean_squared_error(y_test, y_pred))
print("Explained Variance:", explained_variance_score(y_test, y_pred))


MSE: 22.700284929151213
Explained Variance: 0.7942180921426145
