# *Ensemble Models*

In [31]:
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingRegressor

%matplotlib inline

### Dataset

In [2]:
# Synthetic two-class "moons" data: 500 noisy points, followed by an 80/20
# train/test split. Both calls are seeded, so the same samples and the same
# split come back on every run.
x, y = make_moons(n_samples=500, noise=0.35, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

### Voting Classifiers: Hard Voting

In [3]:
# Three different base classifiers combined by majority ("hard") vote.
log_clf = LogisticRegression()
# Seeded like the other forests in this notebook so that reruns produce the
# same trees, the same votes and the same accuracies.
ran_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

voting_clf = VotingClassifier(
    estimators=[("lr", log_clf), ("rf", ran_clf), ("svc", svm_clf)],
    voting="hard",
)
# Last expression of the cell, so the fitted ensemble's repr is displayed.
voting_clf.fit(x_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('svc', SVC())])

### Accuracy check

In [4]:
# Fit every model on the training split and print its accuracy on the test split.
# The printed label is the estimator's class name (e.g. "SVC"), not the name of
# the variable that holds it.
for clf in (log_clf, ran_clf, svm_clf, voting_clf):
    y_pred = clf.fit(x_train, y_train).predict(x_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.84
RandomForestClassifier 0.84
SVC 0.85
VotingClassifier 0.84


### Bagging & Pasting in Scikit-Learn

In [5]:
# Bagging: 500 decision trees, each trained on 100 training samples drawn with
# replacement (bootstrap=True).
# The base tree is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases; the positional
# form is accepted under both spellings.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    random_state=42,  # seeded like the other models here so reruns match
)

bag_clf.fit(x_train, y_train)
y_pred = bag_clf.predict(x_test)

### Evaluation

In [6]:
# Out-of-bag (OOB) evaluation: with bootstrap sampling each tree leaves some
# training samples unseen, and oob_score=True scores the ensemble on those
# samples without touching the test split.
bag_clf1 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
    random_state=42,  # fixed seed: the OOB score otherwise changes on every run
)

bag_clf1.fit(x_train, y_train)
# Last expression of the cell, so the OOB accuracy estimate is displayed.
bag_clf1.oob_score_

0.8775

In [19]:
# Held-out check: accuracy of the OOB-scored bagging ensemble on the test split,
# for comparison with the OOB estimate.
y_pred1 = bag_clf1.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_pred1)

0.84

In [10]:
# Out-of-bag decision function for the training set: one row per training
# sample and one column per class, as the displayed array shows. Per the
# scikit-learn docs, a row can be NaN if that sample was never left out of any
# bootstrap draw.
bag_clf1.oob_decision_function_

array([[1.        , 0.        ],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [0.58857143, 0.41142857],
       [0.93370166, 0.06629834],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.6827957 , 0.3172043 ],
       [0.26041667, 0.73958333],
       [0.02339181, 0.97660819],
       [0.24022346, 0.75977654],
       [0.73863636, 0.26136364],
       [1.        , 0.        ],
       [0.01092896, 0.98907104],
       [0.65822785, 0.34177215],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.6978022 , 0.3021978 ],
       [0.00543478, 0.99456522],
       [0.05555556, 0.94444444],
       [0.58139535, 0.41860465],
       [0.69892473, 0.30107527],
       [0.95977011, 0.04022989],
       [0.35135135, 0.64864865],
       [0.21387283, 0.78612717],
       [1.        , 0.        ],
       [0.00515464, 0.99484536],
       [0.38219895, 0.61780105],
       [0.

# *Random Forest Classifier*

In [21]:
# Random forest: 500 trees, each capped at 16 leaf nodes, seeded for
# reproducibility and fitted in parallel (n_jobs=-1).
rfor_clf = RandomForestClassifier(
    n_jobs=-1,
    random_state=42,
    max_leaf_nodes=16,
    n_estimators=500,
)

# Last expression of the cell, so the fitted forest's repr is displayed.
rfor_clf.fit(x_train, y_train)

RandomForestClassifier(max_leaf_nodes=16, n_estimators=500, n_jobs=-1,
                       random_state=42)

In [22]:
# F1 score of the random forest on the test split.
# accuracy_score(y_test, y_pred_rf) is the drop-in alternative metric.
y_pred_rf = rfor_clf.predict(x_test)
f1_score(y_true=y_test, y_pred=y_pred_rf)

0.8543689320388349

### *Feature Importance: Iris Dataset & Random Forest Classifier*

In [26]:
# Feature importance on Iris: fit a seeded 500-tree forest on all four
# measurements, then print one importance score per feature.
iris = load_iris()
iris_rf_clf = RandomForestClassifier(
    n_estimators=500,
    n_jobs=-1,
    random_state=42,
)
iris_rf_clf.fit(iris["data"], iris["target"])

# feature_importances_ is ordered like iris["feature_names"], so zipping the two
# pairs each score with its feature name.
for name, score in zip(iris["feature_names"], iris_rf_clf.feature_importances_):
    print(name, ": ", score)

sepal length (cm) :  0.11249225099876375
sepal width (cm) :  0.02311928828251033
petal length (cm) :  0.4410304643639577
petal width (cm) :  0.4233579963547682


### *AdaBoost Classifier*

In [28]:
# AdaBoost over 200 decision stumps (max_depth=1 trees) with learning rate 0.5.
# The stump is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases; the positional
# form is accepted under both spellings.
# `algorithm` is left at the library default: "SAMME.R" was already the default
# in the release this notebook was recorded with (it does not appear in the
# fitted model's repr), and recent releases no longer accept that value.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    learning_rate=0.5,
    random_state=42,
)

# Last expression of the cell, so the fitted model's repr is displayed.
ada_clf.fit(x_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200, random_state=42)

In [29]:
# F1 score of the AdaBoost ensemble on the test split.
ada_preds = ada_clf.predict(x_test)
f1_score(y_true=y_test, y_pred=ada_preds)

0.8971962616822429

### *Gradient Boosting Regressor*

In [32]:
# Gradient boosting with three depth-2 regression trees and learning_rate=1.
# NOTE(review): this fits on the full moons data (x, y), using the 0/1 class
# labels as regression targets, rather than on the training split — confirm
# that this is intended.
gbrt = GradientBoostingRegressor(
    n_estimators=3,
    max_depth=2,
    learning_rate=1,
)
# Last expression of the cell, so the fitted regressor's repr is displayed.
gbrt.fit(x, y)

GradientBoostingRegressor(learning_rate=1, max_depth=2, n_estimators=3)