**Voting Classifiers**

In [2]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [3]:
# Build a noisy two-moons toy dataset, then hold part of it out for testing.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

In [4]:
from sklearn.svm import SVC

In [5]:
# Ensemble of three different model families. No `voting` argument is passed,
# so VotingClassifier's default voting mode applies.
voting_clf = VotingClassifier(estimators=[
    ("lr", LogisticRegression(random_state=42)),
    ("rf", RandomForestClassifier(random_state=42)),
    ("svc", SVC(random_state=42)),
])

In [6]:
# Train the ensemble on the training split; the fitted ensemble is the cell output.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(random_state=42))])

In [7]:
# Report the test-set accuracy of every fitted sub-estimator, one per line.
for name, clf in voting_clf.named_estimators_.items():
    print(name, "=", clf.score(X_test, y_test))

lr = 0.864
rf = 0.896
svc = 0.896


In [8]:
# Ensemble prediction for the first test instance (the [:1] slice keeps the input 2-D).
voting_clf.predict(X_test[:1])

array([1], dtype=int32)

In [9]:
# Prediction of each fitted sub-estimator for the first test instance.
[estimator.predict(X_test[:1]) for estimator in voting_clf.estimators_]

[array([1], dtype=int32), array([1], dtype=int32), array([0], dtype=int32)]

*The voting classifier outperforms all the individual classifiers*

In [10]:
# Test-set accuracy of the whole voting ensemble.
voting_clf.score(X_test, y_test)

0.912

**Bagging and pasting**

In [12]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [13]:
# Bagging ensemble of 500 decision trees with max_samples=100 per tree;
# n_jobs=-1 requests all available CPU cores.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,
    random_state=42,
)

In [14]:
# Train the bagging ensemble on the training split; the fitted model is the cell output.
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1, random_state=42)

In [15]:
from sklearn.metrics import accuracy_score

# Accuracy of the bagging ensemble on the held-out test split.
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.904

**Random Forests**

In [16]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees, each limited to 16 leaf nodes, trained on the
# training split and then used to predict the test split.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score

# Accuracy of the random forest predictions on the held-out test split.
accuracy_score(y_test, y_pred_rf)

0.912

*Feature importance in random forests*

In [18]:
from sklearn.datasets import load_iris

# Fit a 500-tree forest on the full iris dataset and print every feature's
# importance, rounded to two decimals, next to its column name.
# NOTE: this rebinds `rnd_clf`, replacing any forest bound to that name earlier.
iris = load_iris(as_frame=True)
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)
rnd_clf.fit(iris.data, iris.target)
for name, score in zip(iris.data.columns, rnd_clf.feature_importances_):
    print(round(score, 2), name)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


**Boosting**

*AdaBoost*

In [19]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 30 depth-1 decision trees (stumps) with a learning rate of 0.5,
# trained on the training split; the fitted model is the cell output.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=30,
    learning_rate=0.5,
    random_state=42,
)
ada_clf.fit(X_train, y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=30, random_state=42)

*Gradient Boosting*

In [21]:
import numpy as np

# Synthetic 1-D regression data: y = 3x^2 + Gaussian noise.
# NOTE: this rebinds `X` and `y`.
np.random.seed(42)                   # seed the global NumPy RNG for reproducibility
X = np.random.rand(100, 1) - 0.5     # 100 samples, uniform draws shifted down by 0.5
y = (
    3 * X[:, 0] ** 2                 # noiseless quadratic signal
    + 0.05 * np.random.randn(100)    # standard-normal noise scaled by 0.05
)

In [25]:
from sklearn.ensemble import GradientBoostingRegressor

# Small gradient-boosting ensemble: 3 trees of depth 2 with learning rate 1.0,
# fitted on the current `X`, `y`; the fitted model is the cell output.
gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=3,
    learning_rate=1.0,
    random_state=42,
)
gbrt.fit(X, y)


GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=3,
                          random_state=42)

**Stacking**

In [27]:
from sklearn.ensemble import StackingClassifier

# Stacking ensemble: three base estimators whose outputs feed a random-forest
# final estimator, using cv=5; trained on the training split.
stacking_clf = StackingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(probability=True, random_state=42)),
    ],
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5,
)
stacking_clf.fit(X_train, y_train)

StackingClassifier(cv=5,
                   estimators=[('lr', LogisticRegression(random_state=42)),
                               ('rf', RandomForestClassifier(random_state=42)),
                               ('svc', SVC(probability=True, random_state=42))],
                   final_estimator=RandomForestClassifier(random_state=43))

In [28]:
from sklearn.metrics import accuracy_score

# Accuracy of the stacking ensemble on the held-out test split.
y_pred_stacking = stacking_clf.predict(X_test)
accuracy_score(y_test, y_pred_stacking)

0.928