## Voting Classifier

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split


In [2]:
# Base estimators that are later combined into the voting ensemble.
# The random forest is a randomized learner, so its seed is pinned to make
# the reported accuracies reproducible after "Restart Kernel -> Run All".
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

In [6]:
# Generate the make_moons dataset (seeded) and hold out 20% of it for testing.
from sklearn.datasets import make_moons

x, y = make_moons(
    n_samples=2000,
    noise=0.15,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Hard-voting ensemble built from the three base classifiers, then fitted
# on the training split.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('svc', svm_clf),
    ],
    voting="hard",
)
voting_clf.fit(X_train, y_train)

In [5]:
# Fit each classifier on the training split and print its test-set accuracy,
# one "<ClassName> <accuracy>" line per model.
from sklearn.metrics import accuracy_score

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    # fit() returns the estimator itself, so fitting and predicting can be chained.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.86875
RandomForestClassifier 0.98125
SVC 0.99375
VotingClassifier 0.99375


## Bagging and Pasting

In [18]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble: 500 decision trees, each trained on 300 training
# instances drawn with replacement (bootstrap=True), using all CPU cores.
# random_state pins the resampling so a fresh run reproduces the same score.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=300,
    bootstrap=True,
    n_jobs=-1,
    random_state=42,
)

bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [19]:
# Test-set accuracy of the predictions currently held in y_pred.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.99

In [20]:
## Out of bag evaluation
# Same bagging setup as before, but with oob_score=True so that the fitted
# ensemble also exposes an accuracy estimate computed on the training
# instances each tree did not see (read it from `oob_score_`).
# random_state pins the resampling so both the test score and the OOB
# score are reproducible on a fresh run.
bag_clf2 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=300,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
    random_state=42,
)

bag_clf2.fit(X_train, y_train)
y_pred = bag_clf2.predict(X_test)

In [21]:
# Test-set accuracy of the predictions currently held in y_pred.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9875

In [22]:
# Out-of-bag accuracy estimate; the attribute exists because bag_clf2 was
# constructed with oob_score=True.
bag_clf2.oob_score_

0.98375

In [23]:
## Random Patches method
# This configuration resamples BOTH the training instances
# (max_samples=300, bootstrap=True) and the features
# (bootstrap_features=True) for every tree, which is the Random Patches
# method. Random Subspaces would instead keep every training instance
# (bootstrap=False, max_samples=1.0) and resample only the features.
# random_state pins the resampling so a fresh run reproduces the same score.

bag_clf3 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=300,
    bootstrap=True,
    n_jobs=-1,
    bootstrap_features=True,
    random_state=42,
)

bag_clf3.fit(X_train, y_train)
y_pred = bag_clf3.predict(X_test)

In [24]:
# Test-set accuracy of the predictions currently held in y_pred.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.98

## Random Forest

In [26]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees, each limited to 16 leaf nodes, trained on all
# CPU cores. random_state pins the forest's randomness so the accuracy below
# is reproducible on a fresh run.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)
accuracy_score(y_test, y_pred_rf)

0.99

In [27]:
## Feature importance
# Fit a 500-tree random forest on the full iris dataset and print each
# feature's importance score next to its name.
from sklearn.datasets import load_iris

iris = load_iris()
# random_state pins the forest so the printed importances are reproducible.
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)
rnd_clf.fit(iris["data"], iris["target"])

for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.10102972672793561
sepal width (cm) 0.02672728079504671
petal length (cm) 0.451437355972695
petal width (cm) 0.4208056365043225
