In [31]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier,VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_moons, load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

In [19]:
# Noisy two-moons toy dataset. Seeding the generator, the split and the
# stochastic estimators makes the scores below reproducible on a fresh kernel.
X, y = make_moons(n_samples=1000, noise=0.4, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

log_reg = LogisticRegression()
rnd_clf = RandomForestClassifier(random_state=42)
# probability=True enables predict_proba, which soft voting needs.
svm_clf = SVC(probability=True, random_state=42)

voting_clf = VotingClassifier(
    estimators=[("lr", log_reg), ("rf", rnd_clf), ("svc", svm_clf)],
    voting="soft",
)

# Compare each base model against the soft-voting ensemble on the test split.
# The ensemble is fitted exactly once, here in the loop; a separate fit
# beforehand would only retrain every member a second time.
for clf in (log_reg, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.824
RandomForestClassifier 0.824
SVC 0.864
VotingClassifier 0.872


In [21]:
# Bagging: 500 decision trees, each trained on a bootstrap sample of 100
# training instances. oob_score=True also evaluates the ensemble on the
# instances each tree did not see, giving a validation estimate without a
# separate hold-out set.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
    random_state=42,  # fixed seed so both scores below are reproducible
)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
print(accuracy_score(y_test, y_pred))  # accuracy on the held-out test split
print(bag_clf.oob_score_)              # out-of-bag accuracy estimate

0.868
0.86


In [22]:
# Out-of-bag class-probability estimates for the training instances:
# one row per instance, one column per class.
bag_clf.oob_decision_function_

array([[0.96803653, 0.03196347],
       [0.97453704, 0.02546296],
       [0.91990847, 0.08009153],
       ...,
       [0.06772009, 0.93227991],
       [0.25462963, 0.74537037],
       [0.80974478, 0.19025522]])

In [23]:
# Random forest of 500 trees, each capped at 16 leaf nodes, trained on all cores.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,  # fixed seed so the reported accuracy is reproducible
)
rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)
print(accuracy_score(y_test, y_pred_rf))

0.864


In [27]:
# Extremely randomized trees with the same size limits as the random forest
# (500 trees, at most 16 leaf nodes each), for a like-for-like comparison.
ext_clf = ExtraTreesClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,  # fixed seed so the reported accuracy is reproducible
)
ext_clf.fit(X_train, y_train)
y_pred_ext = ext_clf.predict(X_test)
print(accuracy_score(y_test, y_pred_ext))

0.876


In [29]:
# Feature importances from a random forest fitted on the full iris dataset.
# NOTE: this rebinds `rnd_clf`, which previously held the moons model.
iris = load_iris()
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    n_jobs=-1,
    random_state=42,  # fixed seed so the importances are reproducible
)
rnd_clf.fit(iris["data"], iris["target"])
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.09934422344957387
sepal width (cm) 0.0259621951174956
petal length (cm) 0.4251900067595551
petal width (cm) 0.44950357467337543


In [37]:
# AdaBoost over 200 decision stumps (depth-1 trees) with a reduced learning rate.
# `algorithm` is intentionally not passed: "SAMME.R" was the default wherever it
# existed, was deprecated in scikit-learn 1.4 and is rejected by 1.6+, so
# relying on the library default keeps this cell runnable across versions.
# On releases without SAMME.R the default is "SAMME", so the score may differ.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    learning_rate=0.15,
    random_state=42,  # fixed seed so the reported accuracy is reproducible
)
ada_clf.fit(X_train, y_train)
y_pred_ada = ada_clf.predict(X_test)
print(accuracy_score(y_test, y_pred_ada))

0.856
