In [1]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
# Seed the generator so the noisy two-moons dataset is identical on every
# Restart & Run All; without a seed each run produces different scores.
X, y = make_moons(n_samples=1000, noise=0.15, random_state=42)
# Hold out 20% of the samples for evaluation, with a fixed seed so the same
# rows land in the test split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [3]:
# Three different classifier families, all created with default settings.
log_clf = LogisticRegression()
# The forest relies on random resampling and random feature selection, so it
# is seeded to keep its predictions repeatable across runs.
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

In [4]:
# Combine the three classifiers with hard voting, then fit the ensemble on
# the training split (the fitted estimator is displayed as the cell output).
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('svc', svm_clf),
    ],
    voting='hard',
)
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='lbfgs', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                             

In [5]:
from sklearn.metrics import accuracy_score
# Fit every model on the training split and print its test-set accuracy,
# labelled with the estimator's class name.
for model in (log_clf, rnd_clf, svm_clf, voting_clf):
    # fit() returns the estimator itself, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    print(type(model).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.89
RandomForestClassifier 0.98
SVC 0.985
VotingClassifier 0.985


### Bagging and Pasting

In [6]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [7]:
# Bagging ensemble of 500 decision trees. Each tree is trained on 100
# samples drawn with replacement (bootstrap=True) from the data passed to
# fit(); n_jobs=-1 lets scikit-learn use every available core.
# random_state seeds the resampling so the reported accuracy is repeatable.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    random_state=42,
)

In [8]:
# Train the bagging ensemble, then predict labels for the held-out test set.
# fit() returns the estimator itself, so the two steps can be chained.
y_pred = bag_clf.fit(X_train, y_train).predict(X_test)

In [9]:
# Report the fraction of test labels the bagging ensemble predicted correctly.
print("Bagging", accuracy_score(y_true=y_test, y_pred=y_pred))

Bagging 0.97


In [10]:
# Same bagging setup as bag_clf, but with out-of-bag evaluation enabled:
# each tree is scored on the training samples it did not draw.
bag_clf_oob = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    # OOB scoring only exists when sampling with replacement, so state it
    # explicitly instead of relying on the default.
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
    # Seed the resampling so the OOB score is repeatable across runs.
    random_state=42,
)

In [11]:
# Fit the ensemble and display the accuracy estimated from out-of-bag
# samples; fit() returns the estimator, so the attribute is read directly.
bag_clf_oob.fit(X_train, y_train).oob_score_

0.97

In [12]:
# Compare the out-of-bag estimate with the accuracy measured on the
# held-out test set.
y_pred = bag_clf_oob.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.97

In [13]:
# Display the out-of-bag decision function stored on the fitted ensemble.
# NOTE(review): per the scikit-learn docs this holds one row per training
# sample with class probabilities estimated from out-of-bag predictions.
bag_clf_oob.oob_decision_function_

array([[1.        , 0.        ],
       [1.        , 0.        ],
       [0.96559633, 0.03440367],
       ...,
       [0.        , 1.        ],
       [0.97747748, 0.02252252],
       [0.11538462, 0.88461538]])

#### Instead of building a BaggingClassifier and passing it a DecisionTreeClassifier, you can use the RandomForestClassifier class, which is more convenient and optimized for Decision Trees (similarly, there is a RandomForestRegressor class for regression tasks).

In [14]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 500 trees, each limited to 16 leaf nodes, trained on all
# available cores. random_state seeds the forest so the resulting accuracy
# is repeatable across runs.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)
rnd_clf.fit(X_train, y_train)
# Predictions on the held-out test set, kept under their own name.
y_pred_rf = rnd_clf.predict(X_test)

In [15]:
# Test-set accuracy of the random forest predictions.
accuracy_score(y_true=y_test, y_pred=y_pred_rf)

0.98

### Feature Importance

In [17]:
from sklearn.datasets import load_iris
iris = load_iris()
# Seed the forest so the importance scores printed below are repeatable;
# an unseeded forest yields slightly different values on every run.
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1, random_state=42)
rnd_clf.fit(iris["data"], iris["target"])
# Pair each feature name with the importance the fitted forest assigned it.
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.09531202550129962
sepal width (cm) 0.023341287915202284
petal length (cm) 0.4726116047350108
petal width (cm) 0.4087350818484874
