In [47]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
# Synthetic classification data: 100 samples with 10 features each.
# Both the generator and the split are seeded so that every score further
# down the notebook is reproducible after Restart Kernel -> Run All.
X, y = make_classification(n_samples=100, n_features=10, random_state=100)
# Split proportions are left at the train_test_split defaults.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=100)

In [48]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB


# Three different base learners, each bound to its own name because the
# comparison loop later scores them individually as well as in the ensemble.
log_clf, gnb_clf, knn_clf = LogisticRegression(), GaussianNB(), KNeighborsClassifier()

# voting='hard' selects label-based majority voting across the base learners
# (see the scikit-learn VotingClassifier documentation).
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('gnb', gnb_clf),
        ('knn', knn_clf),
    ],
    voting='hard',
)
# Last expression of the cell: fits the ensemble and displays its repr.
voting_clf.fit(X_train, y_train)


VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('gnb', GaussianNB()),
                             ('knn', KNeighborsClassifier())])

In [49]:
from sklearn.metrics import accuracy_score

In [50]:
# Fit each individual model and the voting ensemble on the same training
# split, then print the class name next to its accuracy on the test split.
for clf in (log_clf, gnb_clf, knn_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.88
GaussianNB 0.88
KNeighborsClassifier 0.88
VotingClassifier 0.92


In [53]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing

# Variant 1: scale once, then bag 100 decision trees.
# Each tree is trained on a bootstrap draw of 70 rows (max_samples=70,
# bootstrap=True); n_jobs=-1 uses all available cores.
bg_clf_v1 = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=70,
    bootstrap=True,
    n_jobs=-1,
    random_state=100,  # fixed seed: resampling (and tree seeds) repeat across runs
)
# The scaler sits in front of the whole ensemble, so it is fitted once per
# cross-validation training fold.
clf = make_pipeline(preprocessing.StandardScaler(), bg_clf_v1)
# 5-fold cross-validated scores; displayed as the cell output.
cross_val_score(clf, X, y, cv=5)

array([0.95, 1.  , 0.9 , 0.95, 0.8 ])

In [55]:
# Variant 2: the scaler lives inside every bagged member, so each of the 100
# scaler+tree pipelines is fitted on its own bootstrap draw of 70 rows.
dt_clf = make_pipeline(preprocessing.StandardScaler(), DecisionTreeClassifier())
bg_clf = BaggingClassifier(
    dt_clf,
    n_estimators=100,
    max_samples=70,
    bootstrap=True,
    n_jobs=-1,
    random_state=100,  # fixed seed: resampling (and member seeds) repeat across runs
)
# 5-fold cross-validated scores; displayed as the cell output.
cross_val_score(bg_clf, X, y, cv=5)

array([0.95, 1.  , 0.9 , 0.85, 0.8 ])

In [56]:
# Train the bagged pipeline on the training split and, in the same chained
# expression, display its score on the held-out test split.
bg_clf.fit(X_train, y_train).score(X_test, y_test)

0.92

In [57]:
from sklearn.ensemble import RandomForestClassifier

In [59]:
from sklearn.model_selection import GridSearchCV

# Hand-configured baseline forest; it is fitted and scored in later cells.
rnd_forest_clf = RandomForestClassifier(random_state=100, max_features=5)

# Exhaustive grid search over forest hyper-parameters with 3-fold CV.
# The searched estimator is seeded so that the selected best_params_ do not
# change from one run to the next.
# Grid size: 1 * 4 * 2 * 3 * 3 * 4 = 288 candidates, i.e. 864 fits plus the
# final refit, so this is the slowest cell of the notebook.
# NOTE(review): with only 100 generated samples, max_depth values of 80+ are
# unlikely to ever constrain a tree -- confirm this axis is worth searching.
gs_clf = GridSearchCV(
    RandomForestClassifier(random_state=100),
    param_grid={
        'bootstrap': [True],
        'max_depth': [80, 90, 100, 110],
        'max_features': [2, 3],
        'min_samples_leaf': [3, 4, 5],
        'min_samples_split': [8, 10, 12],
        'n_estimators': [100, 200, 300, 1000],
    },
    cv=3,
    n_jobs=-1,
)
# Last expression of the cell: runs the search and displays the fitted object.
gs_clf.fit(X_train, y_train)

GridSearchCV(cv=3, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'bootstrap': [True], 'max_depth': [80, 90, 100, 110],
                         'max_features': [2, 3], 'min_samples_leaf': [3, 4, 5],
                         'min_samples_split': [8, 10, 12],
                         'n_estimators': [100, 200, 300, 1000]})

In [62]:
# Show the hyper-parameter combination the grid search selected as best.
gs_clf.best_params_

{'bootstrap': True,
 'max_depth': 80,
 'max_features': 2,
 'min_samples_leaf': 3,
 'min_samples_split': 8,
 'n_estimators': 300}

In [60]:
# Fit the hand-configured forest (max_features=5, random_state=100) on the
# training split.
# NOTE(review): this is not the estimator tuned by gs_clf -- confirm that
# leaving the grid-search result unused here is intentional.
rnd_forest_clf.fit(X_train, y_train)

RandomForestClassifier(max_features=5, random_state=100)

In [61]:
# Score the fitted forest on the held-out test split; the value is the cell
# output.
rnd_forest_clf.score(X_test, y_test)

0.96