In [50]:
import sklearn.datasets
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, learning_curve, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier

In [12]:
digits = sklearn.datasets.load_digits()

In [13]:
X = digits.data
y = digits.target

In [15]:
tree_clf = DecisionTreeClassifier()

In [16]:
scores = cross_val_score(tree_clf, X, y, cv=10, n_jobs=-1)

In [18]:
scores.mean()

0.831927374301676

In [20]:
bag_clf = BaggingClassifier(n_estimators=100)

In [21]:
scores_bag = cross_val_score(bag_clf, X, y, cv=10, n_jobs=-1)

In [22]:
scores_bag.mean()

0.9253941651148356

In [39]:
max_feat = np.sqrt(X.shape[1]).astype(int)
bag_clf2 = BaggingClassifier(n_estimators=100, max_features=max_feat)

In [40]:
scores_bag2 = cross_val_score(bag_clf2, X, y, cv=10, n_jobs=-1)

In [41]:
scores_bag2.mean()

0.9287585350713844

In [42]:
max_feat = np.sqrt(X.shape[1]).astype(int)
tree_clf = DecisionTreeClassifier(max_features=max_feat)
bag_clf3 = BaggingClassifier(base_estimator=tree_clf, n_estimators=100)

In [43]:
scores_bag3 = cross_val_score(bag_clf3, X, y, cv=10, n_jobs=-1)

In [44]:
scores_bag3.mean()

0.9487895716945995

In [46]:
random_f_clf = RandomForestClassifier(n_estimators=100)

In [47]:
scores_random_f = cross_val_score(random_f_clf, X, y, cv=10, n_jobs=-1)

In [48]:
scores_random_f.mean()

0.951582867783985

In [53]:
for trees in np.arange(1,500,50):
    random_f_clf = RandomForestClassifier(n_estimators=trees)
    print(cross_val_score(random_f_clf, X, y, cv=10, n_jobs=-1).mean())

0.7356859093730603
0.9549099937926753
0.9482278088144008
0.9493544382371197
0.9504624456859092
0.9499162011173183
0.9482433271260085
0.951576660459342
0.9482371198013656
0.9549130974549968


In [55]:
for trees in np.arange(5,16,5):
    random_f_clf = RandomForestClassifier(n_estimators=trees)
    print(cross_val_score(random_f_clf, X, y, cv=10, n_jobs=-1).mean())

0.8730943513345748
0.9148572315332093
0.929885164494103


In [56]:
for feat in np.arange(5,51,5):
    random_f_clf = RandomForestClassifier(n_estimators=100, max_features=feat)
    print(cross_val_score(random_f_clf, X, y, cv=10, n_jobs=-1).mean())

0.9515797641216635
0.9415611421477342
0.9476753569211669
0.9437740533829919
0.9437740533829919
0.9415394165114834
0.9437709497206702
0.9359838609559279
0.9387709497206703
0.9320856610800744


In [57]:
for deep in np.arange(5,100,5):
    random_f_clf = RandomForestClassifier(n_estimators=100, max_depth=deep)
    print(cross_val_score(random_f_clf, X, y, cv=10, n_jobs=-1).mean())

0.9103848541278708
0.946576660459342
0.9504748603351955
0.9482495344506517
0.9465704531346988
0.9476784605834885
0.9454531346989447
0.9471291123525759
0.9499099937926753
0.9515859714463065
0.9482340161390439
0.9487957790192427
0.9482464307883302
0.9471322160148976
0.9488081936685289
0.9543358162631904
0.9476908752327746
0.950471756672874
0.9487988826815641


In [58]:
random_f_clf = RandomForestClassifier(n_estimators=100)
print(cross_val_score(random_f_clf, X, y, cv=10, n_jobs=-1).mean())

0.9471260086902544
