In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV,cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn import metrics
from sklearn.pipeline import make_pipeline

In [6]:
# Load the digits dataset that ships with scikit-learn.
digits = load_digits()

In [8]:
# Feature matrix and class labels taken from the loaded dataset.
x = digits.data
y = digits.target

In [10]:
# Hold out 30% of the samples for testing; the seed keeps the split repeatable.
X_train, x_test, Y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)

In [11]:
# Baseline random forest trained on the training split.
# random_state is fixed (same seed as the train/test split) so that the fitted
# forest, and every metric derived from it, is reproducible on a fresh
# Restart & Run All instead of drifting from run to run.
rf = RandomForestClassifier(n_estimators=100, random_state=1)
rf.fit(X_train, Y_train)

# Predicted labels for the held-out test split.
yhat = rf.predict(x_test)

In [13]:
# Confusion matrix of the baseline forest on the test split.
print(metrics.confusion_matrix(y_true=y_test, y_pred=yhat))

[[57  0  0  0  2  0  0  0  0  0]
 [ 0 48  0  0  0  1  0  0  0  0]
 [ 0  0 49  0  0  0  0  0  0  0]
 [ 0  0  0 60  0  1  0  1  2  0]
 [ 0  0  0  0 61  0  0  0  0  0]
 [ 0  0  0  0  0 46  0  0  0  1]
 [ 0  1  0  0  0  0 50  0  0  0]
 [ 0  0  0  0  0  0  0 56  0  1]
 [ 0  0  0  0  0  1  0  0 44  1]
 [ 0  0  0  0  0  2  0  0  0 55]]


In [14]:
# Per-class precision / recall / F1 of the baseline forest on the test split.
print(metrics.classification_report(y_true=y_test, y_pred=yhat))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98        59
           1       0.98      0.98      0.98        49
           2       1.00      1.00      1.00        49
           3       1.00      0.94      0.97        64
           4       0.97      1.00      0.98        61
           5       0.90      0.98      0.94        47
           6       1.00      0.98      0.99        51
           7       0.98      0.98      0.98        57
           8       0.96      0.96      0.96        46
           9       0.95      0.96      0.96        57

    accuracy                           0.97       540
   macro avg       0.97      0.97      0.97       540
weighted avg       0.98      0.97      0.97       540



In [16]:
# 10-fold cross-validation scores of the forest on the training split.
result = cross_val_score(estimator=rf, X=X_train, y=Y_train, cv=10)

In [17]:
# Mean and standard deviation of the cross-validation scores.
(np.mean(result), np.std(result))

(0.9745333333333333, 0.013223019088394426)

In [22]:
# Same kind of model, wrapped in a pipeline that standardises features first.
# The forest is seeded so that any difference from the bare forest fitted
# earlier is not just run-to-run noise from an unseeded estimator.
# NOTE(review): tree splits are threshold-based, so standardisation is not
# expected to change the result much - confirm whether the scaler is needed.
pipe = make_pipeline(StandardScaler(), RandomForestClassifier(random_state=1))
pipe.fit(X_train, Y_train)

# Evaluate the pipeline on the held-out test split.
yhat_pipe = pipe.predict(x_test)
print(metrics.classification_report(y_test, yhat_pipe))
print(metrics.confusion_matrix(y_test, yhat_pipe))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98        59
           1       1.00      0.98      0.99        49
           2       1.00      1.00      1.00        49
           3       1.00      1.00      1.00        64
           4       0.97      1.00      0.98        61
           5       0.94      0.96      0.95        47
           6       1.00      1.00      1.00        51
           7       1.00      0.98      0.99        57
           8       1.00      0.98      0.99        46
           9       0.93      0.96      0.95        57

    accuracy                           0.98       540
   macro avg       0.98      0.98      0.98       540
weighted avg       0.98      0.98      0.98       540

[[57  0  0  0  2  0  0  0  0  0]
 [ 0 48  0  0  0  1  0  0  0  0]
 [ 0  0 49  0  0  0  0  0  0  0]
 [ 0  0  0 64  0  0  0  0  0  0]
 [ 0  0  0  0 61  0  0  0  0  0]
 [ 0  0  0  0  0 45  0  0  0  2]
 [ 0  0  0  0  0  0 51  0  0  0]
 [ 0  0  0

In [26]:
# Candidate hyper-parameter values for the randomised search.
# Each entry maps a RandomForestClassifier parameter name to the values
# the search may draw from.
param_dist = dict(
    max_depth=[3, None],
    max_features=np.arange(1, 11),
    min_samples_split=np.arange(2, 11),
    min_samples_leaf=np.arange(1, 11),
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

In [33]:
# Randomised hyper-parameter search over param_dist with 10-fold CV.
# random_state fixes which candidate combinations are sampled, so the
# reported best_params_ / best_score_ can be reproduced on a re-run.
randomrCV = RandomizedSearchCV(rf, param_dist, cv=10, random_state=1)

In [35]:
# Run the search on the training split, then report the winning
# parameter combination and its mean cross-validated score.
randomrCV.fit(X_train, Y_train)
print(randomrCV.best_params_, randomrCV.best_score_, sep="\n")

{'min_samples_split': 6, 'min_samples_leaf': 5, 'max_features': 4, 'max_depth': None, 'criterion': 'gini', 'bootstrap': False}
0.9681650793650792


In [38]:
# Evaluate the tuned model on the held-out test split.
# The search object was already fitted in the previous cell and holds the
# best estimator refit on the training split, so it is used directly here.
# Fitting it a second time would repeat the whole 10-fold search and could
# select a different model from the one whose parameters were just reported.
yhat = randomrCV.predict(x_test)
print(metrics.classification_report(y_test, yhat))
print(metrics.confusion_matrix(y_test, yhat))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98        59
           1       0.98      0.98      0.98        49
           2       1.00      0.98      0.99        49
           3       1.00      0.95      0.98        64
           4       0.97      0.98      0.98        61
           5       0.92      0.98      0.95        47
           6       1.00      1.00      1.00        51
           7       0.92      0.98      0.95        57
           8       0.98      0.96      0.97        46
           9       0.95      0.93      0.94        57

    accuracy                           0.97       540
   macro avg       0.97      0.97      0.97       540
weighted avg       0.97      0.97      0.97       540

[[57  0  0  0  2  0  0  0  0  0]
 [ 0 48  0  0  0  1  0  0  0  0]
 [ 0  0 48  0  0  0  0  1  0  0]
 [ 0  0  0 61  0  0  0  1  1  1]
 [ 0  0  0  0 60  0  0  1  0  0]
 [ 0  0  0  0  0 46  0  0  0  1]
 [ 0  0  0  0  0  0 51  0  0  0]
 [ 0  0  0