# 2021.06.21 TEST

In [125]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset; later cells read its `.data`
# (feature matrix) and `.target` (labels) attributes.
digits = load_digits()

In [126]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Fit the scaler on every sample's features and transform them in one step.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split performed in a later cell, so the test samples influence the scaling
# parameters. Consider fitting on the training split only (e.g. in a Pipeline).
digits_scaled = scaler.fit_transform(digits.data)

In [127]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the scaled digits for evaluation. Stratifying on the labels
# keeps the class proportions comparable in both partitions; the fixed seed
# makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    digits_scaled,
    digits.target,
    test_size=0.2,
    stratify=digits.target,
    random_state=2021,
)

In [128]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [129]:
# Baseline model: a random forest with default hyperparameters and a fixed
# seed, trained on the training split and scored on the held-out split.
rf_clf = RandomForestClassifier(random_state=2021).fit(X_train, y_train)
pred = rf_clf.predict(X_test)
print('RandomForest:', accuracy_score(y_true=y_test, y_pred=pred))

RandomForest: 0.9611111111111111


In [84]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [85]:
# Base estimators that the voting ensembles in later cells combine.
# max_iter=1000 is set explicitly for logistic regression — presumably to give
# the solver enough iterations to converge on this data; verify.
lr = LogisticRegression(max_iter=1000)
svc = SVC()
knn = KNeighborsClassifier()

In [122]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble built from logistic regression and k-nearest neighbours.
# svc is not included here — presumably because it was created with its
# default settings and so offers no probability estimates; verify.
vo_clf = VotingClassifier(voting='soft', estimators=[('LR', lr), ('KNN', knn)])

# NOTE(review): this call rebinds X_train/X_test/y_train/y_test and, unlike the
# earlier split in this notebook, passes no `stratify` argument, so models
# scored before and after this cell need not share the same test set.
X_train, X_test, y_train, y_test = train_test_split(
    digits_scaled,
    digits.target,
    test_size=0.2,
    random_state=2021,
)

# fit() returns the fitted ensemble, so training and prediction can be chained.
pred = vo_clf.fit(X_train, y_train).predict(X_test)
print('Softvoting:', accuracy_score(y_true=y_test, y_pred=pred))

Softvoting: 0.9833333333333333


In [87]:
# Show the ensemble's per-class probability estimates for each test sample.
# (scikit-learn's VotingClassifier only provides predict_proba when
# voting='soft', so this must run while vo_clf is the soft-voting ensemble.)
vo_clf.predict_proba(X_test)

array([[7.78919674e-04, 7.16434930e-02, 1.02903731e-01, ...,
        3.48362590e-04, 7.76007714e-01, 2.10624134e-03],
       [5.24652393e-05, 2.95838989e-04, 9.96992461e-01, ...,
        1.97175820e-04, 2.41623741e-05, 3.71923643e-05],
       [4.72807380e-06, 9.43031152e-04, 9.97769133e-01, ...,
        9.47783727e-06, 5.60904795e-05, 4.54779212e-06],
       ...,
       [2.18254695e-02, 5.43741876e-02, 2.85805627e-03, ...,
        1.83581489e-03, 3.06946834e-01, 1.32619476e-01],
       [4.34001915e-03, 1.42664129e-04, 4.37071979e-06, ...,
        6.26017303e-03, 1.38088163e-03, 2.21712467e-02],
       [9.96682472e-01, 4.07147166e-07, 7.24212528e-04, ...,
        4.01580670e-05, 5.47250437e-04, 9.18307154e-04]])

In [123]:
from sklearn.ensemble import VotingClassifier

# Hard-voting ensemble: each of the three base estimators casts one vote and
# the majority class label wins. Rebinds vo_clf, replacing the soft-voting
# ensemble created earlier.
vo_clf = VotingClassifier(
    estimators=[('LR', lr), ('SVC', svc), ('KNN', knn)],
    voting='hard'
)

# The train/test split from the soft-voting cell is reused as-is. Calling
# train_test_split again with the same inputs, test_size and random_state
# would only recompute the identical partition, so the duplicated call has
# been dropped to keep a single source of truth for the split.
vo_clf.fit(X_train, y_train)
pred = vo_clf.predict(X_test)
print('Hardvoting:', accuracy_score(y_test, pred))

Hardvoting: 0.9777777777777777


In [89]:
# Display the SVC's current hyperparameter settings ahead of the grid search
# over `C` in the following cells.
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [146]:
# Candidate values for the SVC parameter `C` that the grid search will try;
# the grid is denser between 3 and 4 than elsewhere.
params = {'C': [1,2,3,3.1,3.2,3.3,3.4,4,5,6,7,8,9,10]}

In [147]:
from sklearn.model_selection import GridSearchCV

In [148]:
# Exhaustive search over the candidate C values using 2-fold cross-validation,
# ranked by accuracy.
# NOTE(review): grid_clf is rebound by the 5-fold search in the next cell, so
# nothing later in the notebook reads this 2-fold result.
grid_clf = GridSearchCV(estimator=svc, param_grid=params, scoring='accuracy', cv=2)
grid_clf.fit(X_train, y_train)

GridSearchCV(cv=2, estimator=SVC(),
             param_grid={'C': [1, 2, 3, 3.1, 3.2, 3.3, 3.4, 4, 5, 6, 7, 8, 9,
                               10]},
             scoring='accuracy')

In [149]:
# Exhaustive search over the candidate C values using 5-fold cross-validation
# on the training split, ranked by accuracy.
grid_clf = GridSearchCV(estimator=svc, param_grid=params, scoring='accuracy', cv=5)
grid_clf.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [1, 2, 3, 3.1, 3.2, 3.3, 3.4, 4, 5, 6, 7, 8, 9,
                               10]},
             scoring='accuracy')

In [150]:
# Report the best mean cross-validated accuracy, then the parameter setting
# that achieved it (one value per line).
print(grid_clf.best_score_, grid_clf.best_params_, sep='\n')

0.9916448896631824
{'C': 6}


In [151]:
# Take the estimator that the grid search selected and measure its accuracy
# on the held-out test split; the bare last expression displays the score.
best_clf = grid_clf.best_estimator_
pred = best_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=pred)

0.9805555555555555