In [1]:
import numpy as np, humanfriendly as hf
import time
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [2]:
def get_scores(model, Xtrain, ytrain, Xtest, ytest):
    """Score a classifier on the training and test splits.

    The model is not fitted here; it is only asked to predict.

    Returns a tuple ``(train_accuracy, test_accuracy, class_name)`` where
    ``class_name`` is the name of the model's class.
    """
    train_accuracy = accuracy_score(ytrain, model.predict(Xtrain))
    test_accuracy = accuracy_score(ytest, model.predict(Xtest))
    class_name = model.__class__.__name__
    return train_accuracy, test_accuracy, class_name

In [3]:
def get_cross(model, data, target, groups=10):
    """Cross-validate ``model`` on ``data``/``target`` and return the scores.

    ``groups`` is forwarded to ``cross_val_score`` as its ``cv`` argument.
    """
    fold_scores = cross_val_score(model, data, target, cv=groups)
    return fold_scores

In [4]:
def see_time(note, start):
    """Print ``note`` followed by the time elapsed since ``start``.

    ``start`` is subtracted from ``time.perf_counter()``, so it should be a
    reading taken from that same clock.
    """
    elapsed = time.perf_counter() - start
    timespan = hf.format_timespan(elapsed, detailed=True)
    print(note, timespan)

In [5]:
# Load the digits dataset and split it into train/test parts (seeded split).
digits = load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [7]:
# Baseline: k-nearest neighbours with the library's default hyperparameters.
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
train_score, test_score, knn_model_name = get_scores(
    knn_model, X_train, y_train, X_test, y_test
)
print(knn_model_name, 'train:', np.round(train_score, 2), 'test:', np.round(test_score, 2))

KNeighborsClassifier train: 0.99 test: 0.98


In [8]:
# Tune the number of neighbours and the distance metric with 5-fold CV.
# The search is fitted on the training split only: X_test/y_test are used
# afterwards to score the tuned model, so letting the search see them would
# leak test data into hyperparameter selection and bias the test accuracy.
start = time.perf_counter()
param_grid = {'n_neighbors': np.arange(1, 31, 2),'metric': ['euclidean', 'cityblock']}
grid = GridSearchCV(knn_model, param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)
see_time('GridSearchCV total tuning time:',start=start)
best_params = grid.best_params_
print (best_params)

GridSearchCV total tuning time: 3 seconds, 993 milliseconds, 591 microseconds and 800 nanoseconds
{'metric': 'euclidean', 'n_neighbors': 3}


In [10]:
# Refit KNN with the hyperparameters picked by the grid search and score it
# on both splits.
knn_tuned_model = KNeighborsClassifier(**best_params).fit(X_train, y_train)
train_score, test_score, knn_tuned_model_name = get_scores(
    knn_tuned_model, X_train, y_train, X_test, y_test
)
print(knn_tuned_model_name, 'train:', np.round(train_score, 2), 'test:', np.round(test_score, 2))

KNeighborsClassifier train: 0.99 test: 0.99


In [11]:
# Baseline: logistic regression with the lbfgs solver and a generous
# iteration cap.
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases — confirm against the installed version before upgrading.
lr_model = LogisticRegression(
    random_state=0, max_iter=4000, multi_class='auto', solver='lbfgs'
).fit(X_train, y_train)
train_score, test_score, lr_model_name = get_scores(
    lr_model, X_train, y_train, X_test, y_test
)
print(lr_model_name, 'train:', np.round(train_score, 2), 'test:', np.round(test_score, 2))

LogisticRegression train: 1.0 test: 0.95


In [13]:
# Tune the solver and the regularisation strength C with 5-fold CV.
# The search is fitted on the training split only: X_test/y_test are used
# afterwards to score the tuned model, so letting the search see them would
# leak test data into hyperparameter selection and bias the test accuracy.
start = time.perf_counter()
param_grid = {'penalty': ['l2'],'solver': ['newton-cg', 'lbfgs', 'sag'],
              'max_iter': [4000], 'multi_class': ['auto'],'C': [0.001, 0.01, 0.1]}

grid = GridSearchCV(lr_model, param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)
see_time('GridSearchCV total tuning time:',start=start)
bp = grid.best_params_
print (bp)

GridSearchCV total tuning time: 15 seconds, 539 milliseconds, 234 microseconds and 500 nanoseconds
{'C': 0.001, 'max_iter': 4000, 'multi_class': 'auto', 'penalty': 'l2', 'solver': 'newton-cg'}


In [15]:
# Refit logistic regression with the hyperparameters picked by the grid
# search and score it on both splits. The class name goes into its own
# variable (mirroring `knn_tuned_model_name`) so the baseline's
# `lr_model_name` is not overwritten.
lr_tuned_model = LogisticRegression(**bp, random_state=0)
lr_tuned_model.fit(X_train, y_train)
train_score, test_score, lr_tuned_model_name = get_scores(lr_tuned_model, X_train, y_train,X_test, y_test)
print(lr_tuned_model_name,'train:', np.round(train_score, 2),'test:', np.round(test_score, 2))

LogisticRegression train: 0.97 test: 0.96


In [16]:
# Mean cross-validation score of a default KNN over the whole dataset
# (get_cross uses its default of 10 folds).
print('cross-validation score knn:')
knn = KNeighborsClassifier()
scores = get_cross(knn, X, y)
print(scores.mean())

cross-validation score knn:
0.9739482872546906
