In [6]:
# Common stuff

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
from pylab import rcParams
rcParams['figure.figsize'] = (9, 6)
from IPython.display import display

In [8]:
from sklearn.datasets import load_digits
data = load_digits()
Xtrain = pd.DataFrame(data['data'])
Ytrain = pd.Series(data['target'])
display(Xtrain.head())
display(Ytrain.head())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0,0,5,13,9,1,0,0,0,0,...,0,0,0,0,6,13,10,0,0,0
1,0,0,0,12,13,5,0,0,0,0,...,0,0,0,0,0,11,16,10,0,0
2,0,0,0,4,15,12,0,0,0,0,...,5,0,0,0,0,3,11,16,9,0
3,0,0,7,15,13,1,0,0,0,8,...,9,0,0,0,7,13,13,9,0,0
4,0,0,0,1,11,0,0,0,0,0,...,0,0,0,0,0,2,16,4,0,0


0    0
1    1
2    2
3    3
4    4
dtype: int64

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0


In [91]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

def eval_decision_tree(Xtrain, Ytrain, depths, use_dummies):    
    y = pd.get_dummies(Ytrain) if use_dummies else Ytrain
    features_num = np.arange(1, Xtrain.shape[1] + 1)
    grid = {'max_depth': depths, 'max_features': features_num, 'random_state' : [0]}    
    gs = GridSearchCV(DecisionTreeClassifier(), grid, scoring='accuracy', cv=5, n_jobs=8)
    gs.fit(Xtrain, y)
    gs.best_params_['use_dummies'] = use_dummies
    del gs.best_params_["random_state"]
    return (gs.best_score_, "decision tree", gs.best_params_)

def eval_logistic_regression(Xtrain, Ytrain, C):
    model = LogisticRegression()
    x = StandardScaler().fit_transform(Xtrain)
    grid = {'penalty': ['l1', 'l2'], 'C': C, 'random_state' : [0]}    
    gs = GridSearchCV(LogisticRegression(), grid, scoring='accuracy', cv=5, n_jobs=8)
    gs.fit(x, Ytrain)
    del gs.best_params_["random_state"]
    return (gs.best_score_, "logistic regression", gs.best_params_)

def eval_knn_classifier(Xtrain, Ytrain, leaf_sizes):
    grid = { 'leaf_size': leaf_sizes }
    gs = GridSearchCV(KNeighborsClassifier(), grid, scoring='accuracy', cv=5, n_jobs=8)
    gs.fit(Xtrain, Ytrain)
    return (gs.best_score_, "knn classifier", gs.best_params_)

final_res = []

final_res.append(eval_decision_tree(Xtrain, Ytrain, np.arange(1, 25), True))
final_res.append(eval_decision_tree(Xtrain, Ytrain, np.arange(1, 25), False))
final_res.append(eval_logistic_regression(Xtrain, Ytrain, np.linspace(0.01, 2, 10)))
final_res.append(eval_knn_classifier(Xtrain, Ytrain, np.arange(10, 60, 10)))

final_res.sort(key = lambda x: x[0], reverse=True)

for r in final_res:
    print(("%f" % r[0]) + " " + str(r[1]) + " : " + str(r[2]))

0.962716 knn classifier : {'leaf_size': 10}
0.933779 logistic regression : {'penalty': 'l1', 'C': 0.45222222222222225}
0.818030 decision tree : {'use_dummies': True, 'max_features': 33, 'max_depth': 12}
0.816917 decision tree : {'use_dummies': False, 'max_features': 33, 'max_depth': 9}
