## Classifying handwritten digits

In [30]:
import numpy as np
from sklearn.datasets import load_digits

# Load the digits dataset bundled with scikit-learn.
digits = load_digits()

# Feature matrix (one row per image) and the matching class labels.
X = digits.data
y = digits.target

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Hold out a random half of the examples for testing; the fixed
# random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.5,
    random_state=42,
)

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# Two-step model: rescale every feature to the range [-1, 1], then fit an SVC.
# make_pipeline names each step after its lowercased class name, so the
# classifier step is addressed as "svc" (hence the "svc__" parameter
# prefixes used by the grid search further down).
svm = make_pipeline(
    MinMaxScaler(feature_range=(-1, 1)),
    SVC(gamma='auto'),
)

In [4]:
# Baseline: 10-fold cross-validation scores computed on the training half.
cv_acc = cross_val_score(svm, X_train, y_train, cv=10)

# Fit the pipeline on the whole training half, then score it on the
# held-out half.
svm.fit(X_train, y_train)
test_acc = svm.score(X_test, y_test)

# Report the mean over the 10 folds next to the single test-set score.
print(f'Cross-validation accuracy: {np.mean(cv_acc):0.4f}')
print(f'Test accuracy: {test_acc:0.4f}')

Cross-validation accuracy: 0.9789
Test accuracy: 0.9700


In [5]:
from sklearn.model_selection import GridSearchCV
# Candidate values, spaced one decade apart.
c_values = np.logspace(-3, 3, 7)       # 1e-3 ... 1e3
gamma_values = np.logspace(-3, 2, 6)   # 1e-3 ... 1e2

# Two separate grids so that gamma is only varied for the RBF kernel.
linear_grid = {
    'svc__kernel': ['linear'],
    'svc__C': c_values,
}
rbf_grid = {
    'svc__kernel': ['rbf'],
    'svc__C': c_values,
    'svc__gamma': gamma_values,
}
search_space = [linear_grid, rbf_grid]

# Exhaustive search with 10-fold CV; refit=True refits the best
# configuration so that `gridsearch` can be scored directly afterwards.
gridsearch = GridSearchCV(
    svm,
    param_grid=search_space,
    refit=True,
    cv=10,
    n_jobs=-2,
)
gridsearch.fit(X_train, y_train)
print(f'Best parameter: {gridsearch.best_params_}')

# best_score_ is the cross-validation score of the best configuration.
cv_acc = gridsearch.best_score_
test_acc = gridsearch.score(X_test, y_test)
print(f'Cross-validation accuracy: {np.mean(cv_acc):0.4f}')
print(f'Test accuracy: {test_acc:0.4f}')

Best parameter: {'svc__C': 10.0, 'svc__gamma': 0.01, 'svc__kernel': 'rbf'}
Cross-validation accuracy: 0.9867
Test accuracy: 0.9822


In [31]:
import pandas as pd
import numpy as np

In [32]:
# One row per image: the label in the first column, followed by the pixel
# features. column_stack turns the 1-D label vector into a column itself.
a = pd.DataFrame(np.column_stack([digits.target, digits.data]))

In [33]:
# Name the columns: 'target' first, then 'cell_1' ... 'cell_64' (1-based).
a.columns = ['target'] + [f'cell_{i}' for i in range(1, 65)]

In [34]:
import feather

In [35]:
# Compact copy of the frame with every column stored as an 8-bit integer.
# NOTE(review): assumes all labels and pixel values fit in int8
# (the sample shown in this notebook only contains values 0-16) - confirm.
b = a.astype('int8')

In [42]:
# Replace the numeric label with a readable string such as 'digit_3'.
# Bracket assignment is used instead of `b.target = ...`: attribute-style
# assignment only updates a column when that column already exists and
# otherwise merely sets a plain attribute on the frame object.
# The labels are read from `a` (not `b`), so re-running this cell gives the
# same result.
b['target'] = a['target'].astype(int).apply(lambda x: 'digit_' + str(x))

In [43]:
# Sanity check of the frame's dimensions: (rows, columns), where the columns
# are the 'target' label plus the 64 'cell_*' features.
b.shape

(1797, 65)

In [44]:
# Write the frame to a feather file; the relative path resolves against the
# current working directory.
# NOTE(review): the file is named "mnist" but the data comes from
# sklearn's load_digits - confirm the name is intended.
b.to_feather("mnist.feather")

In [45]:
# Show the exported frame as the cell's output (string labels in 'target',
# integer pixel values in the 'cell_*' columns).
b

Unnamed: 0,target,cell_1,cell_2,cell_3,cell_4,cell_5,cell_6,cell_7,cell_8,cell_9,...,cell_55,cell_56,cell_57,cell_58,cell_59,cell_60,cell_61,cell_62,cell_63,cell_64
0,digit_0,0,0,5,13,9,1,0,0,0,...,0,0,0,0,6,13,10,0,0,0
1,digit_1,0,0,0,12,13,5,0,0,0,...,0,0,0,0,0,11,16,10,0,0
2,digit_2,0,0,0,4,15,12,0,0,0,...,5,0,0,0,0,3,11,16,9,0
3,digit_3,0,0,7,15,13,1,0,0,0,...,9,0,0,0,7,13,13,9,0,0
4,digit_4,0,0,0,1,11,0,0,0,0,...,0,0,0,0,0,2,16,4,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,digit_9,0,0,4,10,13,6,0,0,0,...,4,0,0,0,2,14,15,9,0,0
1793,digit_0,0,0,6,16,13,11,1,0,0,...,1,0,0,0,6,16,14,6,0,0
1794,digit_8,0,0,1,11,15,1,0,0,0,...,0,0,0,0,2,9,13,6,0,0
1795,digit_9,0,0,2,10,7,0,0,0,0,...,2,0,0,0,5,12,16,12,0,0
