In [None]:
from sklearn import datasets, svm, metrics
import matplotlib.pyplot as plt
import datetime as dt

# Download the 'mnist_784' dataset (version 1) from OpenML as a (features, labels) pair.
X, y = datasets.fetch_openml('mnist_784', version=1, return_X_y=True)

# Depending on the scikit-learn version / `as_frame` setting, fetch_openml may
# return pandas objects or plain NumPy arrays; convert only when a conversion
# method is available so the cell works in both cases.
if hasattr(X, 'to_numpy'):
    X = X.to_numpy()
if hasattr(y, 'to_numpy'):
    y = y.to_numpy()

# Toggle: preview the first sample on its own.
muestra = True

if muestra:
    # Each row is one flattened image; reshape it to 28x28 for display.
    im = X[0].reshape(28, 28)
    print(im.shape)
    plt.imshow(im, cmap='gray', vmin=0, vmax=255)
    plt.show()

print("DataSet count:", len(X))

# Grid with the first nMuestras samples, 5 per row, each labelled with its target.
nMuestras = 10
nColumnas = 5
# plt.subplot requires integer grid dimensions; `nMuestras/5` produced a float.
# Ceiling division also keeps the grid valid when nMuestras is not a multiple of 5.
nFilas = -(-nMuestras // nColumnas)
plt.figure(figsize=(nMuestras, 5))
for i in range(nMuestras):
    im = X[i].reshape(28, 28)
    l1_plot = plt.subplot(nFilas, nColumnas, i + 1)
    l1_plot.imshow(im, cmap='gray', vmin=0, vmax=255)
    l1_plot.set_xlabel('y = %i' % int(y[i]))


In [None]:
# Quick sanity check: report the container type of X and how many samples it holds.
tipo_X = type(X)
n_total = len(X)
print(tipo_X)
print('Tamaño:', n_total)

In [None]:
#---------------- classification begins -----------------
from sklearn.model_selection import train_test_split

# Rescale the features from [0, 255] to [0, 1]; the full dataset is used
# (no subsampling is applied here).
X_data = X/255.0
Y = y

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    Y,
    test_size=0.2,
    random_state=42,
)


In [None]:
################ Classifier with good params ###########
# Support vector classifier; see
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

param_C = 5         # Regularization parameter
param_gamma = 0.05  # Kernel coefficient
classifier = svm.SVC(gamma=param_gamma, C=param_C)

# Fit on the training split, logging wall-clock timestamps around the call.
start_time = dt.datetime.now()
print('Start learning at {}'.format(start_time))

classifier.fit(X_train, y_train)

end_time = dt.datetime.now()
elapsed_time = end_time - start_time
print('Stop learning {}'.format(end_time))
print('Elapsed learning {}'.format(elapsed_time))

In [None]:
########################################################
# Predict the held-out test split once and reuse the result for every metric.
expected = y_test
predicted = classifier.predict(X_test)

print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))

# Confusion matrix.
# metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed
# in 1.2, and it re-ran classifier.predict(X_test) internally. Build the display
# from the predictions computed above instead.
cm = metrics.confusion_matrix(expected, predicted, labels=classifier.classes_)
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm,
                                      display_labels=classifier.classes_)
disp.plot()
disp.figure_.suptitle("Confusion Matrix")
print("Confusion matrix:\n%s" % disp.confusion_matrix)
plt.show()

print("Validación: %d muestras"% len(y_test))
print("Accuracy={}".format(metrics.accuracy_score(expected, predicted)))
print("")

In [None]:
from joblib import dump

# Persist the fitted classifier to disk; the call is left as the last
# expression so the notebook displays dump()'s return value.
ruta_modelo = 'modelo_mnist_svm.joblib'
dump(classifier, ruta_modelo)

## Referencias

El presente trabajo se basó fundamentalmente en los siguientes trabajos:

  1. GitHub: [ksopyla/svm_mnist_digit_classification](https://github.com/ksopyla/svm_mnist_digit_classification).
  2. Scikit-learn: [Recognizing hand-written digits](https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html).
  3. Scikit-learn: [MNIST classification using multinomial logistic + L1](https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html).