In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the MNIST-784 table from disk and echo it (pandas prints a truncated view).
data = pd.read_csv('mnist_784.csv')
print(data)

# Features: every column except 'class'.
pixel_frame = data.drop(columns=['class'])
# Labels: selecting with a list keeps 'class' as a one-column frame, so `y`
# ends up as a 2-D column array rather than a flat vector.
label_frame = data[['class']]

x = np.asanyarray(pixel_frame)
y = np.asanyarray(label_frame)

       pixel1  pixel2  pixel3  pixel4  ...  pixel782  pixel783  pixel784  class
0           0       0       0       0  ...         0         0         0      5
1           0       0       0       0  ...         0         0         0      0
2           0       0       0       0  ...         0         0         0      4
3           0       0       0       0  ...         0         0         0      1
4           0       0       0       0  ...         0         0         0      9
...       ...     ...     ...     ...  ...       ...       ...       ...    ...
69995       0       0       0       0  ...         0         0         0      2
69996       0       0       0       0  ...         0         0         0      3
69997       0       0       0       0  ...         0         0         0      4
69998       0       0       0       0  ...         0         0         0      5
69999       0       0       0       0  ...         0         0         0      6

[70000 rows x 785 columns]


In [38]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import metrics

# Hold out 10% of the samples for testing.
# A fixed random_state makes the split repeatable: without it every kernel
# restart produces a different partition, so a model pickled in an earlier
# session could be re-scored on "test" rows it was actually trained on.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42
)

In [4]:
def resultados(model, x_test, y_test):
    """Print a classification report and a confusion matrix for ``model``.

    Parameters
    ----------
    model : object exposing ``predict``
        Called once as ``model.predict(x_test)``.
    x_test : samples handed to ``model.predict``.
    y_test : reference labels the predictions are compared against.

    Returns
    -------
    None
        Both summaries are written to stdout.
    """
    predicted = model.predict(x_test)

    # Report first, then the matrix: each metric is computed right before it
    # is printed.
    report = metrics.classification_report(y_test, predicted)
    print('Metricas: \n', report)

    confusion = metrics.confusion_matrix(y_test, predicted)
    print('matriz de confusion: \n', confusion)

In [43]:
from sklearn.svm import SVC

# SVM classifier on standardized pixels reduced to 50 principal components.
# random_state is set on PCA because, depending on the scikit-learn version,
# svd_solver='auto' may choose the randomized solver for data of this size,
# which would make the fitted components differ from run to run.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('PCA', PCA(n_components=50, random_state=42)),
    ('cla', SVC(C=0.82)),
])

# The labels are flattened to 1-D once and used consistently for fitting,
# scoring and reporting.
y_train_flat = y_train.ravel()
y_test_flat = y_test.ravel()

model.fit(x_train, y_train_flat)

print('Train score: ', model.score(x_train, y_train_flat))
print('Test score: ', model.score(x_test, y_test_flat))

resultados(model, x_test, y_test_flat)

Train score:  0.9798730158730159
Test score:  0.973
Metricas: 
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       678
           1       0.97      0.98      0.98       771
           2       0.97      0.98      0.97       729
           3       0.97      0.95      0.96       740
           4       0.98      0.98      0.98       653
           5       0.97      0.96      0.97       594
           6       0.98      0.99      0.99       683
           7       0.96      0.97      0.97       744
           8       0.98      0.96      0.97       693
           9       0.97      0.97      0.97       715

    accuracy                           0.97      7000
   macro avg       0.97      0.97      0.97      7000
weighted avg       0.97      0.97      0.97      7000

matriz de confusion: 
 [[672   0   1   0   0   0   0   1   3   1]
 [  0 758   7   2   0   1   1   2   0   0]
 [  2   2 712   3   2   0   0   7   1   0]
 [  2   5   5 702   2  10

In [44]:
import pickle
# Save the fitted model.
# The context manager closes (and therefore flushes) the file even if
# pickling raises, instead of leaving the handle open.
with open('SVM_mnist_784.sav', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
import pickle
# Load the saved model.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that you produced yourself.
with open('SVM_mnist_784.sav', 'rb') as model_file:
    model = pickle.load(model_file)

# NOTE(review): these scores are only meaningful if x_train / x_test are the
# same partition the pickled model was fitted on -- confirm before comparing.
print('Train score: ', model.score(x_train, y_train))
print('Test score: ', model.score(x_test, y_test))

resultados(model,x_test,y_test)

In [50]:
from sklearn.neural_network import MLPClassifier

# MLP classifier (two hidden layers of 500 units) on standardized pixels
# reduced to 50 principal components.
# MLPClassifier initializes weights and shuffles samples randomly, so it is
# seeded to make the reported scores repeatable; PCA is seeded as well in case
# svd_solver='auto' selects the randomized solver.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('PCA', PCA(n_components=50, random_state=42)),
    ('cla', MLPClassifier(hidden_layer_sizes=(500, 500), alpha=0.01,
                          max_iter=1500, random_state=42)),
])

# The labels are flattened to 1-D once and used consistently for fitting,
# scoring and reporting.
y_train_flat = y_train.ravel()
y_test_flat = y_test.ravel()

model.fit(x_train, y_train_flat)

print('Train score: ', model.score(x_train, y_train_flat))
print('Test score: ', model.score(x_test, y_test_flat))

resultados(model, x_test, y_test_flat)


Train score:  0.997968253968254
Test score:  0.9801428571428571
Metricas: 
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       678
           1       0.99      0.99      0.99       771
           2       0.98      0.96      0.97       729
           3       0.98      0.97      0.97       740
           4       0.98      0.99      0.99       653
           5       0.97      0.98      0.97       594
           6       0.98      0.99      0.99       683
           7       0.98      0.98      0.98       744
           8       0.97      0.97      0.97       693
           9       0.97      0.98      0.98       715

    accuracy                           0.98      7000
   macro avg       0.98      0.98      0.98      7000
weighted avg       0.98      0.98      0.98      7000

matriz de confusion: 
 [[673   0   1   0   0   0   1   0   2   1]
 [  0 763   2   1   0   0   0   2   2   1]
 [  4   2 700   6   3   0   2   6   4   2]
 [  0   0   2

In [51]:
import pickle
# Save the fitted model.
# The context manager closes (and therefore flushes) the file even if
# pickling raises, instead of leaving the handle open.
with open('MLP_mnist_784.sav', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
import pickle
# Load the saved model.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that you produced yourself.
with open('MLP_mnist_784.sav', 'rb') as model_file:
    model = pickle.load(model_file)

# NOTE(review): these scores are only meaningful if x_train / x_test are the
# same partition the pickled model was fitted on -- confirm before comparing.
print('Train score: ', model.score(x_train, y_train))
print('Test score: ', model.score(x_test, y_test))

resultados(model,x_test,y_test)