Alan Alcántara Nagamatsu - https://medium.com/@a01640155/probando-distintos-métodos-de-clasificación-de-imágenes-bc71de18b7ec
Ramiro Alejandro Ruiz Carranza

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

## Extracción y normalización de datos

Datos de MNIST-Fashion

In [2]:
mnist_train_data = pd.read_csv("fashion-mnist_train.csv")
mnist_test_data = pd.read_csv('fashion-mnist_test.csv')

In [3]:
mnist_X_train = normalize(mnist_train_data.iloc[:,1:], axis=0)
mnist_y_train = mnist_train_data.iloc[:,0]
mnist_X_test = normalize(mnist_test_data.iloc[:,1:], axis=0)
mnist_y_test = mnist_test_data.iloc[:,0]

Datos de emojis

In [2]:
emojis_data = np.loadtxt('emojis.txt')
emojis_X = normalize(emojis_data[:,1:], axis=0)
emojis_y = emojis_data[:,0]

In [3]:
emoji_X_train, emoji_X_test, emoji_y_train, emoji_y_test = train_test_split(emojis_X, emojis_y)

Datos de imágenes satelitales

In [10]:
satelite_data = np.loadtxt('satelitales.csv', delimiter=',')
satelite_X = normalize(satelite_data[:,1:], axis=0)
satelite_y = satelite_data[:,0]

In [11]:
satelite_X_train, satelite_X_test, satelite_y_train, satelite_y_test = train_test_split(satelite_X, satelite_y, stratify=satelite_y)

## Evaluación de modelos clásicos de aprendizaje supervisado

### Clasificación lineal - SVM, kernel = lineal

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import recall_score, accuracy_score

#### MNIST

In [9]:
for c in [i for i in range(10,100,10)]:
    mnist_clf = SVC(kernel='linear', C=c, random_state=4)
    mnist_clf.fit(mnist_X_train, mnist_y_train)
    mnist_lin_pred = mnist_clf.predict(mnist_X_test)
    mnist_lin_acc = accuracy_score(mnist_y_test, mnist_lin_pred)
    mnist_lin_rec = recall_score(mnist_y_test, mnist_lin_pred, average='weighted')
    print(f'C: {c}, Accuracy: {mnist_lin_acc}, Recall: {mnist_lin_rec}')

C: 10, Accuracy: 0.7764, Recall: 0.7764
C: 20, Accuracy: 0.7936, Recall: 0.7936
C: 30, Accuracy: 0.8049, Recall: 0.8049
C: 40, Accuracy: 0.8088, Recall: 0.8088
C: 50, Accuracy: 0.8124, Recall: 0.8124
C: 60, Accuracy: 0.8146, Recall: 0.8146
C: 70, Accuracy: 0.8164, Recall: 0.8164
C: 80, Accuracy: 0.8176, Recall: 0.8176
C: 90, Accuracy: 0.8185, Recall: 0.8185


#### Emojis

In [45]:
emoji_parameters = {'kernel':['linear'], 'C':[i for i in range(1,20,1)], 'gamma':['scale', 'auto']}

In [52]:
emoji_clf = GridSearchCV(SVC(), param_grid=emoji_parameters, scoring=('accuracy', 'recall_macro'), n_jobs = 5, verbose=3, refit='recall_macro')
emoji_clf.fit(emoji_X_train, emoji_y_train)

Fitting 5 folds for each of 38 candidates, totalling 190 fits


In [54]:
print(emoji_clf.best_params_)
print(emoji_clf.best_score_)

{'C': 5, 'gamma': 'scale', 'kernel': 'linear'}
0.8067575030943452


In [61]:
emoji_lin_pred = emoji_clf.predict(emoji_X_test)
emoji_lin_acc = accuracy_score(emoji_y_test,emoji_lin_pred)
emoji_lin_rec = recall_score(emoji_y_test,emoji_lin_pred, average='macro')

In [62]:
print(f'Accuracy: {emoji_lin_acc}, Recall: {emoji_lin_rec}')

Accuracy: 0.8495145631067961, Recall: 0.8473631771492242


#### Satelitales

In [14]:
satelite_parameters = {'kernel':['linear'], 'C':[i for i in range(1,500,10)], 'gamma':['scale', 'auto']}

In [15]:
satelite_l_clf = GridSearchCV(SVC(), param_grid=satelite_parameters, scoring=('accuracy', 'recall_macro'), n_jobs = 5, verbose=3, refit='recall_macro')
satelite_l_clf.fit(satelite_X_train, satelite_y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [16]:
print(satelite_l_clf.best_params_)
print(satelite_l_clf.best_score_)

{'C': 491, 'gamma': 'scale', 'kernel': 'linear'}
0.7960178302090066


In [17]:
satelite_lin_pred = satelite_l_clf.predict(satelite_X_test)
satelite_lin_acc = accuracy_score(satelite_y_test,satelite_lin_pred)
satelite_lin_rec = recall_score(satelite_y_test,satelite_lin_pred, average='macro')
print(f'Accuracy: {satelite_lin_acc}, Recall: {satelite_lin_rec}')

Accuracy: 0.8217821782178217, Recall: 0.8222186567211951


### Clasificación no lineal

#### MNIST - SVM, kernel = Polinomial

In [24]:
mnist_r_clf = SVC(kernel='poly', C=1, random_state=4)
mnist_r_clf.fit(mnist_X_train, mnist_y_train)
mnist_rbf_pred = mnist_r_clf.predict(mnist_X_test)
mnist_rbf_acc = accuracy_score(mnist_y_test, mnist_rbf_pred)
mnist_rbf_rec = recall_score(mnist_y_test, mnist_rbf_pred, average='weighted')
print(f'C: {c}, Accuracy: {mnist_rbf_acc}, Recall: {mnist_rbf_rec}')

C: 90, Accuracy: 0.8568, Recall: 0.8568


ignorar C:90 en el output de arriba

In [25]:
for c in [i for i in range(10,100,10)]:
    mnist_clf = SVC(kernel='poly', C=c, random_state=4)
    mnist_clf.fit(mnist_X_train, mnist_y_train)
    mnist_lin_pred = mnist_clf.predict(mnist_X_test)
    mnist_lin_acc = accuracy_score(mnist_y_test, mnist_lin_pred)
    mnist_lin_rec = recall_score(mnist_y_test, mnist_lin_pred, average='weighted')
    print(f'C: {c}, Accuracy: {mnist_lin_acc}, Recall: {mnist_lin_rec}')

C: 10, Accuracy: 0.873, Recall: 0.873
C: 20, Accuracy: 0.8733, Recall: 0.8733
C: 30, Accuracy: 0.8758, Recall: 0.8758
C: 40, Accuracy: 0.876, Recall: 0.876
C: 50, Accuracy: 0.8758, Recall: 0.8758
C: 60, Accuracy: 0.8759, Recall: 0.8759
C: 70, Accuracy: 0.8754, Recall: 0.8754
C: 80, Accuracy: 0.8748, Recall: 0.8748
C: 90, Accuracy: 0.8745, Recall: 0.8745


In [26]:
for d in [i for i in range(3,7)]:
    mnist_clf = SVC(kernel='poly', C=40, degree=d, random_state=4)
    mnist_clf.fit(mnist_X_train, mnist_y_train)
    mnist_lin_pred = mnist_clf.predict(mnist_X_test)
    mnist_lin_acc = accuracy_score(mnist_y_test, mnist_lin_pred)
    mnist_lin_rec = recall_score(mnist_y_test, mnist_lin_pred, average='weighted')
    print(f'Degree: {d}, Accuracy: {mnist_lin_acc}, Recall: {mnist_lin_rec}')

Degree: 3, Accuracy: 0.876, Recall: 0.876
Degree: 4, Accuracy: 0.8629, Recall: 0.8629
Degree: 5, Accuracy: 0.8492, Recall: 0.8492
Degree: 6, Accuracy: 0.8394, Recall: 0.8394


Los parámetros finales fueron C=40 y degree=3

#### Emojis - SVM, kernel = Radial Basis Function

In [10]:
emoji_rbf_parameters = {'kernel':['rbf'], 'C':[i for i in range(1,20,1)], 'gamma':['scale', 'auto']}

In [11]:
emoji_r_clf = GridSearchCV(SVC(), param_grid=emoji_rbf_parameters, scoring=('accuracy', 'recall_macro'), n_jobs = 5, verbose=3, refit='recall_macro')
emoji_r_clf.fit(emoji_X_train, emoji_y_train)

Fitting 5 folds for each of 38 candidates, totalling 190 fits


In [12]:
print(emoji_r_clf.best_params_)
print(emoji_r_clf.best_score_)

{'C': 4, 'gamma': 'scale', 'kernel': 'rbf'}
0.8273890002776392


In [13]:
emoji_rbf_pred = emoji_r_clf.predict(emoji_X_test)
emoji_rbf_acc = accuracy_score(emoji_y_test,emoji_rbf_pred)
emoji_rbf_rec = recall_score(emoji_y_test,emoji_rbf_pred, average='macro')
print(f'Accuracy: {emoji_rbf_acc}, Recall: {emoji_rbf_rec}')

Accuracy: 0.8592233009708737, Recall: 0.8631030409362553


#### Satelitales - SVM, kernel= Radial Basis Function

In [26]:
satelite_rbf_parameters = {'kernel':['rbf'], 'C':[i for i in range(1,200,10)], 'gamma':['scale', 'auto']}

In [27]:
satelite_rbf_clf = GridSearchCV(SVC(), param_grid=satelite_rbf_parameters, scoring=('accuracy', 'recall_macro'), n_jobs = 5, verbose=3, refit='recall_macro')
satelite_rbf_clf.fit(satelite_X_train, satelite_y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


In [28]:
print(satelite_rbf_clf.best_params_)
print(satelite_rbf_clf.best_score_)

{'C': 31, 'gamma': 'scale', 'kernel': 'rbf'}
0.8495873067175588


In [29]:
satelite_rbf_pred = satelite_rbf_clf.predict(satelite_X_test)
satelite_rbf_acc = accuracy_score(satelite_y_test,satelite_rbf_pred)
satelite_rbf_rec = recall_score(satelite_y_test,satelite_rbf_pred, average='macro')
print(f'Accuracy: {satelite_rbf_acc}, Recall: {satelite_rbf_rec}')

Accuracy: 0.8356435643564356, Recall: 0.8352752895951082


## Perceptron multicapa

In [20]:
from sklearn.neural_network import MLPClassifier

### MNIST

In [35]:
for capas in [[neur for j in range(n_caps)] for n_caps in range(1,100,20) for neur in range(20,120,20)]:
    mnist_p_clf = MLPClassifier(hidden_layer_sizes=capas)
    mnist_p_clf.fit(mnist_X_train, mnist_y_train)
    mnist_perc_pred = mnist_p_clf.predict(mnist_X_test)
    mnist_perc_acc = accuracy_score(mnist_y_test, mnist_perc_pred)
    mnist_perc_rec = recall_score(mnist_y_test, mnist_perc_pred, average='weighted')
    print(f'Neuronas: {capas[0]}, n_capas: {len(capas)}, accuracy: {mnist_perc_acc}, recall: {mnist_perc_rec}')



Neuronas: 20, n_capas: 1, accuracy: 0.81, recall: 0.81




Neuronas: 40, n_capas: 1, accuracy: 0.8349, recall: 0.8349




Neuronas: 60, n_capas: 1, accuracy: 0.8375, recall: 0.8375




Neuronas: 80, n_capas: 1, accuracy: 0.8409, recall: 0.8409




Neuronas: 100, n_capas: 1, accuracy: 0.8494, recall: 0.8494
Neuronas: 20, n_capas: 21, accuracy: 0.6696, recall: 0.6696
Neuronas: 40, n_capas: 21, accuracy: 0.7809, recall: 0.7809
Neuronas: 60, n_capas: 21, accuracy: 0.8376, recall: 0.8376
Neuronas: 80, n_capas: 21, accuracy: 0.7753, recall: 0.7753
Neuronas: 100, n_capas: 21, accuracy: 0.814, recall: 0.814
Neuronas: 20, n_capas: 41, accuracy: 0.1, recall: 0.1
Neuronas: 40, n_capas: 41, accuracy: 0.1, recall: 0.1
Neuronas: 60, n_capas: 41, accuracy: 0.1, recall: 0.1
Neuronas: 80, n_capas: 41, accuracy: 0.1, recall: 0.1
Neuronas: 100, n_capas: 41, accuracy: 0.1, recall: 0.1
Neuronas: 20, n_capas: 61, accuracy: 0.1, recall: 0.1
Neuronas: 40, n_capas: 61, accuracy: 0.1, recall: 0.1
Neuronas: 60, n_capas: 61, accuracy: 0.1, recall: 0.1
Neuronas: 80, n_capas: 61, accuracy: 0.1, recall: 0.1
Neuronas: 100, n_capas: 61, accuracy: 0.1, recall: 0.1
Neuronas: 20, n_capas: 81, accuracy: 0.1, recall: 0.1
Neuronas: 40, n_capas: 81, accuracy: 0.1, rec

Mejor resultado, 100 neuronas, 1 capa, Accuracy=0.8494, Recall=0.8494

### Emojis

In [29]:
emoji_perc_params = {'hidden_layer_sizes': [[neur for j in range(n_caps)] for n_caps in range(1,100,5) for neur in range(20,110,10)]}

In [30]:
emoji_p_clf = GridSearchCV(MLPClassifier(random_state=4), param_grid=emoji_perc_params, scoring=('accuracy', 'recall_macro'), n_jobs = 5, verbose=3, refit='recall_macro')
emoji_p_clf.fit(emoji_X_train, emoji_y_train)

Fitting 5 folds for each of 180 candidates, totalling 900 fits




In [31]:
print(emoji_p_clf.best_params_)
print(emoji_p_clf.best_score_)

{'hidden_layer_sizes': [20]}
0.7826041863529085


In [32]:
emoji_perc_pred = emoji_p_clf.predict(emoji_X_test)
emoji_perc_acc = accuracy_score(emoji_y_test,emoji_perc_pred)
emoji_perc_rec = recall_score(emoji_y_test,emoji_perc_pred, average='macro')
print(f'Accuracy: {emoji_perc_acc}, Recall: {emoji_perc_rec}')

Accuracy: 0.813915857605178, Recall: 0.8161913282725827


### Satelitales

In [18]:
satelite_perc_params = {'hidden_layer_sizes': [[neur for j in range(n_caps)] for n_caps in range(5,20,1) for neur in range(80,160,20)]}

In [21]:
satelite_p_clf = GridSearchCV(MLPClassifier(random_state=4), param_grid=satelite_perc_params, scoring=('accuracy', 'recall_macro'), n_jobs = 5, verbose=3, refit='recall_macro')
satelite_p_clf.fit(satelite_X_train, satelite_y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits


In [22]:
print(satelite_p_clf.best_params_)
print(satelite_p_clf.best_score_)

{'hidden_layer_sizes': [140, 140, 140, 140, 140]}
0.7922762181795795


In [23]:
satelite_perc_pred = satelite_p_clf.predict(satelite_X_test)
satelite_perc_acc = accuracy_score(satelite_y_test,satelite_perc_pred)
satelite_perc_rec = recall_score(satelite_y_test,satelite_perc_pred, average='macro')
print(f'Accuracy: {satelite_perc_acc}, Recall: {satelite_perc_rec}')

Accuracy: 0.8495049504950495, Recall: 0.8497366076109275
