### Carga de datos, etc

In [1]:
import os
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
import PIL.Image as Image

from skimage import io, color, transform
from skimage.feature import hog

import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the pre-computed arrays stored under the DatosDF directory.
# os.path.join builds each path with the platform separator; the previous
# 'DatosDF\...' literals relied on invalid escape sequences (\g, \h, \e, \l)
# and only resolved on Windows.
# NOTE(security): allow_pickle=True unpickles arbitrary objects -- only load
# files from a trusted source.
# NOTE(review): `hog` rebinds the name imported from skimage.feature in the
# import cell; the name is kept because the DataFrame cell reads it.
DATA_DIR = 'DatosDF'
gray_matrix = np.load(os.path.join(DATA_DIR, 'gray_matrix.npy'), allow_pickle=True)
hog = np.load(os.path.join(DATA_DIR, 'hog.npy'), allow_pickle=True)
estados = np.load(os.path.join(DATA_DIR, 'estados.npy'), allow_pickle=True)
label = np.load(os.path.join(DATA_DIR, 'labels.npy'), allow_pickle=True)

In [3]:
# Gather the four loaded arrays into one DataFrame, one column per array.
df = pd.DataFrame(
    dict(
        gray_matrix=gray_matrix,
        hog=hog,
        estados=estados,
        label=label,
    )
)

In [6]:
# Partition the rows by the 'estados' flag: rows flagged 1 go to dfxusar,
# rows flagged 0 go to elresto.
dfxusar = df.loc[df['estados'].eq(1)]
elresto = df.loc[df['estados'].eq(0)]
# Trailing expression so the notebook renders the selected subset.
dfxusar

Unnamed: 0,gray_matrix,hog,estados,label
16,"[[175, 166, 185, 198, 202, 213, 212, 209, 210,...","[0.20444939, 0.034390625, 0.026585897, 0.01487...",1,angry
23,"[[26, 29, 30, 35, 45, 60, 108, 145, 161, 161, ...","[0.46974203, 0.15378156, 0.0056655, 0.0, 0.002...",1,angry
30,"[[17, 18, 19, 19, 17, 15, 16, 17, 18, 20, 18, ...","[0.16117984, 0.10060929, 0.12280247, 0.0026138...",1,angry
31,"[[0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 13, 37, ...","[0.17848918, 0.014774916, 0.0, 0.0, 0.0, 0.0, ...",1,angry
45,"[[254, 254, 254, 254, 254, 254, 254, 254, 254,...","[0.025827257, 0.01296422, 0.020755643, 0.0, 0....",1,angry
...,...,...,...,...
35863,"[[9, 38, 69, 99, 86, 102, 111, 105, 65, 58, 64...","[0.23134665, 0.3235038, 0.13396424, 0.05265233...",1,surprise
35869,"[[253, 253, 254, 253, 170, 101, 105, 102, 85, ...","[0.4190711, 0.32569548, 0.0045682807, 0.002312...",1,surprise
35875,"[[252, 247, 146, 93, 94, 89, 99, 94, 93, 83, 9...","[0.32172725, 0.29283354, 0.09000543, 0.0720573...",1,surprise
35878,"[[33, 44, 49, 40, 35, 28, 23, 14, 9, 16, 17, 5...","[0.20763314, 0.31089163, 0.1581842, 0.02511550...",1,surprise


In [7]:
# Turn the per-row HOG vectors of the selected subset into one array and
# pair it with the label column.
X = np.array(dfxusar['hog'].tolist())
y = dfxusar['label']

# 80/20 train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=19011,
)

# KNN

In [8]:
# Baseline k-NN classifier. Change n_neighbors to the number of neighbours
# you want to use.
knn = KNeighborsClassifier(
    n_neighbors=5,
)
# Trailing expression: fit on the training split and show the estimator.
knn.fit(X_train, y_train)

In [9]:
# Predict the training split itself, so this score is measured on data the
# model has already seen.
y_pred_e = knn.predict(X_train)

# Overall accuracy on the training split.
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred_e)
print(f'Precisión del modelo: {accuracy}')

# Per-class precision / recall / F1 on the training split.
print(classification_report(y_true=y_train, y_pred=y_pred_e))

Precisión del modelo: 0.9088937093275488
              precision    recall  f1-score   support

       angry       0.90      0.93      0.91       462
     disgust       0.86      0.76      0.81       435
        fear       0.91      0.90      0.90       579
       happy       0.92      1.00      0.96       650
     neutral       0.88      0.97      0.92       622
         sad       0.97      0.76      0.85       467
    surprise       0.94      0.98      0.96       473

    accuracy                           0.91      3688
   macro avg       0.91      0.90      0.90      3688
weighted avg       0.91      0.91      0.91      3688



In [13]:
# Realizar predicciones en el conjunto de prueba
y_pred = knn.predict(X_test)

# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {round(accuracy,5)}')

print(classification_report(y_test, y_pred))

Precisión del modelo: 0.81148
              precision    recall  f1-score   support

       angry       0.75      0.75      0.75       101
     disgust       0.73      0.49      0.59       112
        fear       0.80      0.81      0.81       156
       happy       0.80      0.99      0.89       149
     neutral       0.80      0.95      0.87       153
         sad       0.95      0.62      0.75       119
    surprise       0.86      0.93      0.90       133

    accuracy                           0.81       923
   macro avg       0.81      0.79      0.79       923
weighted avg       0.81      0.81      0.80       923



## [Conjunto Pequeño] GridSearch - Optimización de parámetros

### Primero

In [14]:
%%time
from sklearn.model_selection import GridSearchCV

knn = KNeighborsClassifier()

param_grid = {
    'n_neighbors': [3,6,9,12,15,18,21,24,27,30],
    'p':[1,2]
}

# Realiza la búsqueda en cuadrícula con validación cruzada
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, scoring='accuracy', verbose=10,n_jobs=8)
grid_search.fit(X_train, y_train)

# Muestra los mejores hiperparámetros encontrados
best_params = grid_search.best_params_
print(f"Mejores hiperparámetros: {best_params}")

# Entrena el modelo con los mejores hiperparámetros en todo el conjunto de entrenamiento
best_rf_classifier = grid_search.best_estimator_
best_rf_classifier.fit(X_train, y_train)

# Evalúa el modelo en el conjunto de prueba
y_pred = best_rf_classifier.predict(X_test)

# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy}')

print(classification_report(y_test, y_pred))

Fitting 5 folds for each of 20 candidates, totalling 100 fits


        nan 0.75786257        nan 0.74024409        nan 0.72071571
        nan 0.70743143        nan 0.69984005        nan 0.6930613
        nan 0.68438407]


Mejores hiperparámetros: {'n_neighbors': 3, 'p': 2}
Precisión del modelo: 0.8212351029252438
              precision    recall  f1-score   support

       angry       0.70      0.82      0.75       101
     disgust       0.69      0.50      0.58       112
        fear       0.78      0.84      0.81       156
       happy       0.84      0.99      0.91       149
     neutral       0.86      0.96      0.91       153
         sad       0.95      0.58      0.72       119
    surprise       0.91      0.94      0.93       133

    accuracy                           0.82       923
   macro avg       0.82      0.80      0.80       923
weighted avg       0.82      0.82      0.81       923

CPU times: total: 2.31 s
Wall time: 48.5 s


## [Conjunto Grande] GridSearch - Optimización de parámetros

Modelo aplicando los parámetros optimizados

In [15]:
# Build the feature array and labels from every row of df (no 'estados' filter).
X_g = np.array(df['hog'].tolist())
y_g = df['label']

# 80/20 train/test split with a fixed seed so the split is repeatable.
X_traing, X_testg, y_traing, y_testg = train_test_split(
    X_g,
    y_g,
    test_size=0.2,
    random_state=19011,
)

In [17]:
# k-NN for the full dataset: 3 neighbours, Minkowski power p=2.
knn_g = KNeighborsClassifier(
    n_neighbors=3,
    p=2,
)

In [19]:
# Fit on the large training split, then predict that same split.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred_eg = knn_g.fit(X_traing, y_traing).predict(X_traing)

Precisión del modelo: 0.7091504406283744
              precision    recall  f1-score   support

       angry       0.56      0.83      0.67      3905
     disgust       0.61      0.77      0.68       438
        fear       0.62      0.70      0.66      4106
       happy       0.77      0.83      0.80      7238
     neutral       0.74      0.62      0.67      4928
         sad       0.83      0.50      0.62      4864
    surprise       0.83      0.76      0.79      3230

    accuracy                           0.71     28709
   macro avg       0.71      0.71      0.70     28709
weighted avg       0.73      0.71      0.71     28709



In [21]:
# Overall accuracy of knn_g on the large training split (predictions were
# made on the same data the model was fitted with).
accuracy = accuracy_score(y_true=y_traing, y_pred=y_pred_eg)
print(f'Precisión del modelo: {accuracy}')

# Per-class precision / recall / F1 on the large training split.
print(classification_report(y_true=y_traing, y_pred=y_pred_eg))

Precisión del modelo: 0.7091504406283744
              precision    recall  f1-score   support

       angry       0.56      0.83      0.67      3905
     disgust       0.61      0.77      0.68       438
        fear       0.62      0.70      0.66      4106
       happy       0.77      0.83      0.80      7238
     neutral       0.74      0.62      0.67      4928
         sad       0.83      0.50      0.62      4864
    surprise       0.83      0.76      0.79      3230

    accuracy                           0.71     28709
   macro avg       0.71      0.71      0.70     28709
weighted avg       0.73      0.71      0.71     28709



### Primero

In [None]:
%%time
from sklearn.model_selection import GridSearchCV

knn = KNeighborsClassifier()

param_grid = {
    'n_neighbors': [3,18,30],
    'p':[1,2]
}

# Realiza la búsqueda en cuadrícula con validación cruzada
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, scoring='accuracy', verbose=10,n_jobs=8)
grid_search.fit(X_train, y_train)

# Muestra los mejores hiperparámetros encontrados
best_params = grid_search.best_params_
print(f"Mejores hiperparámetros: {best_params}")

# Entrena el modelo con los mejores hiperparámetros en todo el conjunto de entrenamiento
best_rf_classifier = grid_search.best_estimator_
best_rf_classifier.fit(X_train, y_train)

# Evalúa el modelo en el conjunto de prueba
y_pred = best_rf_classifier.predict(X_test)

# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy}')

print(classification_report(y_test, y_pred))

Fitting 5 folds for each of 20 candidates, totalling 100 fits


        nan 0.75786257        nan 0.74024409        nan 0.72071571
        nan 0.70743143        nan 0.69984005        nan 0.6930613
        nan 0.68438407]


Mejores hiperparámetros: {'n_neighbors': 3, 'p': 2}
Precisión del modelo: 0.8212351029252438
              precision    recall  f1-score   support

       angry       0.70      0.82      0.75       101
     disgust       0.69      0.50      0.58       112
        fear       0.78      0.84      0.81       156
       happy       0.84      0.99      0.91       149
     neutral       0.86      0.96      0.91       153
         sad       0.95      0.58      0.72       119
    surprise       0.91      0.94      0.93       133

    accuracy                           0.82       923
   macro avg       0.82      0.80      0.80       923
weighted avg       0.82      0.82      0.81       923

CPU times: total: 2.31 s
Wall time: 48.5 s
