In [9]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [10]:
# Configuración de directorios
base_dir = r'C:\Users\gegdg\OneDrive\Documentos\.UVG\Anio4\Ciclo 2\Data Science\Lab4\Lab4_DataScience\PolyMNIST\MMNIST'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')
modality_dirs = ['m0', 'm1', 'm2', 'm3', 'm4']

In [11]:
# Preparación del generador de datos con aumento
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(28, 28),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

validation_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(28, 28),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

Found 240000 images belonging to 15 classes.
Found 60000 images belonging to 15 classes.


In [13]:
# Modelo CNN 1
model1 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 3)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(15, activation='softmax')  # 5 clases de salida
])

model1.compile(optimizer=Adam(),
               loss='categorical_crossentropy',
               metrics=['accuracy'])

# Entrenamiento del Modelo 1
print("Entrenando Modelo 1...")
history1_aug = model1.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)
print("Modelo 1 terminado. Comenzando Modelo 2...")


Entrenando Modelo 1...
Epoch 1/10
[1m   1/7500[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:55:16[0m 1s/step - accuracy: 0.0625 - loss: 2.7312

  self._warn_if_super_not_called()


[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m496s[0m 66ms/step - accuracy: 0.8677 - loss: 0.3217 - val_accuracy: 0.9722 - val_loss: 0.0722
Epoch 2/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m540s[0m 72ms/step - accuracy: 0.9635 - loss: 0.0985 - val_accuracy: 0.9826 - val_loss: 0.0507
Epoch 3/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m658s[0m 88ms/step - accuracy: 0.9724 - loss: 0.0754 - val_accuracy: 0.9896 - val_loss: 0.0307
Epoch 4/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m645s[0m 86ms/step - accuracy: 0.9771 - loss: 0.0661 - val_accuracy: 0.9905 - val_loss: 0.0282
Epoch 5/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m498s[0m 66ms/step - accuracy: 0.9789 - loss: 0.0619 - val_accuracy: 0.9872 - val_loss: 0.0436
Epoch 6/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m533s[0m 71ms/step - accuracy: 0.9807 - loss: 0.0567 - val_accuracy: 0.9925 - val_loss: 0.0246
Epoch 7/1

In [14]:
# Modelo CNN 2
model2 = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(15, activation='softmax')  # 5 clases de salida
])

model2.compile(optimizer=Adam(),
               loss='categorical_crossentropy',
               metrics=['accuracy'])

# Entrenamiento del Modelo 2
print("Entrenando Modelo 2...")
history2_aug = model2.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)
print("Modelo 2 terminado.")

Entrenando Modelo 2...
Epoch 1/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 32ms/step - accuracy: 0.8686 - loss: 0.3099 - val_accuracy: 0.9713 - val_loss: 0.0734
Epoch 2/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 33ms/step - accuracy: 0.9782 - loss: 0.0597 - val_accuracy: 0.9875 - val_loss: 0.0323
Epoch 3/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m511s[0m 68ms/step - accuracy: 0.9882 - loss: 0.0361 - val_accuracy: 0.9932 - val_loss: 0.0208
Epoch 4/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m543s[0m 72ms/step - accuracy: 0.9907 - loss: 0.0276 - val_accuracy: 0.9809 - val_loss: 0.0622
Epoch 5/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m450s[0m 60ms/step - accuracy: 0.9922 - loss: 0.0263 - val_accuracy: 0.9962 - val_loss: 0.0115
Epoch 6/10
[1m7500/7500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 40ms/step - accuracy: 0.9934 - loss: 0.0209 - val_accuracy: 0

In [15]:
# Función para cargar los datos para el modelo K-NN
def cargar_datos_knn(base_dir, modality_dirs):
    X = []
    y = []
    for idx, modality in enumerate(modality_dirs):
        modality_path = os.path.join(base_dir, modality)
        images = os.listdir(modality_path)
        for img_name in images:
            img_path = os.path.join(modality_path, img_name)
            img = plt.imread(img_path).flatten()  # Convertir la imagen a un vector plano
            X.append(img)
            y.append(idx)
    return np.array(X), np.array(y)

In [16]:
# Carga de datos de entrenamiento y prueba para K-NN
print("Cargando datos para el modelo K-NN...")
X_train, y_train = cargar_datos_knn(train_dir, modality_dirs)
X_test, y_test = cargar_datos_knn(test_dir, modality_dirs)
print("Datos cargados. Iniciando entrenamiento del modelo K-NN...")


Cargando datos para el modelo K-NN...
Datos cargados. Iniciando entrenamiento del modelo K-NN...


In [17]:
# Entrenamiento del modelo K-NN
knn = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
knn.fit(X_train, y_train)


In [18]:
# Predicción y evaluación del modelo K-NN
y_pred = knn.predict(X_test)
print("Evaluación del modelo K-NN:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Evaluación del modelo K-NN:
Accuracy: 0.66822
Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.44      0.54     10000
           1       0.46      0.97      0.62     10000
           2       0.97      0.98      0.98     10000
           3       0.96      0.03      0.06     10000
           4       0.77      0.91      0.83     10000

    accuracy                           0.67     50000
   macro avg       0.77      0.67      0.61     50000
weighted avg       0.77      0.67      0.61     50000

Confusion Matrix:
 [[4435 3023    2   10 2530]
 [ 212 9748   37    2    1]
 [  13   61 9842    0   84]
 [ 966 8315  267  326  126]
 [ 844   95    1    0 9060]]


In [19]:
# Comparación de Resultados
print("\nComparación de Modelos:")
print("Modelo 1 (CNN): Mejor precisión en validación: {:.2f}%".format(max(history1_aug.history['val_accuracy']) * 100))
print("Modelo 2 (CNN): Mejor precisión en validación: {:.2f}%".format(max(history2_aug.history['val_accuracy']) * 100))
print("Modelo K-NN: Precisión en prueba: {:.2f}%".format(accuracy_score(y_test, y_pred) * 100))


Comparación de Modelos:
Modelo 1 (CNN): Mejor precisión en validación: 99.25%
Modelo 2 (CNN): Mejor precisión en validación: 99.62%
Modelo K-NN: Precisión en prueba: 66.82%
