In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.preprocessing import LabelEncoder

def load_images(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            images.append(img)
            labels.append(filename.split('_')[0])  # Asumiendo que las imágenes están etiquetadas como 'expresión_imagen.jpg'
    
    label_encoder = LabelEncoder()
    labels = label_encoder.fit_transform(labels)
    
    return images, labels

In [3]:
def preprocess_images(images):
    processed_images = []
    for image in images:
        if image is None:
            continue
        if len(image.shape) != 3 or image.shape[2] != 3:
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convertir a escala de grises
        image = cv2.resize(image, (48, 48))  # Cambiar el tamaño a 48x48
        image = image / 255.0  # Escalar los valores de los píxeles a [0, 1]
        processed_images.append(image)
    return np.array(processed_images).reshape(-1, 48, 48, 1)  # Asegurarse de que las imágenes tienen la forma correcta

In [4]:
def create_and_train_model(images, labels):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 1)),  # 1 canal de color en lugar de 3
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(5, activation='softmax')  # 5 clases de expresiones faciales
    ])

    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

    model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

    model.save('model.h5')

In [5]:
from tensorflow.keras.models import load_model
import numpy as np

def predict_expression():
    model = load_model('model.h5')
    cap = cv2.VideoCapture(0)

    # Definir las etiquetas de las clases
    class_labels = ["Neutral", "Feliz", "Enojado", "Triste", "Sorprendido"]

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        display_frame = frame.copy()  # Crear una copia del marco para mostrar
        frame = preprocess_images([frame])
        prediction = model.predict(frame)

        # Obtener la etiqueta de la clase predicha
        predicted_class = np.argmax(prediction)
        predicted_label = class_labels[predicted_class]

        # Mostrar la predicción en la ventana de la webcam
        cv2.putText(display_frame, predicted_label, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('Webcam', display_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [6]:
def main():
    images, labels = load_images('Images')
    images = preprocess_images(images)
    create_and_train_model(images, labels)
    predict_expression()

if __name__ == '__main__':
    main()

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 600ms/step - accuracy: 0.2500 - loss: 1.6147 - val_accuracy: 0.2000 - val_loss: 1.6719
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.2500 - loss: 1.5490 - val_accuracy: 0.2000 - val_loss: 1.7480
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.4000 - loss: 1.5859 - val_accuracy: 0.2000 - val_loss: 1.7739
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.2500 - loss: 1.6239 - val_accuracy: 0.2000 - val_loss: 1.7502
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.3500 - loss: 1.5628 - val_accuracy: 0.4000 - val_loss: 1.7088
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.1500 - loss: 1.6588 - val_accuracy: 0.2000 - val_loss: 1.6782
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/s

KeyboardInterrupt: 

: 