## Mayank Raj
### 22BAI1118

In [7]:
import os
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model  
# --- Dataset loading -------------------------------------------------------
# Reads the image-name/emotion CSV, loads every listed image from disk,
# resizes and scales it, one-hot encodes the labels and makes an 80/20
# train/validation split.
image_directory = '/kaggle/input/all-images/images_by_emotion'
csv_file = '/kaggle/input/emotionss/images_emotions.csv'

# Target (width, height) passed to cv2.resize. The custom CNN below is built
# for 48x48 inputs; the MobileNetV2/VGG16 cells build their models with
# 224x224 inputs, so set this to (224, 224) and re-run this cell before
# training those.
IMAGE_SIZE = (48, 48)

data = pd.read_csv(csv_file)

image_names = data['Image_Name'].values
emotions = data['Emotion'].values

# Map each distinct emotion string to an integer class id (np.unique returns
# the distinct values in sorted order, so the mapping is deterministic).
emotion_dict = {emotion: idx for idx, emotion in enumerate(np.unique(emotions))}
print("Emotion Dictionary:", emotion_dict)

# Build images and labels in lockstep: a label is recorded only when its image
# was actually loaded, so X[i] always corresponds to Y[i] even if some files
# listed in the CSV are missing or unreadable.
images = []
labels = []
for img_name, emotion in zip(image_names, emotions):
    img_path = os.path.join(image_directory, img_name)
    img = cv2.imread(img_path)
    if img is None:
        print(f"Warning: Image {img_name} not found.")
        continue
    images.append(cv2.resize(img, IMAGE_SIZE))
    labels.append(emotion_dict[emotion])

labels = np.array(labels)

# Scale pixel values from [0, 255] to [0, 1].
X = np.array(images).astype('float32') / 255.0

# One-hot encode the integer class ids.
Y = to_categorical(labels, num_classes=len(emotion_dict))

X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)
print(f"Training data shape: {X_train.shape}, Validation data shape: {X_val.shape}")


Emotion Dictionary: {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
Training data shape: (1792, 48, 48, 3), Validation data shape: (448, 48, 48, 3)


## Custom CNN

In [21]:
def create_model():
    """Build the custom CNN used as the baseline emotion classifier.

    The network takes 48x48x3 inputs and stacks three Conv2D(3x3, relu) +
    MaxPooling2D(2x2) stages with 32, 64 and 128 filters, followed by a
    128-unit dense layer, 50% dropout and a softmax output with one unit per
    entry of the module-level ``emotion_dict``.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    layer_stack = [
        # Convolutional feature extractor.
        Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 3)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Classification head.
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(len(emotion_dict), activation='softmax'),
    ]
    return Sequential(layer_stack)


model = create_model()
model.summary()

In [2]:

# Compile the custom CNN for multi-class classification on one-hot labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Keep only the checkpoint from the epoch with the highest validation accuracy.
model_checkpoint = ModelCheckpoint(
    'best_cnn_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=200,
    batch_size=32,
    callbacks=[model_checkpoint],
)

# Reload the best checkpoint (not the last-epoch weights) and score it on the
# validation split.
best_model = load_model('best_cnn_model.keras')
val_loss, val_accuracy = best_model.evaluate(X_val, Y_val)
print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")


Emotion Dictionary: {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
Training data shape: (1792, 48, 48, 3), Validation data shape: (448, 48, 48, 3)
Epoch 1/200


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.2094 - loss: 1.9185 - val_accuracy: 0.2411 - val_loss: 1.8508
Epoch 2/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2201 - loss: 1.8601 - val_accuracy: 0.2835 - val_loss: 1.7966
Epoch 3/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2409 - loss: 1.8130 - val_accuracy: 0.2857 - val_loss: 1.7885
Epoch 4/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2619 - loss: 1.7874 - val_accuracy: 0.2902 - val_loss: 1.7810
Epoch 5/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2543 - loss: 1.8034 - val_accuracy: 0.2857 - val_loss: 1.7633
Epoch 6/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3038 - loss: 1.7543 - val_accuracy: 0.3125 - val_loss: 1.7674
Epoch 7/200
[1m56/56[0m [32m━━━━━━━━━━━━━━

## Simple MobileNetV2

In [22]:

# These names are not imported by the notebook's opening import cell; import
# them here so the cell runs on its own instead of raising NameError.
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D

# Simple transfer-learning model: ImageNet-pretrained MobileNetV2 backbone
# (without its classification top) + global average pooling + softmax head.
# NOTE(review): the backbone is built for 224x224x3 inputs, while the
# data-loading cell resizes images to 48x48 — X must be reloaded at 224x224
# before this model is trained.
# NOTE(review): X is scaled to [0, 1]; the Keras docs state MobileNetV2 expects
# inputs preprocessed with mobilenet_v2.preprocess_input ([-1, 1]) — confirm
# whether the current scaling is intended.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every backbone layer so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(len(emotion_dict), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Save only the epoch with the highest validation accuracy.
model_checkpoint = ModelCheckpoint('best_mobilenet_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')

model.summary()

In [3]:

# Train the simple MobileNetV2 model; `model_checkpoint` writes
# 'best_mobilenet_model.keras' whenever validation accuracy improves.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=200,
    batch_size=32,
    callbacks=[model_checkpoint],
)

# Reload the best checkpoint and report its loss/accuracy on the validation split.
best_model_V2 = load_model('best_mobilenet_model.keras')
val_loss, val_accuracy = best_model_V2.evaluate(X_val, Y_val)
print(f"Best Model Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")



Emotion Dictionary: {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
Training data shape: (1792, 224, 224, 3), Validation data shape: (448, 224, 224, 3)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 111ms/step - accuracy: 0.1957 - loss: 2.1352 - val_accuracy: 0.2723 - val_loss: 1.8879
Epoch 2/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 46ms/step - accuracy: 0.3362 - loss: 1.7342 - val_accuracy: 0.2902 - val_loss: 1.8070
Epoch 3/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.3951 - loss: 1.5828 - val_accuracy: 0.2790 - val_loss: 1.8497
Epoch 4/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

## Complex MobileNetV2

In [23]:

# These names are not imported by the notebook's opening import cell; import
# them here so the cell runs on its own instead of raising NameError.
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Activation, BatchNormalization, GlobalAveragePooling2D

# Deeper transfer-learning head on an ImageNet-pretrained MobileNetV2 backbone:
# pooling -> BN -> [Dense(512) -> BN -> ReLU -> Dropout] ->
# [Dense(256) -> BN -> ReLU -> Dropout] -> softmax.
# NOTE(review): the backbone is built for 224x224x3 inputs, while the
# data-loading cell resizes images to 48x48 — X must be reloaded at 224x224
# before this model is trained.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every backbone layer so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)

# First dense stage; the activation is applied after batch normalisation.
x = Dense(512)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)

# Second dense stage, same layout with 256 units.
x = Dense(256)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)

predictions = Dense(len(emotion_dict), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Save only the epoch with the highest validation accuracy.
model_checkpoint = ModelCheckpoint('best_mobilenet_complex_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')
model.summary()

In [6]:

# Train the complex MobileNetV2 model; `model_checkpoint` writes
# 'best_mobilenet_complex_model.keras' whenever validation accuracy improves.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=200,
    batch_size=32,
    callbacks=[model_checkpoint],
)

# Reload the best checkpoint and report its loss/accuracy on the validation split.
best_model_CV2 = load_model('best_mobilenet_complex_model.keras')
val_loss, val_accuracy = best_model_CV2.evaluate(X_val, Y_val)
print(f"Best Model Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")



Emotion Dictionary: {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
Training data shape: (1792, 224, 224, 3), Validation data shape: (448, 224, 224, 3)
Epoch 1/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 105ms/step - accuracy: 0.1935 - loss: 2.5504 - val_accuracy: 0.3304 - val_loss: 1.8092
Epoch 2/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.3755 - loss: 1.7437 - val_accuracy: 0.3259 - val_loss: 1.7772
Epoch 3/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step - accuracy: 0.4238 - loss: 1.5876 - val_accuracy: 0.3013 - val_loss: 1.7814
Epoch 4/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 46ms/step - accuracy: 0.4830 - loss: 1.4226 - val_accuracy: 0.3438 - val_loss: 1.7728
Epoch 5/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.5475 - loss: 1.2325 - val_accuracy: 0.3192 -

## Simple VGG16

In [24]:

# These names are not imported before this point in the notebook; import them
# here so the cell runs on its own instead of raising NameError.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D

# Simple transfer-learning model: ImageNet-pretrained VGG16 backbone (without
# its classification top) + global average pooling + dropout + softmax head.
# NOTE(review): the backbone is built for 224x224x3 inputs, while the
# data-loading cell resizes images to 48x48 — X must be reloaded at 224x224
# before this model is trained.
# NOTE(review): X is scaled to [0, 1]; the Keras docs state VGG16 expects
# inputs preprocessed with vgg16.preprocess_input — confirm whether the
# current scaling is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every backbone layer so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(len(emotion_dict), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Save only the epoch with the highest validation accuracy.
model_checkpoint = ModelCheckpoint('best_vgg16_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')
model.summary()

In [5]:

# Train the simple VGG16 model; `model_checkpoint` writes
# 'best_vgg16_model.keras' whenever validation accuracy improves.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=200,
    batch_size=32,
    callbacks=[model_checkpoint],
)

# Reload the best checkpoint and report its loss/accuracy on the validation split.
best_model_VGG16 = load_model('best_vgg16_model.keras')
val_loss, val_accuracy = best_model_VGG16.evaluate(X_val, Y_val)
print(f"Best Model Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")

Emotion Dictionary: {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
Training data shape: (1792, 224, 224, 3), Validation data shape: (448, 224, 224, 3)
Epoch 1/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 190ms/step - accuracy: 0.1942 - loss: 2.2671 - val_accuracy: 0.2478 - val_loss: 1.8578
Epoch 2/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 174ms/step - accuracy: 0.2054 - loss: 1.9718 - val_accuracy: 0.2812 - val_loss: 1.8346
Epoch 3/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 180ms/step - accuracy: 0.2400 - loss: 1.9085 - val_accuracy: 0.2902 - val_loss: 1.8099
Epoch 4/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 179ms/step - accuracy: 0.2593 - loss: 1.8230 - val_accuracy: 0.3036 - val_loss: 1.7952
Epoch 5/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 174ms/step - accuracy: 0.2448 - loss: 1.8529 - val_accuracy: 

## Complex VGG16

In [25]:

# These names are not imported before this point in the notebook; import them
# here so the cell runs on its own instead of raising NameError.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D

# Partially fine-tuned VGG16: ImageNet-pretrained backbone with a
# pooling -> Dropout -> Dense(512, relu) -> BN -> Dropout -> softmax head.
# NOTE(review): the backbone is built for 224x224x3 inputs, while the
# data-loading cell resizes images to 48x48 — X must be reloaded at 224x224
# before this model is trained.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze only the first 15 backbone layers; the remaining backbone layers stay
# trainable and are fine-tuned together with the new head.
for layer in base_model.layers[:15]:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
predictions = Dense(len(emotion_dict), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Save only the epoch with the highest validation accuracy. The file name is
# also read by the prediction cell further down, so it is left unchanged.
model_checkpoint = ModelCheckpoint('_complex_vgg16_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')
model.summary()

In [1]:

# Train the complex VGG16 model; `model_checkpoint` writes
# '_complex_vgg16_model.keras' whenever validation accuracy improves.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=200,
    batch_size=32,
    callbacks=[model_checkpoint],
)

# Reload the best checkpoint and report its loss/accuracy on the validation split.
best_model_complex_vgg16 = load_model('_complex_vgg16_model.keras')
val_loss, val_accuracy = best_model_complex_vgg16.evaluate(X_val, Y_val)
print(f"Best Model Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")


Emotion Dictionary: {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
Training data shape: (1792, 224, 224, 3), Validation data shape: (448, 224, 224, 3)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Epoch 1/200


I0000 00:00:1728480115.414600     102 service.cc:145] XLA service 0x7813c4113a90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1728480115.414683     102 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1728480115.414689     102 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m 1/56[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m17:35[0m 19s/step - accuracy: 0.0938 - loss: 3.7116

I0000 00:00:1728480132.252609     102 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 205ms/step - accuracy: 0.1514 - loss: 3.2175 - val_accuracy: 0.1853 - val_loss: 3.5955
Epoch 2/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 187ms/step - accuracy: 0.1779 - loss: 2.7230 - val_accuracy: 0.2210 - val_loss: 2.7512
Epoch 3/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 175ms/step - accuracy: 0.1933 - loss: 2.5169 - val_accuracy: 0.1830 - val_loss: 2.2760
Epoch 4/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 188ms/step - accuracy: 0.2008 - loss: 2.3825 - val_accuracy: 0.2455 - val_loss: 1.8841
Epoch 5/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 182ms/step - accuracy: 0.2296 - loss: 2.2577 - val_accuracy: 0.2455 - val_loss: 1.8169
Epoch 6/200
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 195ms/step - accuracy: 0.2416 - loss: 2.1822 - val_accuracy: 0.2746 - val_loss: 1.9552
Epoch 7/200
[1m56/56[0m [

### Creating desired CSV using 4th model: complex VGG16

In [18]:
import os
import pandas as pd
import numpy as np
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense, BatchNormalization
from tensorflow.keras.models import Model

# --- Test-set prediction ---------------------------------------------------
# For every image in the test directory: predict an emotion with the complex
# VGG16 model, record the softmax confidence of that prediction, and record
# the bounding box of the first face found by a Haar cascade. The results are
# written to a CSV with one row per image.
image_directory = '/kaggle/input/face-emotion-recognition-1/TestSet/TestSet'
image_names = [img for img in os.listdir(image_directory) if img.endswith(('.jpg', '.jpeg', '.png'))]

csv_file_path = '/kaggle/working/image_predictions.csv'
df = pd.DataFrame(image_names, columns=['Image_Name'])
df.to_csv(csv_file_path, index=False)

print(f"CSV file created at: {csv_file_path}")

# Rebuild the complex VGG16 architecture so the trained weights can be loaded
# into it; the layer stack must match the one used for training.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
predictions = Dense(8, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

model.load_weights('/kaggle/working/_complex_vgg16_model.keras')

emotion_dict = {'ANGRY': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3, 'JOY': 4, 'NEUTRAL': 5, 'SAD': 6, 'SURPRISE': 7}
# Inverse mapping, built once, for turning a predicted class id into its name.
index_to_emotion = {idx: name for name, idx in emotion_dict.items()}

predicted_emotions = []
confidence_levels = []
x_coords = []
y_coords = []
widths = []
heights = []

haarcascade_path = '/kaggle/input/model-1/keras/default/1/haarcascade_frontalface_default.xml'

face_cascade = cv2.CascadeClassifier(haarcascade_path)
for img_name in image_names:
    img_path = os.path.join(image_directory, img_name)

    img = cv2.imread(img_path)
    if img is None:
        # Keep one entry per image so every result list stays the same length
        # as `df`; otherwise the column assignments below would fail.
        print(f"Warning: Image {img_name} could not be read.")
        predicted_emotions.append(None)
        confidence_levels.append(np.nan)
        x_coords.append(0)
        y_coords.append(0)
        widths.append(0)
        heights.append(0)
        continue

    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # The classifier sees the whole image resized to 224x224 and scaled to
    # [0, 1]; the detected face box is only recorded, not used for cropping.
    img_resized = cv2.resize(img, (224, 224))
    img_array = img_to_array(img_resized) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Softmax probabilities for the single image in the batch.
    probabilities = model.predict(img_array)[0]
    predicted_class = int(np.argmax(probabilities))
    # Confidence is the probability assigned to the predicted class.
    confidence = float(probabilities[predicted_class])

    predicted_emotions.append(index_to_emotion[predicted_class])
    confidence_levels.append(confidence)

    # Record the first detected face, or an all-zero box when none is found.
    if len(faces) > 0:
        face_x, face_y, face_w, face_h = (int(v) for v in faces[0])
    else:
        face_x, face_y, face_w, face_h = 0, 0, 0, 0
    x_coords.append(face_x)
    y_coords.append(face_y)
    widths.append(face_w)
    heights.append(face_h)

df['Predicted_Emotion'] = predicted_emotions
df['Confidence_Level'] = confidence_levels
df['X_Coordinate'] = x_coords
df['Y_Coordinate'] = y_coords
df['Width'] = widths
df['Height'] = heights

df.to_csv(csv_file_path, index=False)
print(f"Results saved to: {csv_file_path}")


CSV file created at: /kaggle/working/image_predictions.csv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1

# Conclusion
### In my study of facial emotion recognition, I evaluated several models to determine which architecture yielded the best performance. The results showed that the Complex VGG16 model achieved the highest validation accuracy of 39.96%, outperforming other models. While it had a higher validation loss compared to some, its superior accuracy indicates it is better at classifying emotions in the dataset. The Complex VGG16 model's ability to capture intricate patterns in facial images makes it the most effective choice for this task. Therefore, I chose the Complex VGG16 model for its enhanced performance and potential for further improvement in facial emotion recognition.