In [12]:
import pandas as pd
import numpy as np
import cv2 as cv
# import seaborn as sns
import os
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

import cv2
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.optimizers import Adam

import warnings
import sys
# Keep the notebook output free of warning noise. The blanket "ignore" is only
# installed when the interpreter was started without explicit -W options, so a
# user-supplied warning policy is respected.
if not sys.warnoptions:
    warnings.simplefilter(action="ignore")
# DeprecationWarning is silenced unconditionally, even when -W options are set.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [13]:


import cv2

# Probe a single training image to confirm the on-disk resolution of the dataset.
sample_image_path = '/kaggle/input/fer2013plus/fer2013plus/fer2013/train/anger/fer0000010.png'

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail loudly here instead of with an opaque AttributeError on `.shape` below.
img = cv2.imread(sample_image_path)
if img is None:
    raise FileNotFoundError(f'Could not read image: {sample_image_path}')

# With imread's default flag the array has three axes: (height, width, channels).
height, width, channels = img.shape
print(f'Dimensions: {width}x{height}')





Dimensions: 48x48


In [14]:
# Training input pipeline.
# The generator multiplies every pixel by 1/255 and applies random geometric
# augmentation (rotation, horizontal/vertical shift, shear, zoom, mirror).
train_data_generator = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.15,
    shear_range=0.15,
    height_shift_range=0.15,
    width_shift_range=0.15,
    rotation_range=15,
    rescale=1.0 / 255,
)

# Stream single-channel 100x100 batches of 64 from the class sub-folders.
# The explicit `classes` list fixes the label order of the one-hot targets.
fer_training_data = train_data_generator.flow_from_directory(
    directory='/kaggle/input/fer2013plus/fer2013plus/fer2013/train',
    classes=['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'],
    class_mode='categorical',
    color_mode='grayscale',
    target_size=(100, 100),
    batch_size=64,
)

fer_training_data

Found 28221 images belonging to 7 classes.


<keras.src.legacy.preprocessing.image.DirectoryIterator at 0x7eac4cdccbe0>

In [15]:
# Test input pipeline: only the 1/255 rescale is applied, no augmentation.
test_data_generator = ImageDataGenerator(rescale=1.0 / 255)

# Same image size, colour mode, batch size and class order as the training
# iterator, but read from the `test` directory.
fer_test_data = test_data_generator.flow_from_directory(
    directory='/kaggle/input/fer2013plus/fer2013plus/fer2013/test',
    classes=['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'],
    class_mode='categorical',
    color_mode='grayscale',
    target_size=(100, 100),
    batch_size=64,
)
fer_test_data

Found 7048 images belonging to 7 classes.


<keras.src.legacy.preprocessing.image.DirectoryIterator at 0x7eac4cdce950>

In [16]:
from keras.layers import BatchNormalization
from keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

In [17]:
from tensorflow.keras import optimizers

# Candidate optimizers, both with a learning rate of 1e-3.
# NOTE(review): `optims` is not referenced by any other cell visible in this
# notebook; the models below are compiled with the string 'adam'.
optims = [
    optimizers.Nadam(
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        name='Nadam',
    ),
    optimizers.Adam(learning_rate=0.001),
]

In [None]:
# Baseline CNN: a plain Sequential stack (layers applied one after another).
#   Conv2D             - learns local image features such as edges and textures
#   BatchNormalization - normalizes activations between layers
#   MaxPooling2D       - downsamples the feature maps
#   Dropout            - randomly disables units during training to limit overfitting
#   Flatten / Dense    - turn the feature maps into a vector and classify it
from keras.models import Sequential
from keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    Input,
    MaxPooling2D,
)


def build_baseline_cnn(input_shape=(100, 100, 1), num_classes=7):
    """Build and compile the baseline (attention-free) CNN.

    Args:
        input_shape: (height, width, channels) of one input image. The default
            matches the `target_size=(100, 100)` / grayscale setting of the
            data iterators defined earlier in this notebook; the previous
            hard-coded (48, 48, 1) did not match what those iterators yield.
        num_classes: width of the softmax output layer.

    Returns:
        A compiled `Sequential` model (categorical cross-entropy, Adam,
        accuracy metric). Layer names are identical to the original
        hand-written stack (conv2d_1..6, batchnorm_1..7, maxpool2d_1..3,
        dropout_1..4, flatten, dense_1, out_layer).
    """
    # One entry per stage; each stage is two (filters, kernel_size) convolutions
    # followed by 2x2 max-pooling and dropout.
    stages = [
        [(256, (3, 3)), (256, (5, 5))],
        [(256, (3, 3)), (256, (3, 3))],
        [(256, (3, 3)), (512, (3, 3))],
    ]

    net = Sequential()
    # Declare the input explicitly instead of passing `input_shape` to the
    # first Conv2D layer.
    net.add(Input(shape=input_shape))

    conv_index = 0
    for stage_index, stage in enumerate(stages, start=1):
        for filters, kernel_size in stage:
            conv_index += 1
            net.add(
                Conv2D(
                    filters=filters,
                    kernel_size=kernel_size,
                    activation='elu',
                    padding='same',
                    kernel_initializer='he_normal',
                    name=f'conv2d_{conv_index}',
                )
            )
            net.add(BatchNormalization(name=f'batchnorm_{conv_index}'))
        net.add(MaxPooling2D(pool_size=(2, 2), name=f'maxpool2d_{stage_index}'))
        net.add(Dropout(0.25, name=f'dropout_{stage_index}'))

    # Classification head.
    net.add(Flatten(name='flatten'))
    net.add(
        Dense(
            256,
            activation='elu',
            kernel_initializer='he_normal',
            name='dense_1',
        )
    )
    net.add(BatchNormalization(name='batchnorm_7'))
    net.add(Dropout(0.25, name='dropout_4'))
    net.add(Dense(num_classes, activation='softmax', name='out_layer'))

    # categorical_crossentropy matches the one-hot ('categorical') labels
    # produced by the data iterators.
    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net


model = build_baseline_cnn()

model.summary()

In [21]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D, Reshape, multiply, Permute, Concatenate, Lambda, Attention, add, Activation
from keras import backend as K



# Channel attention (channel sub-module of CBAM)
def channel_attention(input_feature, ratio=8):
    """Re-weight the channels of `input_feature` with a learned per-channel gate.

    Global average-pooled and global max-pooled descriptors are passed through
    the same two Dense layers (a bottleneck of `channel // ratio` units, then
    back to `channel` units), summed, and squashed with a sigmoid. The result
    is multiplied with the input.

    Args:
        input_feature: 4-D feature-map tensor whose channel axis follows
            `K.image_data_format()`.
        ratio: reduction factor of the Dense bottleneck.

    Returns:
        Tensor produced by multiplying `input_feature` with the channel gate.
    """
    # GlobalMaxPooling2D is not imported by any cell visible in this notebook,
    # so a fresh kernel would hit a NameError here. Import it locally so this
    # function is self-contained.
    from tensorflow.keras.layers import GlobalMaxPooling2D

    channel_axis = -1 if K.image_data_format() == 'channels_last' else 1
    channel = input_feature.shape[channel_axis]
    # Keep at least one unit in the bottleneck when channel < ratio.
    bottleneck_units = max(1, channel // ratio)

    # Both pooled descriptors go through the SAME two layers (shared weights).
    shared_layer_one = Dense(bottleneck_units,
                             activation='relu',
                             kernel_initializer='he_normal',
                             use_bias=True,
                             bias_initializer='zeros')

    shared_layer_two = Dense(channel,
                             kernel_initializer='he_normal',
                             use_bias=True,
                             bias_initializer='zeros')

    avg_pool = GlobalAveragePooling2D()(input_feature)
    avg_pool = Reshape((1, 1, channel))(avg_pool)
    avg_pool = shared_layer_one(avg_pool)
    avg_pool = shared_layer_two(avg_pool)

    max_pool = GlobalMaxPooling2D()(input_feature)
    max_pool = Reshape((1, 1, channel))(max_pool)
    max_pool = shared_layer_one(max_pool)
    max_pool = shared_layer_two(max_pool)

    cbam_feature = add([avg_pool, max_pool])
    cbam_feature = Activation('sigmoid')(cbam_feature)

    # The gate was built as (1, 1, channel). For channels_first inputs, move
    # the channel axis to the front so it lines up with `input_feature`
    # (same convention spatial_attention uses for its own output).
    if K.image_data_format() == 'channels_first':
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])

# Spatial attention (spatial sub-module of CBAM)
def spatial_attention(input_feature):
    """Re-weight the spatial positions of `input_feature` with a learned mask.

    The channel-wise mean and channel-wise max of the feature map are
    concatenated into a two-channel map, which a single 7x7 convolution with a
    sigmoid activation turns into a one-channel mask. The mask is multiplied
    with the input.

    Args:
        input_feature: 4-D feature-map tensor whose channel axis follows
            `K.image_data_format()`.

    Returns:
        Tensor produced by multiplying `input_feature` with the spatial mask.
    """
    kernel_size = 7

    # Work in channels-last layout internally so that axis 3 is always the
    # channel axis for the reductions below.
    if K.image_data_format() == "channels_first":
        cbam_feature = Permute((2, 3, 1))(input_feature)
    else:
        cbam_feature = input_feature

    # Channel-wise statistics, each reduced to a single channel (keepdims=True).
    # No explicit output_shape is passed to Lambda here.
    # NOTE(review): Lambda layers that wrap Python lambdas may need extra care
    # when a saved model is reloaded - confirm against the Keras version in use.
    avg_pool = Lambda(lambda x: tf.reduce_mean(x, axis=3, keepdims=True))(cbam_feature)
    max_pool = Lambda(lambda x: tf.reduce_max(x, axis=3, keepdims=True))(cbam_feature)

    concat = Concatenate(axis=3)([avg_pool, max_pool])
    cbam_feature = Conv2D(filters=1,
                          kernel_size=kernel_size,
                          strides=1,
                          padding='same',
                          activation='sigmoid',
                          kernel_initializer='he_normal',
                          use_bias=False)(concat)

    # Restore the caller's layout before applying the mask.
    if K.image_data_format() == "channels_first":
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])

def cbam_block(cbam_feature, ratio=8):
    """Apply channel attention, then spatial attention, to `cbam_feature`.

    Args:
        cbam_feature: feature-map tensor to refine.
        ratio: reduction factor forwarded to `channel_attention`.

    Returns:
        The tensor returned by `spatial_attention` applied to the output of
        `channel_attention`.
    """
    channel_refined = channel_attention(cbam_feature, ratio)
    return spatial_attention(channel_refined)

# SE Block (Squeeze-and-Excitation Block)
def se_block(input_feature, ratio=16):
    """Re-weight the channels of `input_feature` with a squeeze-and-excitation gate.

    The feature map is global-average-pooled ("squeeze"), passed through a
    bias-free Dense bottleneck of `channels // ratio` ReLU units and a
    bias-free Dense layer of `channels` sigmoid units ("excitation"), and the
    resulting per-channel weights are multiplied with the input.

    Args:
        input_feature: 4-D feature-map tensor whose channel axis follows
            `K.image_data_format()`.
        ratio: reduction factor of the Dense bottleneck.

    Returns:
        Tensor produced by multiplying `input_feature` with the channel weights.
    """
    channel_axis = -1 if K.image_data_format() == 'channels_last' else 1
    channels = input_feature.shape[channel_axis]
    se_shape = (1, 1, channels)
    # Keep at least one unit in the bottleneck when channels < ratio.
    bottleneck_units = max(1, channels // ratio)

    se = GlobalAveragePooling2D()(input_feature)
    se = Reshape(se_shape)(se)
    se = Dense(bottleneck_units, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
    se = Dense(channels, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)

    # The weights were built as (1, 1, channels). For channels_first inputs,
    # move the channel axis to the front so it lines up with `input_feature`.
    if K.image_data_format() == 'channels_first':
        se = Permute((3, 1, 2))(se)

    return multiply([input_feature, se])

# Attention-augmented CNN, assembled with the functional API.
# Input: single-channel 100x100 images.
input_tensor = Input(shape=(100, 100, 1))

# Three stages of decreasing width. Each stage is:
#   conv -> batch-norm -> SE block -> CBAM block   (attention-refined features)
#   conv -> batch-norm -> 2x2 max-pool -> dropout  (downsampling)
x = input_tensor
for stage_filters in (256, 128, 64):
    x = Conv2D(
        filters=stage_filters,
        kernel_size=(3, 3),
        padding='same',
        activation='elu',
        kernel_initializer='he_normal',
    )(x)
    x = BatchNormalization()(x)
    x = se_block(x)
    x = cbam_block(x)

    x = Conv2D(
        filters=stage_filters,
        kernel_size=(3, 3),
        padding='same',
        activation='elu',
        kernel_initializer='he_normal',
    )(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

# Classification head: flatten, one hidden Dense layer, 7-way softmax.
x = Flatten()(x)
x = Dense(256, activation='relu', kernel_initializer='he_normal')(x)
x = BatchNormalization()(x)
x = Dropout(0.25)(x)
output_tensor = Dense(7, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=output_tensor)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()


In [22]:
# Stop training when val_accuracy has not improved by at least 5e-5 for 11
# consecutive epochs, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=11,
    min_delta=0.00005,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate when val_accuracy has stopped improving for 7
# epochs, never going below 1e-7.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=7,
    factor=0.5,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, lr_scheduler]

In [23]:
# Train for up to 60 epochs; the callbacks may stop earlier or lower the
# learning rate.
# `batch_size` is deliberately not passed: both directory iterators were
# created with batch_size=64 and already yield batches, and Keras documents
# that batch_size must not be specified for generator-style inputs.
# NOTE(review): validation_data is the iterator built from the `test`
# directory, so early stopping and best-weight restoration are tuned on the
# same data that is later reported as the final score - confirm this is intended.
history = model.fit(
    fer_training_data,
    epochs=60,
    validation_data=fer_test_data,
    callbacks=callbacks,
    verbose=2,  # one summary line per epoch instead of a progress bar
)

Epoch 1/60
441/441 - 358s - 812ms/step - accuracy: 0.3813 - loss: 1.6716 - val_accuracy: 0.4640 - val_loss: 1.4409 - learning_rate: 0.0010
Epoch 2/60
441/441 - 119s - 271ms/step - accuracy: 0.4864 - loss: 1.4029 - val_accuracy: 0.5541 - val_loss: 1.2542 - learning_rate: 0.0010
Epoch 3/60
441/441 - 120s - 272ms/step - accuracy: 0.5658 - loss: 1.2150 - val_accuracy: 0.6359 - val_loss: 1.0215 - learning_rate: 0.0010
Epoch 4/60
441/441 - 120s - 272ms/step - accuracy: 0.6168 - loss: 1.0727 - val_accuracy: 0.6464 - val_loss: 1.0005 - learning_rate: 0.0010
Epoch 5/60
441/441 - 120s - 272ms/step - accuracy: 0.6575 - loss: 0.9567 - val_accuracy: 0.6873 - val_loss: 0.8642 - learning_rate: 0.0010
Epoch 6/60
441/441 - 120s - 272ms/step - accuracy: 0.6710 - loss: 0.9036 - val_accuracy: 0.6976 - val_loss: 0.8280 - learning_rate: 0.0010
Epoch 7/60
441/441 - 120s - 272ms/step - accuracy: 0.6832 - loss: 0.8743 - val_accuracy: 0.6791 - val_loss: 0.9076 - learning_rate: 0.0010
Epoch 8/60
441/441 - 120s -

In [25]:
import plotly.graph_objects as go

# One figure per metric (accuracy first, then loss), each with a training and
# a validation curve plotted against the 1-based epoch number.
for metric_label in ('Accuracy', 'Loss'):
    metric_key = metric_label.lower()
    fig = go.Figure()

    for split_label, history_key in (
        ('Training', metric_key),
        ('Validation', f'val_{metric_key}'),
    ):
        values = history.history[history_key]
        epochs = list(range(1, len(values) + 1))
        fig.add_trace(
            go.Scatter(
                x=epochs,
                y=values,
                mode='lines+markers',
                name=f'{split_label} {metric_label}',
            )
        )

    fig.update_layout(
        title=f'Training vs. Validation {metric_label}',
        xaxis_title='Epoch',
        yaxis_title=metric_label,
        template="plotly_white",
    )

    fig.show()

In [26]:
# Score the trained model on `fer_test_data` (the same iterator that was
# passed as validation_data during training) and report loss and accuracy.
loss, accuracy = model.evaluate(x=fer_test_data, verbose=1)

print(
    f'Validation Loss: {loss}',
    f'Validation Accuracy: {accuracy * 100:.2f}%',
    sep='\n',
)

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 120ms/step - accuracy: 0.8201 - loss: 0.5216
Validation Loss: 0.5129662752151489
Validation Accuracy: 81.90%


In [43]:
# Persist the trained model to the Kaggle working directory as an .h5 file.
model.save(filepath="/kaggle/working/emotion_model_archi.h5")


In [45]:
# Persist the same trained model again, this time as a .keras file.
model.save(filepath="/kaggle/working/emotion_model_archi.keras")