In [1]:

import os
import cv2
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from collections import Counter



In [None]:
train_labels = pd.read_csv('/kaggle/input/raf-db-dataset/train_labels.csv')
test_labels = pd.read_csv('/kaggle/input/raf-db-dataset/test_labels.csv')

# Numeric emotion codes kept for this study, mapped to readable names.
label_map = {2: "fear", 4: "happy", 5: "sad", 6: "angry"}

# Keep only the rows whose code appears in label_map and translate the code to
# its name. `.loc` + `.assign` builds a new frame instead of writing into a
# filtered slice, which is what triggers pandas' SettingWithCopyWarning.
train_labels = (
    train_labels.loc[train_labels["label"].isin(label_map.keys())]
    .assign(label=lambda frame: frame["label"].map(label_map))
)
# Display the first few rows of the filtered train labels to check the structure
print(train_labels.head())

# Same filtering and renaming for the test labels
test_labels = (
    test_labels.loc[test_labels["label"].isin(label_map.keys())]
    .assign(label=lambda frame: frame["label"].map(label_map))
)

In [None]:
# Numeric emotion code -> emotion name for the four emotions of interest
label_map = {
    2: "fear",
    4: "happy",
    5: "sad",
    6: "angry",
}


In [None]:
# Read the train and test label tables in one pass
train_labels, test_labels = map(
    pd.read_csv,
    (
        '/kaggle/input/raf-db-dataset/train_labels.csv',
        '/kaggle/input/raf-db-dataset/test_labels.csv',
    ),
)

# Peek at the top of the train table to confirm its column layout
print(train_labels.head())

# Emotion names listed in the order of their 1-based codes
classes = ['surprise', 'fear', 'disgust', 'happy', 'sad', 'angry', 'neutral']

# Name -> code lookup: the first name gets 1, the last gets len(classes)
label_map = dict(zip(classes, range(1, len(classes) + 1)))

print(label_map)

In [None]:
import os
import numpy as np
import cv2
from tqdm import tqdm

def load_data(dataset_dir, label_map, excluded_labels, new_label_map):
    """Load every readable image under ``dataset_dir/<idx>/`` as an RGB array.

    Args:
        dataset_dir: Directory holding one sub-folder per class, each named by
            the class index (``str(idx)``).
        label_map: Mapping of class name -> class index. Only the indices are
            used, to locate the sub-folders.
        excluded_labels: Collection of class indices whose folders are skipped.
        new_label_map: Mapping of original class index -> label stored in the
            returned label array.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the loaded RGB
        images and their remapped labels, in matching order.

    Raises:
        KeyError: If a non-excluded index that has at least one readable image
            is missing from ``new_label_map``.
    """
    images = []
    labels = []

    for _class_name, idx in tqdm(label_map.items()):
        if idx in excluded_labels:  # Skip excluded labels
            continue

        folder_path = os.path.join(dataset_dir, str(idx))
        # isdir rather than exists: a stray *file* named like a class index is
        # reported here instead of making os.listdir raise.
        if not os.path.isdir(folder_path):
            print(f"Warning: {folder_path} does not exist.")
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and everything seeded downstream) reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue  # Skip non-image files

            img_path = os.path.join(folder_path, filename)
            img = cv2.imread(img_path)

            if img is None:
                print(f"Warning: Unable to read {img_path}")
                continue  # Skip unreadable images

            # OpenCV decodes to BGR; convert so the stored arrays are RGB
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            labels.append(new_label_map[idx])  # Assign new label

    return np.array(images), np.array(labels)




# Original codes left out of the study: 1 (surprise), 3 (disgust), 7 (neutral)
excluded_labels = {1, 3, 7}

# Remaining original codes re-indexed to consecutive ids:
# 2 (fear) -> 1, 4 (happy) -> 2, 5 (sad) -> 3, 6 (angry) -> 4
new_label_map = {2: 1, 4: 2, 5: 3, 6: 4}

# Read both splits from disk with the same exclusion and remapping rules
train_images, train_labels = load_data(
    '/kaggle/input/raf-db-dataset/DATASET/train',
    label_map,
    excluded_labels,
    new_label_map,
)
test_images, test_labels = load_data(
    '/kaggle/input/raf-db-dataset/DATASET/test',
    label_map,
    excluded_labels,
    new_label_map,
)

# Per-class sample counts after remapping
train_label_counts = Counter(train_labels)
test_label_counts = Counter(test_labels)

# Report class balance and array shapes for both splits
print("Train label counts:", train_label_counts)
print("Test label counts:", test_label_counts)
print("Train images shape:", train_images.shape)
print("Test images shape:", test_images.shape)


In [None]:
# Size of each split and of the whole dataset
n_train, n_test = len(train_images), len(test_images)
total_images = n_train + n_test

# Share of each split, in percent
train_percentage = (n_train / total_images) * 100
test_percentage = (n_test / total_images) * 100

# Pie chart inputs
labels = ['Training Data', 'Testing Data']
sizes = [train_percentage, test_percentage]
colors = ['cornflowerblue', 'lightcoral']

# Draw the pie on an explicit Axes; 'equal' keeps it circular
fig, ax = plt.subplots(figsize=(3, 3))
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
ax.set_title('Percentage Distribution of Training and Testing Data')
ax.axis('equal')
plt.show()

In [None]:
from collections import Counter




In [None]:
# Count the distribution of classes in train and test datasets
train_label_counts = Counter(train_labels)
test_label_counts = Counter(test_labels)
print('train_label_counts ',train_label_counts)

# Sorted class ids seen in each split
train_classes = sorted(train_label_counts.keys())
test_classes = sorted(test_label_counts.keys())

# Plot over the union of ids so the train and test bars stay aligned even if a
# class is absent from one split (Counter returns 0 for a missing id).
plot_classes = sorted(set(train_classes) | set(test_classes))
train_counts = [train_label_counts[cls] for cls in plot_classes]
test_counts = [test_label_counts[cls] for cls in plot_classes]
print('test_counts',test_counts)

# Calculate the total number of examples in train and test datasets
total_train = sum(train_counts)
total_test = sum(test_counts)

# Calculate percentages for train and test datasets (0.0 for an empty split)
train_percentages = [(count / total_train) * 100 if total_train else 0.0 for count in train_counts]
test_percentages = [(count / total_test) * 100 if total_test else 0.0 for count in test_counts]

# Tick names for the remapped ids: new id -> original 1-based code -> name in
# `classes`. Ids without a known name fall back to their number.
remapped_names = {new_id: classes[orig_id - 1] for orig_id, new_id in new_label_map.items()}
tick_labels = [remapped_names.get(cls, str(cls)) for cls in plot_classes]

# Plot the distribution with percentages.
# One bar position per class that is actually plotted; using len(classes)
# (all seven emotions) would not match the number of counts.
plt.figure(figsize=(8, 6))
x = range(len(plot_classes))
bar_width = 0.35

plt.bar(x, train_counts, width=bar_width, label="Train", alpha=0.7, color="cornflowerblue")
plt.bar([p + bar_width for p in x], test_counts, width=bar_width, label="Test", alpha=0.7, color="crimson")

# Annotate percentages just above each bar
for i, (train_count, test_count) in enumerate(zip(train_counts, test_counts)):
    plt.text(i, train_count + 0.005 * total_train, f"{train_percentages[i]:.1f}%", ha='center', color="blue", fontsize=9)
    plt.text(i + bar_width, test_count + 0.005 * total_test, f"{test_percentages[i]:.1f}%", ha='center', color="red", fontsize=9)

# Add labels and title; ticks sit between each train/test bar pair
plt.xticks([p + bar_width / 2 for p in x], tick_labels, rotation=45)
plt.xlabel("Emotion Class")
plt.ylabel("Number of Examples")
plt.title("Distribution of Examples in Train and Test Datasets with Percentages")
plt.legend()
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Pool the original train and test splits (stacked along the sample axis)
X_train = np.concatenate((train_images, test_images))
Y_train = np.concatenate((train_labels, test_labels))

# Shapes of the pooled images and labels, as a sanity check
for pooled in (X_train, Y_train):
    print(pooled.shape)

In [None]:
# Per-class sample counts of the pooled label array
train_label_counts_resampled = Counter(Y_train)

# Counts listed in ascending class-id order
train_classes_resampled = sorted(train_label_counts_resampled)
train_counts_resampled = [train_label_counts_resampled[cls] for cls in train_classes_resampled]

# One bar per emotion on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 3))
x_labels = [ 'Fear', 'Happy', 'Sad', 'Angry']
ax.bar(x_labels, train_counts_resampled, color="steelblue")

# Axis captions; the title is intentionally left empty
ax.set_xlabel("Emotion Class")
ax.set_ylabel("Number of Examples")
ax.set_title('')
fig.tight_layout()

plt.show()

In [None]:
def plot_class_distribution(y, title, class_names=None):
    """Draw a bar chart of how many samples each class id has in ``y``.

    Args:
        y: Iterable of class ids.
        title: Title placed above the chart.
        class_names: Optional mapping of class id -> tick label. Ids missing
            from the mapping are labelled with their number. When omitted and
            exactly four classes are present, the bars are labelled
            'Fear', 'Happy', 'Sad', 'Angry' in ascending id order; for any
            other class count the ids themselves are used as labels.
    """
    default_names = ['Fear', 'Happy', 'Sad', 'Angry']

    # Count samples per class and list the counts in ascending id order
    counts_by_class = Counter(y)
    present_classes = sorted(counts_by_class.keys())
    counts = [counts_by_class[cls] for cls in present_classes]

    # Build exactly one tick label per plotted bar, so a missing or extra
    # class can no longer cause a label/count length mismatch.
    if class_names is not None:
        x_labels = [str(class_names.get(cls, cls)) for cls in present_classes]
    elif len(present_classes) == len(default_names):
        x_labels = default_names
    else:
        x_labels = [str(cls) for cls in present_classes]

    # Plot the distribution
    plt.figure(figsize=(6, 3))
    plt.bar(x_labels, counts, color="steelblue")

    # Add labels and title
    plt.xlabel("Emotion Class")
    plt.ylabel("Number of Examples")
    plt.title(title)
    plt.tight_layout()

    # Show the plot
    plt.show()

In [None]:
# Function to reduce the size of a specific class in the dataset
def reduce_class(X, y, target_class, target_size, seed=None):
    """Randomly down-sample one class and keep every other class untouched.

    Args:
        X: Samples, indexable along axis 0 (converted with ``np.asarray``).
        y: 1-D class ids aligned with ``X`` (converted with ``np.asarray``).
        target_class: Class id to down-sample.
        target_size: Maximum number of ``target_class`` samples to keep. If the
            class has fewer samples than this, all of them are kept.
        seed: Optional seed for a dedicated random generator. When ``None``
            the global ``np.random`` state is used.

    Returns:
        Tuple ``(X_reduced, y_reduced)``: the sampled ``target_class`` rows
        first, followed by all rows of the other classes in their original
        order.

    Raises:
        ValueError: If ``target_size`` is negative.
    """
    if target_size < 0:
        raise ValueError("target_size must be non-negative")

    X = np.asarray(X)
    y = np.asarray(y)

    # Separate the target class from everything else
    class_indices = np.where(y == target_class)[0]
    non_class_indices = np.where(y != target_class)[0]

    # Sampling without replacement cannot draw more items than exist, so cap
    # the request at the number of available samples.
    keep = min(target_size, len(class_indices))
    if keep == 0:
        reduced_class_indices = class_indices[:0]
    else:
        rng = np.random if seed is None else np.random.default_rng(seed)
        reduced_class_indices = rng.choice(class_indices, keep, replace=False)

    # Combine the reduced class with the other classes
    final_indices = np.concatenate([reduced_class_indices, non_class_indices])
    return X[final_indices], y[final_indices]

# Seed NumPy's global generator so the random subsample drawn below is the same
# on every Restart & Run All (42 matches the seed used for augmentation and for
# the train/test split in this notebook).
np.random.seed(42)

target_class = 2  # The 'happy' class
target_size = 3500
X_train_reduced, y_train_reduced = reduce_class(X_train, Y_train, target_class, target_size)
# Plot the new distribution after reduction
plot_class_distribution(y_train_reduced, "Class Distribution After Reduction")

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
def augment_classes(images, labels, target_counts):
    """Top up selected classes with randomly augmented copies of their images.

    Args:
        images: Array of images indexed along axis 0 — assumed to be
            (N, H, W, C) as ImageDataGenerator.flow expects; TODO confirm.
        labels: 1-D array of class ids aligned with ``images``.
        target_counts: Mapping of class id -> desired number of samples.

    Returns:
        Tuple ``(augmented_images, augmented_labels)``: copies of the inputs
        followed by the generated samples. Classes already at or above their
        target, and classes with no source images, are left unchanged.
    """
    # Random transform settings used to generate the extra images
    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        vertical_flip=False,
        horizontal_flip=True,
        channel_shift_range=50.0,
        fill_mode='nearest'
    )

    augmented_images = images.copy()  # Copy so the caller's arrays are not modified
    augmented_labels = labels.copy()

    # Raise each requested class to its target count
    for target_class, target_count in target_counts.items():
        # Source images of the class being augmented
        class_images = images[labels == target_class]
        # Number of extra samples still needed
        augment_count = target_count - len(class_images)

        if augment_count <= 0:
            continue

        if len(class_images) == 0:
            # Nothing to derive augmented samples from; report and move on
            # rather than failing inside the generator.
            print(f"Warning: class {target_class} has no samples to augment.")
            continue

        print(f'Classe {target_class}:  has {len(class_images)} samples  are agumented samples are {augment_count} .')

        class_images_augmented = []
        class_labels_augmented = []

        # flow() keeps yielding batches, so stop once enough were collected
        for batch in datagen.flow(class_images, batch_size=1, seed=42):
            # Cast back to uint8 (drops any fractional part of the pixel values)
            aug_image = batch[0].astype(np.uint8)
            class_images_augmented.append(aug_image)
            class_labels_augmented.append(target_class)

            if len(class_images_augmented) >= augment_count:
                break

        # Append the generated samples to the running dataset
        augmented_images = np.vstack((augmented_images, np.array(class_images_augmented)))
        augmented_labels = np.hstack((augmented_labels, np.array(class_labels_augmented)))

    return augmented_images, augmented_labels

# Desired sample count for classes 1, 3 and 4
target_counts = dict.fromkeys((1, 3, 4), 3500)

# Generate the extra samples on top of the reduced dataset
X_train_augmented, y_train_augmented = augment_classes(
    X_train_reduced,
    y_train_reduced,
    target_counts,
)

# Class balance once augmentation is done
plot_class_distribution(y_train_augmented, "Class Distribution After Augmentation")

In [None]:
from sklearn.model_selection import GridSearchCV,train_test_split


In [None]:
# Shuffle and split the balanced dataset: 75% for training, 25% held out.
# NOTE(review): the input already contains augmented copies, so variants of
# the same source image can land on both sides of this split — confirm whether
# that leakage is acceptable for the reported validation scores.
split_options = dict(test_size=0.25, shuffle=True, random_state=42)
X_train, X_test, Y_train, Y_test = train_test_split(
    X_train_augmented,
    y_train_augmented,
    **split_options,
)

# Size of each new split and of the whole balanced dataset
n_train, n_test = len(X_train), len(X_test)
total_images = n_train + n_test

# Share of each split, in percent
train_percentage = (n_train / total_images) * 100
test_percentage = (n_test / total_images) * 100

# Pie chart inputs
labels = ['Training Data', 'Testing Data']
sizes = [train_percentage, test_percentage]
colors = ['cornflowerblue', 'lightcoral']

# Draw the pie on an explicit Axes; 'equal' keeps it circular
fig, ax = plt.subplots(figsize=(3, 3))
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors)
ax.set_title('Percentage Distribution of Training and Testing Data')
ax.axis('equal')
plt.show()

In [None]:
def normalize_images(images):
    """Return ``images`` divided by 255.0, i.e. 8-bit pixel values scaled to [0, 1]."""
    max_pixel_value = 255.0
    return images / max_pixel_value

# Apply the same scaling to the training and held-out images
train_images_normalized, test_images_normalized = map(normalize_images, (X_train, X_test))

In [None]:
def reshape_images(images, model_type='CNN', image_shape=(100, 100, 3)):
    """Reshape a batch of images to ``(n_samples, *image_shape)``.

    Args:
        images: Array whose first axis indexes samples and whose remaining
            elements per sample match ``image_shape`` in total size.
        model_type: Accepted for call-site readability only; it is currently
            ignored, so 'CNN' and 'SVM' produce the same layout.
        image_shape: Per-image shape of the result. Defaults to the
            (100, 100, 3) layout that used to be hard-coded.

    Returns:
        ``images`` reshaped to ``(images.shape[0], *image_shape)``.
    """
    return images.reshape((images.shape[0], *image_shape))

# Copies intended for the SVM (model_type is forwarded, but reshape_images
# returns the same 4-D layout for every model type)
train_images_SVMreshaped, test_images_SVMreshaped = (
    reshape_images(split, model_type='SVM')
    for split in (train_images_normalized, test_images_normalized)
)

# Copies fed to the CNN
train_images_CNNreshaped, test_images_CNNreshaped = (
    reshape_images(split, model_type='CNN')
    for split in (train_images_normalized, test_images_normalized)
)

In [None]:
from tensorflow.keras.utils import to_categorical


In [None]:
# One-hot encode the labels for the CNN; ids are shifted from 1-based to
# 0-based before encoding.
# NOTE(review): `classes` still lists all seven emotions, so the vectors are
# len(classes) wide although only ids 1-4 are produced by the remapping. The
# model's output layer uses the same length, so change both together if at all.
n_outputs = len(classes)
Y_train_cat = to_categorical(Y_train - 1, num_classes=n_outputs)
Y_test_cat = to_categorical(Y_test - 1, num_classes=n_outputs)

In [None]:
# Random transforms applied on the fly while the CNN trains
augmentation_settings = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    vertical_flip=False,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_settings)

# Stream of (images, one-hot labels) batches of 64 for training
train_generator = datagen.flow(
    train_images_CNNreshaped,
    Y_train_cat,
    batch_size=64,
)

In [None]:
from tensorflow.keras.models import Model, Sequential, save_model

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint



In [None]:
# Shape of a single input image, taken from the reshaped held-out set
single_image_shape = test_images_CNNreshaped[0].shape

# Feature extractor: four Conv(3x3, ReLU) + MaxPool(2x2) stages with a growing
# number of filters; only the first convolution declares the input shape.
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(single_image_shape)),
    MaxPooling2D((2, 2)),
]
for n_filters in (64, 128, 512):
    feature_layers += [
        Conv2D(n_filters, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
    ]

# Classifier head: flatten, one dense layer with dropout, softmax output with
# one unit per entry of `classes`
head_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(len(classes), activation='softmax'),
]

cnn_model = Sequential(feature_layers + head_layers)

cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
cnn_model.summary()

In [None]:
# Train the CNN model.
# All three callbacks watch validation accuracy: lower the learning rate on a
# plateau, stop early and restore the best weights, and save the best model.
# NOTE(review): reduce_lr and early_stop share patience=10, so the learning-rate
# drop presumably fires on the same epoch that training stops — consider a
# smaller patience for reduce_lr.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=10, min_delta=0.0001, verbose=1)
early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True, verbose=1)
checkpoint = ModelCheckpoint(filepath='best_CNNModel.keras', monitor='val_accuracy', save_best_only=True, verbose=1)

# train_generator already yields batches (of 64, set in datagen.flow). Keras'
# fit documentation says not to pass batch_size for generator inputs, so the
# former batch_size=32 argument is dropped.
CNN_History = cnn_model.fit(
    train_generator,
    epochs=60,
    validation_data=(test_images_CNNreshaped, Y_test_cat),
    callbacks=[reduce_lr, early_stop, checkpoint]
)