In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Path to your dataset root (each emotion is a subfolder)
DATASET_DIR = 'SampleDataset'  # <-- Change this to your dataset path

# Target image size for CNN input (height, width)
IMG_SIZE = (64, 64)  # You can adjust this as needed

# Get sorted list of emotion folders for consistent class indices
emotions = sorted([d for d in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, d))])

# Map emotion folder names to class indices
class_map = {emotion: idx for idx, emotion in enumerate(emotions)}

# Lists to hold processed images and corresponding labels
images = []
labels = []

# Loop through each emotion folder
for emotion in emotions:
    emotion_folder = os.path.join(DATASET_DIR, emotion)
    for img_name in os.listdir(emotion_folder):
        # Only process image files
        if not img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif')):
            continue
        img_path = os.path.join(emotion_folder, img_name)
        # Read the image
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip unreadable files
        # Resize image to target size
        img_resized = cv2.resize(img, IMG_SIZE)
        # Convert to grayscale
        img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
        # Normalize pixel values to [0, 1]
        img_normalized = img_gray / 255.0
        # Append processed image and its label
        images.append(img_normalized)
        labels.append(class_map[emotion])

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Reshape images to add channel dimension (needed for CNN input: (samples, height, width, channels))
images = images.reshape(-1, IMG_SIZE[0], IMG_SIZE[1], 1)

# Split dataset into training and testing sets (80% train, 20% test, stratified by label)
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)

# Print dataset shapes for verification
print(f'Training data shape: {X_train.shape}')    # (num_train_samples, 64, 64, 1)
print(f'Testing data shape: {X_test.shape}')      # (num_test_samples, 64, 64, 1)
print(f'Training labels shape: {y_train.shape}')  # (num_train_samples,)
print(f'Testing labels shape: {y_test.shape}')    # (num_test_samples,)
print(f'Class mapping: {class_map}')              # {'angry': 0, 'happy': 1, ...}

# Now X_train, y_train, X_test, y_test are ready for CNN training


KeyboardInterrupt: 

In [4]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Path to your dataset root (each emotion is a subfolder)
DATASET_DIR = 'SampleDataset'  # <-- Change this to your dataset path

# Directory to save preprocessed images
PREPROCESSED_DIR = 'preprocessed_images'  # <-- Change this to your desired output path

# Target image size for CNN input (height, width)
IMG_SIZE = (64, 64)  # You can adjust this as needed

# Get sorted list of emotion folders for consistent class indices
emotions = sorted([d for d in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, d))])

# Map emotion folder names to class indices
class_map = {emotion: idx for idx, emotion in enumerate(emotions)}

# Create directory structure for preprocessed images
for emotion in emotions:
    os.makedirs(os.path.join(PREPROCESSED_DIR, emotion), exist_ok=True)

# Lists to hold processed images and corresponding labels (optional, for later use)
images = []
labels = []

# Loop through each emotion folder
for emotion in emotions:
    emotion_folder = os.path.join(DATASET_DIR, emotion)
    save_folder = os.path.join(PREPROCESSED_DIR, emotion)
    for img_name in os.listdir(emotion_folder):
        # Only process image files
        if not img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif')):
            continue
        img_path = os.path.join(emotion_folder, img_name)
        # Read the image
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip unreadable files
        # Resize image to target size
        img_resized = cv2.resize(img, IMG_SIZE)
        # Convert to grayscale
        img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
        # Normalize pixel values to [0, 1]
        img_normalized = img_gray / 255.0
        # Save preprocessed image (scaled back to 0-255 for saving)
        save_path = os.path.join(save_folder, img_name)
        cv2.imwrite(save_path, (img_normalized * 255).astype(np.uint8))
        # Optionally, store in memory for later use
        images.append(img_normalized)
        labels.append(class_map[emotion])

# Convert lists to numpy arrays for CNN input (optional)
images = np.array(images)
labels = np.array(labels)
images = images.reshape(-1, IMG_SIZE[0], IMG_SIZE[1], 1)

# Split dataset into training and testing sets (optional)
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)

# Print dataset shapes for verification
print(f'Training data shape: {X_train.shape}')    # (num_train_samples, 64, 64, 1)
print(f'Testing data shape: {X_test.shape}')      # (num_test_samples, 64, 64, 1)
print(f'Training labels shape: {y_train.shape}')  # (num_train_samples,)
print(f'Testing labels shape: {y_test.shape}')    # (num_test_samples,)
print(f'Class mapping: {class_map}')              # {'angry': 0, 'happy': 1, ...}


Training data shape: (7200, 64, 64, 1)
Testing data shape: (1800, 64, 64, 1)
Training labels shape: (7200,)
Testing labels shape: (1800,)
Class mapping: {'angry': 0, 'boaring': 1, 'disgust': 2, 'fear': 3, 'happy': 4, 'neural': 5, 'sad': 6, 'stress': 7, 'suprise': 8}


In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))


True
1
0
NVIDIA GeForce RTX 4060 Laptop GPU


In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm

# Set device to CUDA if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Parameters
IMG_SIZE = 64
BATCH_SIZE = 64
EPOCHS = 30
DATASET_DIR = 'preprocessed_images'  # Your preprocessed images
NUM_CLASSES = 9

# Custom Dataset
class EmotionDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.samples = []
        self.labels = []
        self.transform = transform
        self.class_map = {name: idx for idx, name in enumerate(sorted(os.listdir(root_dir)))}
        for emotion in self.class_map:
            folder = os.path.join(root_dir, emotion)
            for fname in os.listdir(folder):
                if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif')):
                    self.samples.append(os.path.join(folder, fname))
                    self.labels.append(self.class_map[emotion])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img = cv2.imread(self.samples[idx], cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        if self.transform:
            img = self.transform(img)
        else:
            img = torch.tensor(img, dtype=torch.float32).unsqueeze(0) / 255.0
        label = self.labels[idx]
        return img, label

# Data transforms
transform = transforms.Compose([
    transforms.ToTensor(),  # Converts HxW [0,255] to 1xHxW [0,1]
    transforms.Normalize([0.5], [0.5])  # Normalize to [-1, 1]
])

# Prepare dataset and dataloaders
full_dataset = EmotionDataset(DATASET_DIR, transform=transform)
indices = np.arange(len(full_dataset))
train_idx, val_idx = train_test_split(indices, test_size=0.2, stratify=full_dataset.labels, random_state=42)
train_set = torch.utils.data.Subset(full_dataset, train_idx)
val_set = torch.utils.data.Subset(full_dataset, val_idx)

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Optimized CNN Model
class EmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(EmotionCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2), nn.Dropout(0.25),

            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2), nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * (IMG_SIZE//4) * (IMG_SIZE//4), 256), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = EmotionCNN(NUM_CLASSES).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_losses = []
val_accuracies = []

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0

    # Wrap train_loader with tqdm for progress bar
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}", unit="batch") as tepoch:
        for imgs, labels in tepoch:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * imgs.size(0)
            tepoch.set_postfix(loss=loss.item())

    avg_loss = running_loss / len(train_loader.dataset)
    train_losses.append(avg_loss)

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = correct / total
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {avg_loss:.4f} - Val Acc: {val_acc:.4f}")

# Plot training loss and validation accuracy
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(range(1, EPOCHS+1), train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(range(1, EPOCHS+1), val_accuracies, label='Validation Accuracy', color='green')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Validation Accuracy over Epochs')
plt.legend()

plt.tight_layout()
plt.show()

Using device: cuda


Epoch 1/30: 100%|██████████| 113/113 [00:44<00:00,  2.52batch/s, loss=1.9] 


Epoch 1/30 - Loss: 2.4308 - Val Acc: 0.2722


Epoch 2/30: 100%|██████████| 113/113 [00:16<00:00,  6.88batch/s, loss=1.94]


Epoch 2/30 - Loss: 1.7935 - Val Acc: 0.3467


Epoch 3/30: 100%|██████████| 113/113 [00:16<00:00,  7.02batch/s, loss=1.55]


Epoch 3/30 - Loss: 1.6748 - Val Acc: 0.4056


Epoch 4/30: 100%|██████████| 113/113 [00:16<00:00,  6.91batch/s, loss=1.47]


KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Class labels (update to match your dataset folder order)
class_labels = ['Angry', 'Boring', 'Disgust', 'Fear', 'Happy', 'Neural', 'Sad', 'Stress', 'Suprise']

# Model definition (must match training)
class EmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(EmotionCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2), nn.Dropout(0.25),

            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2), nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Load model
model = EmotionCNN(len(class_labels)).to(device)
model.load_state_dict(torch.load("emotion_cnn.pth", map_location=device))
model.eval()

# Preprocessing for webcam frames
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Load Haar Cascade for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

cap = cv2.VideoCapture(0)
print("Press 'q' to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    for (x, y, w, h) in faces:
        face = gray[y:y+h, x:x+w]
        face_resized = cv2.resize(face, (64, 64))
        face_tensor = transform(face_resized).unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(face_tensor)
            pred = torch.argmax(output, 1).item()
            label = class_labels[pred]
        color = (0, 255, 0)
        cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
        cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
    cv2.imshow('Webcam Emotion Recognition', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()


In [None]:
model.save('emotion_model.keras')  # Keras format recommended[5][6]


In [12]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np

# Model definition (must match training)
class EmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(EmotionCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2), nn.Dropout(0.25),

            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2), nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

Sample Real-Time Inference Code:

In [15]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from torchvision import transforms
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_CLASSES = 9
emotion_dict = {0: "Angry", 1: "Boring", 2: "Disgust", 3: "Fear", 4: "Happy", 5: "Neutral", 6: "Sad", 7: "Stress", 8: "Surprise"}
model = EmotionCNN(NUM_CLASSES).to(device)

# Load the saved weights
model = torch.load("emotion_cnn_best.pth")
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(),  # Ensure it's 1 channel
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Webcam capture
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Preprocess full frame
    frame_resized = cv2.resize(frame, (64, 64))
    input_tensor = transform(frame_resized).unsqueeze(0).to(device)

    # Predict
    with torch.no_grad():
        outputs = model(input_tensor)
        probs = F.softmax(outputs, dim=1)
        predicted = torch.argmax(probs, 1).item()
        emotion = emotion_dict[predicted]

    # Display result
    cv2.putText(frame, emotion, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("Emotion Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

TypeError: 'collections.OrderedDict' object is not callable

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import cv2
import numpy as np

# Define model
class EmotionCNN(nn.Module):
    def __init__(self, num_classes):
        super(EmotionCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2), nn.Dropout(0.25),

            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2), nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load model
NUM_CLASSES = 9
emotion_dict = {
    0: "Angry", 1: "Boring", 2: "Disgust", 3: "Fear",
    4: "Happy", 5: "Neutral", 6: "Sad", 7: "Stress", 8: "Surprise"
}
model = EmotionCNN(NUM_CLASSES).to(device)
model.load_state_dict(torch.load("emotion_cnn_best.pth", map_location=device))
model.eval()

# Image transform
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Grayscale(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Start webcam
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Resize full frame
    frame_resized = cv2.resize(frame, (64, 64))
    input_tensor = transform(frame_resized).unsqueeze(0).to(device)

    # Predict emotion
    with torch.no_grad():
        outputs = model(input_tensor)
        probs = F.softmax(outputs, dim=1)
        predicted = torch.argmax(probs, 1).item()
        emotion = emotion_dict[predicted]

    # Show prediction on original frame
    cv2.putText(frame, emotion, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow("Emotion Recognition", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


RuntimeError: Error(s) in loading state_dict for EmotionCNN:
	Unexpected key(s) in state_dict: "features.13.weight", "features.13.bias", "features.15.weight", "features.15.bias", "features.15.running_mean", "features.15.running_var", "features.15.num_batches_tracked". 
	size mismatch for classifier.1.weight: copying a param with shape torch.Size([512, 16384]) from checkpoint, the shape in current model is torch.Size([256, 32768]).
	size mismatch for classifier.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for classifier.4.weight: copying a param with shape torch.Size([9, 512]) from checkpoint, the shape in current model is torch.Size([9, 256]).

In [None]:
import tensorflow as tf

# Convert to TFLite with float16 quantization
converter = tf.lite.TFLiteConverter.from_saved_model('emotion_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_quant_model = converter.convert()
with open('emotion_model_float16.tflite', 'wb') as f:
    f.write(tflite_quant_model)

# For int8 quantization (requires representative dataset)
def representative_dataset():
    for data in dataset.take(100):
        yield [data.astype(np.float32)]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
tflite_quant_model = converter.convert()
with open('emotion_model_int8.tflite', 'wb') as f:
    f.write(tflite_quant_model)
