In [None]:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torchvision.transforms as transforms
    from torchvision.models import vit_b_16
    from torch.utils.data import DataLoader, Dataset
    import pandas as pd
    import numpy as np
    from PIL import Image
    from torch.optim import AdamW
    from torch.optim.lr_scheduler import StepLR

    # Load and Preprocess the FER2013 Dataset
    class FER2013Dataset(Dataset):
        """Map-style dataset over rows that carry 'pixels' and 'emotion' columns.

        Each item is built from one row: the 'pixels' string (space-separated
        integers) is parsed, reshaped to 48x48, cast to uint8 and wrapped in a
        PIL image; the 'emotion' value is returned as an int label.

        Args:
            data: table indexed positionally via ``.iloc`` and by column name
                (e.g. a pandas DataFrame).
            transform: optional callable applied to the PIL image before it is
                returned.
        """

        def __init__(self, data, transform=None):
            self.transform = transform
            self.data = data

        def __len__(self):
            """Number of rows in the wrapped table."""
            return len(self.data)

        def __getitem__(self, idx):
            """Return ``(image, label)`` for the row at position ``idx``."""
            # Look the row up once and read both columns from it.
            row = self.data.iloc[idx]
            flat_pixels = np.fromstring(row['pixels'], dtype=int, sep=' ')
            face = Image.fromarray(flat_pixels.reshape(48, 48).astype(np.uint8))
            if self.transform:
                face = self.transform(face)
            return face, int(row['emotion'])

    # Training transform: upscale to the ViT input resolution and apply random
    # augmentation. The name `transform` is kept because later cells refer to it.
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),        # Grayscale to RGB
        transforms.Resize((224, 224)),                      # Increase input resolution
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalization (same values, one per channel)
    ])

    # Evaluation transform: the same deterministic steps WITHOUT the random flip /
    # rotation, so validation accuracy (and the "best model" checkpoint decision
    # that depends on it) is measured on un-augmented images and is repeatable.
    eval_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Load FER2013 data
    # NOTE(review): absolute, machine-specific path — point this at your local copy
    # of fer2013.csv (ideally via a configurable data directory).
    data = pd.read_csv("C:/Users/dang0/Downloads/fer2013.csv")
    train_data = FER2013Dataset(data[data['Usage'] == 'Training'], transform=transform)
    val_data = FER2013Dataset(data[data['Usage'] == 'PublicTest'], transform=eval_transform)
    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

    # Build the pre-trained Vision Transformer and adapt it to FER2013
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): newer torchvision releases prefer the `weights=` argument over
    # `pretrained=True` — confirm against the installed version before changing.
    model = vit_b_16(pretrained=True)

    # Swap the classification head for one sized to the 7 FER2013 emotion classes.
    num_classes = 7
    head_in_features = model.heads.head.in_features
    model.heads.head = nn.Linear(head_in_features, num_classes)
    model.to(device)

    # Transfer learning: freeze every parameter, then re-enable gradients for the
    # new head only, so that initially just the final layer is trained.
    model.requires_grad_(False)
    model.heads.head.requires_grad_(True)

    # Define Focal Loss
    def focal_loss(inputs, targets, alpha=0.25, gamma=2):
        """Mean focal loss computed from per-sample cross-entropy.

        Args:
            inputs: logits passed straight to ``F.cross_entropy``.
            targets: class-index targets passed to ``F.cross_entropy``.
            alpha: scalar weight multiplied into every sample's loss.
            gamma: exponent of the ``(1 - p_t)`` modulating factor.

        Returns:
            Scalar tensor: the mean over samples of
            ``alpha * (1 - p_t) ** gamma * ce``, where ``ce`` is the per-sample
            cross-entropy and ``p_t = exp(-ce)``.
        """
        per_sample_ce = F.cross_entropy(inputs, targets, reduction='none')
        # exp(-CE) recovers the probability assigned to the target class.
        prob_target = torch.exp(-per_sample_ce)
        weighted = alpha * (1 - prob_target) ** gamma * per_sample_ce
        return weighted.mean()

    # Optimizer and Scheduler
    # All parameters are registered up front, including the currently frozen ones:
    # frozen parameters receive no gradient, so the optimizer has nothing to apply
    # to them until they are unfrozen below, and it never needs to be rebuilt.
    optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    # Training Loop with Focal Loss
    num_epochs = 20
    best_accuracy = 0

    # Staged unfreezing schedule. Previously the unfreeze check ran after
    # `best_accuracy` had been updated and fired at epoch 0, so the condition was
    # trivially true and the whole backbone was unfrozen after the first epoch.
    # Now encoder blocks are released from the top (closest to the head) downwards,
    # a few at a time, at every `unfreeze_every`-th completed epoch.
    unfreeze_every = 5        # completed epochs between unfreezing stages
    blocks_per_stage = 4      # encoder blocks released per stage
    encoder_blocks = list(model.encoder.layers)
    unfrozen_blocks = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = focal_loss(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        scheduler.step()  # Update learning rate

        # Validation loop for accuracy measurement
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Validation Accuracy: {accuracy:.2f}%')

        # Remember the best accuracy seen BEFORE this epoch so the unfreeze check
        # below is not trivially satisfied by the update that follows.
        previous_best = best_accuracy

        # Save the model if accuracy improves
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(), 'best_vit_emotion_model.pth')

        # Gradually unfreeze: at each stage boundary, if validation accuracy has not
        # collapsed relative to the previous best, release the next group of
        # encoder blocks.
        stage_due = (epoch + 1) % unfreeze_every == 0
        if (stage_due
                and accuracy > previous_best * 0.9
                and unfrozen_blocks < len(encoder_blocks)):
            unfrozen_blocks = min(unfrozen_blocks + blocks_per_stage, len(encoder_blocks))
            if unfrozen_blocks == len(encoder_blocks):
                # Final stage: every block is released, so also release the
                # remaining parameters (patch embedding, class token, positions).
                model.requires_grad_(True)
            else:
                for block in encoder_blocks[-unfrozen_blocks:]:
                    block.requires_grad_(True)
                # The encoder's final LayerNorm sits between the blocks and the head.
                model.encoder.ln.requires_grad_(True)
            print(f'Unfroze top {unfrozen_blocks}/{len(encoder_blocks)} encoder blocks')

    print(f"Training complete. Best validation accuracy: {best_accuracy:.2f}%")


In [None]:
# import matplotlib.pyplot as plt

# # Initialize lists to store loss and accuracy for plotting
# train_losses = []
# val_accuracies = []

# # Training Loop with Focal Loss
# num_epochs = 20
# best_accuracy = 0

# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)

#         # Forward pass
#         outputs = model(images)
#         loss = focal_loss(outputs, labels)

#         # Backward and optimize
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         running_loss += loss.item()

#     scheduler.step()  # Update learning rate

#     # Validation loop for accuracy measurement
#     model.eval()
#     correct = 0
#     total = 0
#     with torch.no_grad():
#         for images, labels in val_loader:
#             images, labels = images.to(device), labels.to(device)
#             outputs = model(images)
#             _, predicted = torch.max(outputs, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
    
#     accuracy = 100 * correct / total
#     train_losses.append(running_loss / len(train_loader))
#     val_accuracies.append(accuracy)

#     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Validation Accuracy: {accuracy:.2f}%')

#     # Save the model if accuracy improves
#     if accuracy > best_accuracy:
#         best_accuracy = accuracy
#         torch.save(model.state_dict(), 'best_vit_emotion_model.pth')

#     # Gradually unfreeze layers if accuracy improves
#     if epoch % 5 == 0 and accuracy > best_accuracy * 0.9:
#         for param in model.parameters():
#             param.requires_grad = True  # Unfreeze more layers gradually

# print(f"Training complete. Best validation accuracy: {best_accuracy:.2f}%")

# # Plot training loss and validation accuracy
# plt.figure(figsize=(12, 5))

# # Plotting training loss
# plt.subplot(1, 2, 1)
# plt.plot(range(1, num_epochs + 1), train_losses, label='Training Loss', color='blue')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.title('Training Loss Over Epochs')
# plt.grid()
# plt.legend()

# # Plotting validation accuracy
# plt.subplot(1, 2, 2)
# plt.plot(range(1, num_epochs + 1), val_accuracies, label='Validation Accuracy', color='green')
# plt.xlabel('Epoch')
# plt.ylabel('Accuracy (%)')
# plt.title('Validation Accuracy Over Epochs')
# plt.grid()
# plt.legend()

# plt.tight_layout()
# plt.show()


In [None]:
import cv2

# Names for the 7 FER2013 class indices (0..6), in dataset order.
emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Deterministic preprocessing for inference: the same resize / normalisation as
# the training `transform`, but without the random flip / rotation, so a given
# frame always yields the same prediction.
inference_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Make sure dropout is disabled regardless of what ran before this cell.
model.eval()

# OpenCV setup for real-time emotion detection
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print("Could not open webcam (device index 0).")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the frame: the whole frame is reduced to a 48x48 grayscale
        # image (the training resolution); no face detection/cropping is done.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (48, 48))
        img_tensor = inference_transform(Image.fromarray(resized)).unsqueeze(0).to(device)

        # Get prediction
        with torch.no_grad():
            output = model(img_tensor)
            _, predicted = torch.max(output, 1)
            emotion = emotion_labels[predicted.item()]

        # Display emotion on frame
        cv2.putText(frame, emotion, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Emotion Recognition', frame)

        # Exit on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame fails mid-loop.
    cap.release()
    cv2.destroyAllWindows()
