# Import

In [None]:
import os
import torch.optim as optim
import cv2
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from tensorflow.keras.models import load_model

# Chargement de la donnée

In [2]:
class FER2013Dataset(Dataset):
    def __init__(self, directory, transform=None):
        self.directory = directory
        self.transform = transform
        self.data = []
        for label in os.listdir(directory):
            for image_file in os.listdir(os.path.join(directory, label)):
                self.data.append([os.path.join(directory, label, image_file), label])
                
        self.label_to_idx = {label: idx for idx, label in enumerate(os.listdir(directory))}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert('L') 
        label = self.label_to_idx[label]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

# On définit les transformations des images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# On crée les datasets
train_dataset = FER2013Dataset(directory='archive/train', transform=transform)
test_dataset = FER2013Dataset(directory='archive/test', transform=transform)

# On crée les DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Le modèle

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Deep_Emotion(nn.Module):
    def __init__(self):
        '''
        Deep_Emotion class contains the network architecture.
        '''
        super(Deep_Emotion,self).__init__()
        self.conv1 = nn.Conv2d(1,10,3)
        self.conv2 = nn.Conv2d(10,10,3)
        self.pool2 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(10,10,3)
        self.conv4 = nn.Conv2d(10,10,3)
        self.pool4 = nn.MaxPool2d(2,2)

        self.norm = nn.BatchNorm2d(10)

        self.fc1 = nn.Linear(810,50)
        self.fc2 = nn.Linear(50,7)

        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        self.fc_loc = nn.Sequential(
            nn.Linear(640, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 640)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)
        return x

    def forward(self,input):
        out = self.stn(input)

        out = F.relu(self.conv1(out))
        out = self.conv2(out)
        out = F.relu(self.pool2(out))

        out = F.relu(self.conv3(out))
        out = self.norm(self.conv4(out))
        out = F.relu(self.pool4(out))

        out = F.dropout(out)
        out = out.view(-1, 810)
        out = F.relu(self.fc1(out))
        out = self.fc2(out)

        return out

# Fonction d'entrainement

In [9]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using {device} device")

Using mps device


In [10]:
def train_model(model, criterion, optimizer, train_loader, test_loader, num_epochs=25):
    
    model.to(device)
    print("===================================Start Training===================================")
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        validation_loss = 0.0
        correct = 0
        val_correct = 0
        total = 0
        val_total = 0
        
        for data in train_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        for data in test_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            val_outputs = model(inputs)
            val_loss = criterion(val_outputs, labels)  
            validation_loss += val_loss.item()
            _, val_predicted = torch.max(val_outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (val_predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total

        val_epoch_loss = validation_loss / len(test_loader)
        val_epoch_acc = 100 * val_correct / val_total
        
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')
        print(f'Validation Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_acc:.2f}%')

    print("===================================Training Finished===================================")



In [11]:
model = Deep_Emotion().to(device)

In [12]:
criterion = nn.CrossEntropyLoss()

In [13]:
#optimizer = optim.Adam(model.parameters(), lr=0.005)

In [14]:
num_epochs = 1
#train_model(model, criterion, optimizer, train_loader, test_loader, num_epochs=num_epochs)

# Load and visualize model

In [20]:
import torch

#model.load_state_dict(torch.load('model1.pth', map_location=torch.device('cpu')))
model = load_model('emotion_model.h5')

In [16]:
transformation = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
classes = ('Angry', 'Disgust', 'Fear', 'Happy','Sad', 'Surprise', 'Neutral')


def load_img(path):
    img = Image.open(path)
    img = transformation(img).float()
    img = torch.autograd.Variable(img,requires_grad = True)
    img = img.unsqueeze(0)
    return img.to(device)

In [17]:
font_scale = 1.5
font = cv2.FONT_HERSHEY_PLAIN
rectangle_bgr = (255, 255, 255)
img = np.zeros((500, 500))
text = "Some text in a box"
(text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=1)[0]
text_offset_x = 10
text_offset_y = img.shape[0] - 25
box_cooords = ((text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height - 2))
cv2.rectangle(img, box_cooords[0], box_cooords[1], rectangle_bgr, cv2.FILLED)
cv2.putText(img, text, (text_offset_x, text_offset_y), font, fontScale=font_scale, color=(0, 0, 0), thickness=1)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [19]:
#cap = cv2.VideoCapture(1)
#if not cap.isOpened():
 #   cap = cv2.VideoCapture(0)
#if not cap.isOpened():
 #   raise IOError("Cannot open webcam")

cap = cv2.VideoCapture(0)


while True:
    ret, frame = cap.read()
    faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray, 1.1, 4)
    face_roi = None

    for x, y, w, h in faces:
        roi_gray = gray[y:y+h, x:x+w]
        roi_color = frame[y:y+h, x:x+w]
        cv2.rectangle(frame,  (x, y), (x+w, y+h), (255, 0, 0), 2)
        facess = faceCascade.detectMultiScale(roi_gray)
        if len(facess) != 0:
            for (ex, ey, ew, eh) in facess:
                face_roi = roi_color[ey:ey+eh, ex:ex+ew]
        

    if face_roi != None: 
        graytemp = cv2.cvtColor(face_roi, cv2.COLOR_BGR2GRAY)
        final_image = cv2.resize(graytemp, (48, 48))
        final_image = np.expand_dims(final_image, axis=0)
        final_image = np.expand_dims(final_image, axis=0)
        final_image = final_image / 255.0
        dataa = torch.from_numpy(final_image)
        dataa = dataa.type(torch.FloatTensor)
        dataa = dataa.to(device)
        outputs = model(dataa)
        pred = F.softmax(outputs, dim=1)
        prediction = torch.argmax(pred)
        print(prediction)
        
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale : 1.5
        font = cv2.FONT_HERSHEY_PLAIN

        status = classes[prediction]
        x1, y1, w1, h1 = 0, 0, 175, 75
        cv2.rectangle(frame, (x1, x1), (x1 + w1, y1 + h1), (0, 0, 0), -1)
        cv2.putText(frame, status, (x1 + int(w1/10), y1+ int(h1/2)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, status, (100, 150), font, 3, (90, 0, 255), 2, cv2.LINE_4)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255))

    cv2.imshow("Face Emotion Recognition", frame)
    if cv2.waitKey(2) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows



tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(0, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, device='mps:0')
tensor(6, de

KeyboardInterrupt: 

In [29]:
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
while True:

        # Read the frame
        _, img = cap.read()
        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Detect the faces
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        # Draw the rectangle around each face
        roi = None
        for (x, y, w, h) in faces:
            roi = img[y:y+h, x:x+w]
            roi = cv2.cvtColor(roi,cv2.COLOR_BGR2GRAY)
            roi = cv2.resize(roi,(48,48))
            cv2.imwrite("roi.jpg", roi)
            cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

        imgg = load_img("roi.jpg")
        out = model(imgg)
        pred = F.softmax(out)
        classs = torch.argmax(pred,1)
      #  wrong = torch.where(classs != 3,torch.tensor([1.]).cuda(),torch.tensor([0.]).cuda())
        classs = torch.argmax(pred,1)
        prediction = classes[classs.item()]
        font = cv2.FONT_HERSHEY_SIMPLEX
        org = (50, 50)
        fontScale = 1
        color = (255, 0, 0)
        thickness = 2
        img = cv2.putText(img, prediction, org, font,
                       fontScale, color, thickness, cv2.LINE_AA)

        cv2.imshow('img', img)
        # Stop if (Q) key is pressed
        k = cv2.waitKey(30)
        if k==ord("q"):
            break
cap.release()

  pred = F.softmax(out)


KeyboardInterrupt: 