# Setup

In [1]:
import tensorflow as tf
import numpy as np
import cv2
import mediapipe as mp
import time
import glob

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam




# Parameters & Model

In [146]:
# Class index -> [display label, box colour, text colour].
# Several class indices intentionally share the same display label.
# NOTE(review): index order presumably follows the training dataset's class
# order — confirm against the training notebook.
emotions = {
    class_id: [label, box_color, text_color]
    for class_id, (label, box_color, text_color) in enumerate((
        ('Tidak Percaya Diri', (255, 0, 0), (255, 255, 255)),
        ('Tidak Percaya Diri', (255, 0, 0), (255, 255, 255)),
        ('Gugup', (255, 255, 0), (0, 51, 51)),
        ('Percaya Diri', (0, 255, 0), (255, 255, 255)),
        ('Gugup', (255, 255, 0), (0, 51, 51)),
        ('Percaya Diri', (0, 255, 0), (255, 255, 255)),
        ('Netral', (0, 0, 255), (255, 255, 255)),
    ))
}

# Size of the softmax output: one unit per entry in `emotions`.
num_classes = len(emotions)

# Input shape handed to the network constructor.
input_shape = (48, 48, 1)

# Checkpoint files for the two networks.
weights_1 = 'saved_models/vggnet.h5'
weights_2 = 'saved_models/vggnet_up.h5'

In [147]:
class VGGNet(Sequential):
    """VGG-style convolutional classifier built on top of Keras ``Sequential``.

    Layer layout, in order:
      * ``Rescaling(1/255)`` on the raw input,
      * four convolutional stages with 64, 128, 256 and 512 filters; each
        stage is two 3x3 ReLU convolutions (each followed by batch
        normalisation), then max-pooling and dropout,
      * ``Flatten`` -> ``Dense(1024)`` -> ``Dropout(0.5)`` -> ``Dense(256)``
        -> softmax ``Dense(num_classes)``.

    The model is compiled inside the constructor with Adam and categorical
    cross-entropy, tracking accuracy.
    """

    def __init__(self, input_shape, num_classes, checkpoint_path, lr=1e-3):
        """Assemble and compile the network.

        Args:
            input_shape: Shape passed to the first (rescaling) layer.
            num_classes: Number of units in the softmax output layer.
            checkpoint_path: Stored on the instance as ``checkpoint_path``;
                not read by the constructor itself.
            lr: Learning rate for the Adam optimizer.
        """
        super().__init__()

        # Bring pixel values into [0, 1] as part of the model itself.
        self.add(Rescaling(1./255, input_shape=input_shape))

        shared_conv_args = dict(activation='relu', kernel_initializer='he_normal')

        # (filters, dropout rate) for each convolutional stage.
        stage_specs = ((64, 0.5), (128, 0.4), (256, 0.5), (512, 0.4))
        for stage_no, (n_filters, drop_rate) in enumerate(stage_specs):
            # Only the very first convolution of the network uses Keras'
            # default 'valid' padding; all the others pad to 'same'.
            opening_padding = 'valid' if stage_no == 0 else 'same'
            for padding in (opening_padding, 'same'):
                self.add(Conv2D(n_filters, (3, 3), padding=padding, **shared_conv_args))
                self.add(BatchNormalization())
            self.add(MaxPool2D())
            self.add(Dropout(drop_rate))

        # Classifier head.
        self.add(Flatten())
        self.add(Dense(1024, activation='relu'))
        self.add(Dropout(0.5))
        self.add(Dense(256, activation='relu'))
        self.add(Dense(num_classes, activation='softmax'))

        self.compile(
            optimizer=Adam(learning_rate=lr),
            loss=categorical_crossentropy,
            metrics=['accuracy'],
        )

        # Kept so callers can do `model.load_weights(model.checkpoint_path)`.
        self.checkpoint_path = checkpoint_path

In [148]:
# First network: build it, then restore its weights from its own checkpoint.
model_1 = VGGNet(input_shape=input_shape,
                 num_classes=num_classes,
                 checkpoint_path=weights_1)
model_1.load_weights(model_1.checkpoint_path)

# Second network: same architecture, different checkpoint file.
model_2 = VGGNet(input_shape=input_shape,
                 num_classes=num_classes,
                 checkpoint_path=weights_2)
model_2.load_weights(model_2.checkpoint_path)

# Inference

In [149]:
# face detection (MediaPipe)
mp_drawing = mp.solutions.drawing_utils
mp_face_detection = mp.solutions.face_detection

# Detector instance shared by every call to `inference`.
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)

In [150]:
# image preprocessing
def detection_preprocessing(image, h_max=360):
    """Shrink `image` so that its height does not exceed `h_max`.

    The aspect ratio is preserved. Images that are already `h_max` pixels
    tall or shorter are returned unchanged (the very same object).

    Args:
        image: Array whose first two axes are (height, width). Both 2-D
            (single-channel) and 3-D (multi-channel) arrays are accepted.
        h_max: Maximum allowed height in pixels.

    Returns:
        The input image, or a resized copy with height `h_max`.
    """
    # Slice instead of unpacking three values so 2-D images work as well.
    h, w = image.shape[:2]
    if h > h_max:
        ratio = h_max / h
        # Never ask cv2.resize for a zero-width output on very narrow images.
        w_ = max(1, int(w * ratio))
        image = cv2.resize(image, (w_, h_max))
    return image

def resize_face(face):
    """Turn one face crop into a 48x48 tensor with a trailing channel axis.

    Args:
        face: Face crop; a new axis is inserted at position 2, so a
            two-dimensional (height, width) array is expected.

    Returns:
        The result of ``tf.image.resize`` to (48, 48).
    """
    face_tensor = tf.convert_to_tensor(face)
    with_channel = tf.expand_dims(face_tensor, axis=2)
    return tf.image.resize(with_channel, (48, 48))

def recognition_preprocessing(faces):
    """Resize every face crop with `resize_face` and pack them into one tensor.

    Args:
        faces: Iterable of face crops accepted by `resize_face`.

    Returns:
        A single tensor built from the list of resized faces.
    """
    resized_faces = list(map(resize_face, faces))
    return tf.convert_to_tensor(resized_faces)

In [151]:
# image prediction
def _crop_faces(image, detections):
    """Cut a grayscale crop out of `image` for every usable detection.

    Bounding boxes are converted from relative to pixel coordinates and
    clipped to the image. Boxes that end up with zero area after clipping are
    skipped, because an empty crop cannot be colour-converted or resized.

    Returns:
        (faces, pos): parallel lists of grayscale crops and their
        (x1, y1, x2, y2) pixel boxes.
    """
    H, W = image.shape[:2]
    faces = []
    pos = []
    for detection in detections:
        box = detection.location_data.relative_bounding_box

        x = int(box.xmin * W)
        y = int(box.ymin * H)
        w = int(box.width * W)
        h = int(box.height * H)

        x1 = max(0, x)
        y1 = max(0, y)
        x2 = min(x + w, W)
        y2 = min(y + h, H)

        if x2 <= x1 or y2 <= y1:
            # Degenerate box (fully outside the frame or zero-sized).
            continue

        faces.append(cv2.cvtColor(image[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY))
        pos.append((x1, y1, x2, y2))
    return faces, pos


def inference(image):
    """Detect faces in a BGR frame, classify each one and draw the result.

    Each face is classified by summing the outputs of `model_1` and
    `model_2` and taking the arg-max. For every face a bounding box, a filled
    label banner, the label text and a score line are drawn onto `image`.

    Args:
        image: BGR image (it is converted with ``COLOR_BGR2RGB`` for the
            detector). It is modified in place.

    Returns:
        The same `image` object, annotated when faces were found and
        untouched otherwise.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = face_detection.process(rgb_image)

    if not results.detections:
        return image

    faces, pos = _crop_faces(image, results.detections)
    if not faces:
        # Every detection was degenerate; nothing to classify.
        return image

    x = recognition_preprocessing(faces)

    # verbose=0 keeps Keras from printing a progress bar for every frame.
    y_1 = model_1.predict(x, verbose=0)
    y_2 = model_2.predict(x, verbose=0)
    y_sum = y_1 + y_2
    labels = np.argmax(y_sum, axis=1)

    # NOTE(review): the colours stored in `emotions` are handed to OpenCV
    # unchanged while the frame is BGR; if they were authored as RGB, red and
    # blue are swapped on screen — confirm the intended palette.
    for (x1, y1, x2, y2), label, summed in zip(pos, labels, y_sum):
        name, box_color, text_color = emotions[int(label)]

        # Face outline.
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2, lineType=cv2.LINE_AA)

        # Filled banner just above the face, holding the label text.
        cv2.rectangle(image, (x1, y1 - 20), (x2 + 20, y1), box_color, -1,
                      lineType=cv2.LINE_AA)
        cv2.putText(image, f'{name}', (x1, y1 - 5),
                    0, 0.6, text_color, 2, lineType=cv2.LINE_AA)

        # Show the ensemble's mean probability for the class that was
        # actually chosen, so the number always matches the label.
        accuracy_text = f'Acc: {summed[label] / 2:.2f}'
        cv2.putText(image, accuracy_text, (x1, y1 - 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
    return image

# Video

In [152]:
import tkinter as tk
from tkinter import filedialog
import os

In [153]:
def resize_image(image, target_height=360):
    """Scale `image` to `target_height` pixels tall, keeping its aspect ratio.

    Unlike a conditional downscale, the image is always resized, whether it
    is taller or shorter than `target_height`.

    Args:
        image: Array whose first two axes are (height, width).
        target_height: Height of the returned image in pixels.

    Returns:
        The resized image produced by ``cv2.resize`` with ``INTER_AREA``.
    """
    src_height, src_width = image.shape[0], image.shape[1]
    scale = target_height / src_height
    new_size = (int(src_width * scale), target_height)  # cv2 wants (width, height)
    return cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)

In [154]:
# Create the Tk root window and immediately hide it, so that only dialogs
# opened later are shown rather than an empty main window.
root = tk.Tk()
root.withdraw()

''

In [155]:
# Open the file-picker dialog, then run inference on the chosen video frame
# by frame, showing a live preview and saving the annotated result.
file_path = filedialog.askopenfilename()

if file_path:
    cap = cv2.VideoCapture(file_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError(f'Cannot open video file: {file_path}')

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if frame_width <= 0 or frame_height <= 0:
            raise IOError(f'Video reports an invalid frame size: {file_path}')

        fps = cap.get(cv2.CAP_PROP_FPS)
        target_h = 360
        target_w = int(target_h * frame_width / frame_height)

        # Derive the output name from the selected file's name
        file_name = os.path.basename(file_path)
        output_file_path = 'run/' + os.path.splitext(file_name)[0] + '_output.mp4'
        # The writer does not create missing directories on its own.
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

        out = cv2.VideoWriter(output_file_path, cv2.VideoWriter_fourcc('M','J','P','G'),
                              fps, (target_w, target_h))
        if not out.isOpened():
            print(f'Warning: could not open {output_file_path} for writing; '
                  'frames will be previewed but not saved.')

        while True:
            success, image = cap.read()
            if not success:
                break

            image = resize_image(image, target_height=target_h)
            # The writer only accepts frames of exactly the size it was
            # opened with; guard against a one-pixel rounding difference
            # between `target_w` and the width chosen by `resize_image`.
            if image.shape[1] != target_w or image.shape[0] != target_h:
                image = cv2.resize(image, (target_w, target_h),
                                   interpolation=cv2.INTER_AREA)

            result = inference(image)
            out.write(result)
            cv2.imshow('Frame', result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # number of frames processed so far
            print(f'Frame: {cap.get(cv2.CAP_PROP_POS_FRAMES)}', end='\r')
    finally:
        # Always free the capture, the writer and the preview window, even
        # when inference raises half-way through the video.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()


Frame: 423.0

In [156]:
# # Membuka dialog pilih file
# file_path = filedialog.askopenfilename()

# if file_path:
#     cap = cv2.VideoCapture(file_path)
#     frame_width = int(cap.get(3))
#     frame_height = int(cap.get(4))
#     fps = cap.get(cv2.CAP_PROP_FPS)
#     target_h = 360
#     target_w = int(target_h * frame_width / frame_height)
    
#     out = cv2.VideoWriter('run/cobalah.mp4', cv2.VideoWriter_fourcc('M','J','P','G'),
#                           fps, (target_w, target_h))
    
#     while True:
#         success, image = cap.read()
#         if success:
#             image = resize_image(image, target_height=target_h)
#             result = inference(image)
#             out.write(result)
#             cv2.imshow('Frame', result)
#             if cv2.waitKey(1) & 0xFF == ord('q'):
#                 break
#         else:
#             break

#     cap.release()
#     out.release()
#     cv2.destroyAllWindows()


In [157]:
# video = 'tes/interview.mp4'
# cap = cv2.VideoCapture(video)
# frame_width = int(cap.get(3))
# frame_height = int(cap.get(4))
# fps = cap.get(cv2.CAP_PROP_FPS)
# target_h = 360
# target_w = int(target_h * frame_width / frame_height)
# out = cv2.VideoWriter('run/cobaaa5.mp4',cv2.VideoWriter_fourcc('M','J','P','G'),
#                       fps, (target_w,target_h))

In [158]:
# while True:
#     success, image = cap.read()
#     if success:
#         image = resize_image(image)
#         result = inference(image)
#         out.write(result)
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break
#     else:
#         break
    
# cap.release()
# out.release()
# cv2.destroyAllWindows()