In [17]:
# Identifier of the interview participant processed by this run. Later cells
# pass it to the frame loader, the prosody analyzer, and the CSV writer.
participant_id = "P3"

In [18]:
%load_ext autoreload
%autoreload 2
from hireverse.utils.utils import *

# Directory passed to the frame loader as the location of the raw videos.
# NOTE(review): BASE_DIR is not defined in this notebook; presumably it is
# provided by the star import above — confirm.
VID_FILE_PATH = BASE_DIR + "/data/raw/videos"
# CSV path handed to FeatureStorage when the extracted features are saved.
OUTPUT_CSV_FILE = BASE_DIR + "/data/processed/interview_features.csv"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
from hireverse.utils.face_analyzer import FaceAnalyzer

# Value passed as `num_selected_frames` to the frame loader. Kept as a named
# constant so the number of frames per participant can be tuned in one place.
NUM_SELECTED_FRAMES = 540

face_analyzer = FaceAnalyzer()
# Load the participant's frames from the raw-video directory.
frames = face_analyzer.get_video_frames_for_participant(
    participant_id, VID_FILE_PATH, num_selected_frames=NUM_SELECTED_FRAMES)

# Lexical Features

In [20]:
# NOTE: lexical feature extraction is currently disabled. Every statement in
# this cell is commented out, so `lexical_features` is never defined here.
# from hireverse.utils.LexicalAnalyser import LexicalAnalyser

# AUDIO_FILE_PATH = os.path.join(BASE_DIR, "data", "raw", "audio", f"trimmed_{participant_id}.wav")
# lexical_analyser = LexicalAnalyser(AUDIO_FILE_PATH)
# # Extract all features
# lexical_features = lexical_analyser.extract_all_features()

# # Print the extracted features
# print(lexical_features)

# Facial Features

### Face Mesh

In [21]:
from hireverse.schemas.frame import Frame


# Run the face-mesh step on every frame and keep only the frames for which it
# produced a (truthy) result.
filtered_frames: List[Frame] = []
for frame in frames:
    frame.facial_landmarks_obj = face_analyzer.process_image_results(frame.image)
    if not frame.facial_landmarks_obj:
        # No usable result for this frame: leave it out of the filtered list.
        continue
    frame.facial_landmarks = frame.facial_landmarks_obj.landmark
    filtered_frames.append(frame)

# Rebind `frames` so that later cells only see frames that have landmarks.
frames = filtered_frames

### Face

In [22]:
# Attach face coordinates to every frame that carries facial landmarks.
for frame in frames:
    if not frame.facial_landmarks:
        continue
    frame.face = face_analyzer.get_face_coordinates(
        frame.facial_landmarks, frame.image)

In [23]:
# Compare each frame with its successor and store the movement between the two
# face bounding boxes on the earlier frame. The final frame has no successor,
# so this loop does not assign any displacement attributes to it.
for current_frame, next_frame in zip(frames, frames[1:]):
    current_center = face_analyzer.get_bouding_box_center(current_frame.face)
    next_center = face_analyzer.get_bouding_box_center(next_frame.face)

    current_frame.head_displacement = (
        face_analyzer.get_displacement_between_two_bounding_boxes(
            current_center, next_center))
    current_frame.head_vertical_displacement = (
        face_analyzer.get_vertical_displacement_between_two_bounding_boxes(
            current_center, next_center))
    current_frame.head_horizontal_displacement = (
        face_analyzer.get_horizontal_distance_between_two_bounding_boxes(
            current_center, next_center))

### Smile

In [24]:
from collections import deque

import numpy as np

SMOOTH_WINDOW = 5
# Sliding window of the most recent happiness probabilities. The deque drops
# its oldest entry automatically once SMOOTH_WINDOW values are stored.
happiness_buffer = deque(maxlen=SMOOTH_WINDOW)


def smooth_happiness(happiness_prob):
    """Return the moving average of the most recent happiness probabilities.

    Args:
        happiness_prob: Happiness probability for the current frame, or None
            when no value is available for that frame.

    Returns:
        The mean of the last ``SMOOTH_WINDOW`` non-None values (including
        ``happiness_prob``). When ``happiness_prob`` is None the window is
        left untouched and ``0.0`` is returned.
    """
    if happiness_prob is None:
        # Return a float so both branches yield the same numeric type.
        return 0.0  # TODO: change?
    happiness_buffer.append(happiness_prob)
    return np.mean(happiness_buffer)


# For every frame: take the face region (expand_ratio=1.1), get its smile
# value, and store the smoothed value on the frame.
for frame in frames:
    face_crop = face_analyzer.get_face_roi_image(
        frame.image, frame.face, expand_ratio=1.1)
    raw_smile = face_analyzer.get_smile_from_frame(face_crop)
    frame.smile = smooth_happiness(raw_smile)

Action: emotion: 100%|██████████| 1/1 [00:00<00:00,  4.52it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 22.96it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 22.52it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 22.19it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 22.67it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 23.64it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 23.88it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 23.54it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 25.24it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 25.16it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 22.44it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 23.42it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 24.71it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 21.78it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00, 14.89it/s]
Action: emotion: 100%|██████████| 1/1 [00:00<00:00,  9.

### Selected Facial Features

In [25]:
# Store the selected facial landmarks derived from each frame's landmark list.
for frame in frames:
    landmarks = frame.facial_landmarks
    frame.two_landmarks_connectors = (
        face_analyzer.get_selected_facial_landmarks(landmarks))

### Head Pose

In [26]:
# `texts` is initialised here but is not used by any of the visible cells.
texts = []
# Compute the face angles for each frame from its image and landmarks.
for frame in frames:
    frame.face_angles = face_analyzer.get_face_angles(
        frame.image, frame.facial_landmarks)

# Prosodic Extraction

In [27]:
from hireverse.schemas.model_features import ProsodicFeatures
from hireverse.utils.prosody_analyzer import ProsodyAnalyzer

# Build the prosody analyzer for the current participant and collect its
# features into a single ProsodicFeatures record.
prosody_analyzer = ProsodyAnalyzer(participant_id)
prosodic_features: ProsodicFeatures = prosody_analyzer.extract_all_features()
# Printed record includes pitch (f0), intensity, formant (f1-f3), jitter,
# shimmer, pause, and duration fields, as shown in the cell output.
print(prosodic_features)

ProsodicFeatures(f0_mean=194.53164067671884, f0_min=74.93033786061116, f0_max=599.8826675003872, f0_range=524.952329639776, f0_sd=156.84479609017473, intensity_mean=-22.336040496826172, intensity_min=-39.19129180908203, intensity_max=0.0, intensity_range=39.19129180908203, intensity_sd=8.249971389770508, f1_mean=688.0886053830984, f1_sd=336.5396110774072, f2_mean=1855.015177890005, f2_sd=451.70836429328835, f3_mean=2950.4588525928652, f3_sd=409.64059236388266, f2_f1_mean=3.1558118079971518, f3_f1_mean=5.078482734923638, f2_f1_sd=1.4097626628672106, f3_f1_sd=2.1006202054699337, jitter=0.04423147417760052, shimmer=0.21153062496496527, percent_unvoiced=18.06900065506954, percent_breaks=2.007267693372538, max_pause_duration=6.240000000000007, avg_pause_duration=0.3424137931034484, duration=439.648)


# Features Storage

### Facial Features Aggregation

In [28]:
from hireverse.utils.feature_storage import FeatureStorage

# Storage helper constructed with the output CSV path.
feature_storage = FeatureStorage(OUTPUT_CSV_FILE)
# Collapse the per-frame values computed above into one facial-feature result
# for the participant.
# NOTE(review): the last frame never receives head-displacement values in the
# displacement cell — confirm the aggregation tolerates that.
facial_features = feature_storage.aggregate_facial_features(frames)

In [29]:
# Save this participant's facial and prosodic features through the storage
# helper (which was constructed with OUTPUT_CSV_FILE).
feature_storage.save_to_csv(participant_id, facial_features, prosodic_features)

# Display Frames

In [30]:
# NOTE: frame display is currently disabled. Every statement in this cell is
# commented out, so running it has no effect.
# for frame in frames:
#     frame.reset_drawable_image()
#     # frame.draw_face_border()
    
#     frame.draw_selected_facial_landmarks(draw_lines=True)
    
#     frame.put_face_angles()
#     # frame.draw_facial_landmarks()
#     frame.display()