In [None]:
import os
import re

import cv2
import numpy as np
from fer import FER
from tqdm import tqdm

def extract_fer_features(image_path, detector=None):
    """Run FER emotion detection on one image file and return its scores.

    Args:
        image_path: Path of the image to read with ``cv2.imread``.
        detector: Optional object exposing ``detect_emotions(image)``. When
            omitted, a single ``FER()`` instance is created on first use and
            reused by every later call, instead of being rebuilt per image.

    Returns:
        A list with the values of the ``'emotions'`` mapping of the first
        entry returned by ``detect_emotions`` (in that mapping's iteration
        order), or ``None`` when the image cannot be read or the detector
        returns nothing.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image: {image_path}")
        return None

    if detector is None:
        # Constructing FER() per frame repeats the detector set-up for every
        # image; keep one instance on the function object and reuse it.
        detector = getattr(extract_fer_features, "_shared_detector", None)
        if detector is None:
            detector = FER()
            extract_fer_features._shared_detector = detector

    # NOTE(review): the fer README passes cv2.imread output (BGR) straight to
    # detect_emotions -- confirm this RGB conversion is intended. Left as-is
    # so extracted values do not change.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = detector.detect_emotions(image)

    if not result:
        return None

    # Only the first detection is used; any further entries are ignored.
    return list(result[0]['emotions'].values())

def _natural_sort_key(file_name):
    """Return a sort key that orders digit runs numerically ("frame2" < "frame10")."""
    chunks = re.split(r"(\d+)", file_name)
    # re.split with a capturing group alternates text, digits, text, ... so
    # odd positions are always digit runs; converting only those keeps the
    # element types aligned between any two keys.
    parts = [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]
    # The raw name breaks ties such as "frame01.jpg" vs "frame1.jpg".
    return (parts, file_name)


def process_video_folder(video_folder_path):
    """Extract FER features for every ``.jpg`` file directly inside a folder.

    Files are visited in natural order of their names, so frame numbers
    without zero padding (``frame2.jpg``, ``frame10.jpg``) keep their numeric
    order; a plain string sort would place ``frame10.jpg`` first.

    Args:
        video_folder_path: Folder whose ``.jpg`` files are processed.

    Returns:
        A list with one feature list per image for which
        ``extract_fer_features`` returned a value; images yielding ``None``
        are skipped, so the list may be shorter than the number of files.
    """
    frame_names = sorted(
        (name for name in os.listdir(video_folder_path) if name.endswith(".jpg")),
        key=_natural_sort_key,
    )

    frame_features = []
    for frame_name in frame_names:
        features = extract_fer_features(os.path.join(video_folder_path, frame_name))
        if features is not None:
            frame_features.append(features)

    return frame_features

def pad_sequences(sequences, maxlen, feature_dim):
    """Stack variable-length sequences into one zero-padded array.

    Args:
        sequences: Iterable of sequences; each sequence holds rows of
            ``feature_dim`` values.
        maxlen: Number of time steps in the output. Longer sequences are
            truncated to their first ``maxlen`` rows; shorter ones are
            zero-padded at the end.
        feature_dim: Number of values per row.

    Returns:
        A float64 array of shape ``(len(sequences), maxlen, feature_dim)``.
        An empty sequence yields an all-zero slice.
    """
    padded_sequences = np.zeros((len(sequences), maxlen, feature_dim))
    for i, seq in enumerate(sequences):
        keep = min(len(seq), maxlen)
        # Assigning an empty list into a (0, feature_dim) slice fails to
        # broadcast, so leave the zeros in place when nothing is copied.
        if keep > 0:
            padded_sequences[i, :keep, :] = seq[:keep]
    return padded_sequences

def save_features_by_folder_with_padding(base_folder_path):
    """Extract, pad and save features for every video folder under a base folder.

    Each immediate sub-directory of ``base_folder_path`` is passed to
    ``process_video_folder``. Sub-directories that yield at least one feature
    row get a ``features_padded.npy`` file of shape
    ``(1, max_sequence_length, feature_dim)`` written inside them, where
    ``max_sequence_length`` is the longest sequence found under this base
    folder and ``feature_dim`` is the row width of the first non-empty result.

    Because the padded length is computed per call, calls on different base
    folders can produce arrays with different lengths.

    Args:
        base_folder_path: Folder containing one sub-directory per video.
    """
    video_folders = [
        os.path.join(base_folder_path, folder)
        for folder in os.listdir(base_folder_path)
        if os.path.isdir(os.path.join(base_folder_path, folder))
    ]

    # Run the extraction once per folder and keep the results: the padding
    # length is only known after every folder has been seen, and re-running
    # process_video_folder for the save step would repeat all of the work.
    features_by_folder = {}
    max_sequence_length = 0
    feature_dim = None

    for video_folder in tqdm(video_folders, desc="Determining sequence lengths"):
        video_features = process_video_folder(video_folder)
        if video_features:
            features_by_folder[video_folder] = video_features
            max_sequence_length = max(max_sequence_length, len(video_features))
            if feature_dim is None:
                feature_dim = len(video_features[0])

    # Save step: reuse the cached features instead of extracting again.
    for video_folder in tqdm(video_folders, desc="Saving padded features"):
        video_features = features_by_folder.get(video_folder)
        if video_features:
            padded_features = pad_sequences([video_features], max_sequence_length, feature_dim)
            np.save(os.path.join(video_folder, 'features_padded.npy'), padded_features)

In [None]:
def load_and_concatenate_padded_features(base_folder_path):
    """Load every saved ``features_padded.npy`` under a base folder and stack them.

    Args:
        base_folder_path: Folder whose immediate sub-directories may each
            contain a ``features_padded.npy`` file.

    Returns:
        The loaded arrays concatenated along axis 0. Sub-directories without
        the file are skipped.

    Raises:
        ValueError: If no sub-directory contains ``features_padded.npy``
            (previously surfaced as numpy's generic "need at least one array"
            error), or if the loaded arrays cannot be concatenated.
    """
    # NOTE(review): os.listdir returns entries in arbitrary order, so the row
    # order of the result follows that order -- confirm any labels are
    # aligned the same way.
    video_folders = [
        os.path.join(base_folder_path, folder)
        for folder in os.listdir(base_folder_path)
        if os.path.isdir(os.path.join(base_folder_path, folder))
    ]

    all_features = []
    for video_folder in tqdm(video_folders, desc="Loading padded features"):
        features_path = os.path.join(video_folder, 'features_padded.npy')
        if os.path.exists(features_path):
            all_features.append(np.load(features_path))

    if not all_features:
        raise ValueError(
            f"No features_padded.npy files found under {base_folder_path!r}"
        )

    # Combine all features into one array
    return np.concatenate(all_features, axis=0)

In [None]:
# Root folder of each dataset split (placeholders to be filled in).
train_folder_path = "PATH"
val_folder_path = "PATH"
test_folder_path = "PATH"

# Extract, pad and save features for each split in turn: train, val, test.
for _split_folder_path in (train_folder_path, val_folder_path, test_folder_path):
    save_features_by_folder_with_padding(_split_folder_path)

In [None]:
# Stack the saved per-video feature files of each split into one array.
train_all_features_concatenated = load_and_concatenate_padded_features(
    base_folder_path=train_folder_path
)
val_all_features_concatenated = load_and_concatenate_padded_features(
    base_folder_path=val_folder_path
)
test_all_features_concatenated = load_and_concatenate_padded_features(
    base_folder_path=test_folder_path
)