In [None]:
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
import os
import albumentations as A

# 1. Get Train Labels csv

In [5]:
# Load the full label table from AllLabels.csv.
df = pd.read_csv('AllLabels.csv')
# NOTE(review): this bare expression is not the last statement of the cell, so its
# value is never displayed; only the print() below produces output.
df.head()
# Print (rows, columns) of the label table.
print(df.shape)

(8925, 5)


In [6]:
# Load the labels from the CSV
def load_labels(csv_path):
    """Read a label CSV and map each clip id to its four affective-state levels.

    Args:
        csv_path: Path of a CSV file with the columns ``ClipID``, ``Boredom``,
            ``Engagement``, ``Confusion`` and ``Frustration``.

    Returns:
        dict: ``{clip_id: [boredom, engagement, confusion, frustration]}``.
        ``clip_id`` is the ``ClipID`` value as a string with any ``.avi`` /
        ``.mp4`` text removed. When a clip id occurs in several rows, the
        last row wins.
    """
    df = pd.read_csv(csv_path)
    labels_dict = {}
    for _, row in df.iterrows():
        # pandas parses ClipID as an integer when the column carries no file
        # extension, and integers have no .replace(); convert to str first so
        # both "1100011002.avi" and 1100011002 are accepted.
        clip_id = str(row['ClipID']).replace('.avi', '').replace('.mp4', '')
        labels_dict[clip_id] = [row['Boredom'], row['Engagement'], row['Confusion'], row['Frustration']]
    return labels_dict

In [7]:
# Process the dataset
def process_dataset(frames_root_folder, *, output_file="video_folders.txt"):
    """Write the name of every ``<frames_root_folder>/*/*`` entry to a text file.

    One name is written per line. Only the last path component is written,
    not the full path.

    Args:
        frames_root_folder: Root directory to scan (``str`` or ``Path``).
        output_file: File that receives the names; it is overwritten.
            Defaults to ``video_folders.txt`` in the working directory.
    """
    frames_root_folder = Path(frames_root_folder)
    # Sorted so the file content is reproducible regardless of directory order.
    video_folders = sorted(frames_root_folder.glob("*/*"))
    with open(output_file, "w") as f:
        for folder in video_folders:
            # Path.name is the last component on every OS; splitting the string
            # on "\\" only isolated it on Windows and kept the whole path elsewhere.
            f.write(folder.name + "\n")

In [9]:
# Root of the extracted Train frames; process_dataset lists its <person>/<clip> entries
# into video_folders.txt.
# NOTE(review): this folder appears to be produced by getFramesPerVideo in section 2
# below, so on a fresh kernel that step presumably has to run first - confirm.
frames_root = "output_frames/Train"
process_dataset(frames_root)

In [10]:
# Load the text file with ClipIDs (one folder name per line)
with open("video_folders.txt", "r") as f:
    clip_ids = [line.strip() for line in f]
# regex=False strips the extensions as literal text. Read as a regular expression the
# leading "." matches any character, and the default for `regex` differs between
# pandas versions, so the literal behaviour is requested explicitly.
df['ClipID'] = (
    df['ClipID']
    .str.replace('.avi', '', regex=False)
    .str.replace('.mp4', '', regex=False)
)
# Keep only the label rows whose clip has an extracted frame folder.
filtered_df = df[df['ClipID'].isin(clip_ids)]
filtered_df.shape

(5481, 5)

In [None]:
# Save filtered_df to a CSV file (without the index column).
# NOTE(review): this writes to "Labels/TrainLabels.csv" while a later cell reads
# "../Labels/TrainLabels.csv"; confirm both resolve to the same file from the
# working directory actually in use.
filtered_df.to_csv("Labels/TrainLabels.csv", index=False)

# 2. Get Frames per Video

In [23]:
def extract_frames_from_video(video_path, output_folder, max_frames=None):
    """Save the leading frames of a video as numbered JPEG files.

    Frames are written as ``frame_0000.jpg``, ``frame_0001.jpg``, ... inside
    ``output_folder``. The folder is not created here.

    Args:
        video_path: Video to read; converted with ``str`` before being handed
            to OpenCV.
        output_folder: Destination directory (``str`` or ``Path``).
        max_frames: Upper bound on the number of frames to save. ``None`` or
            ``0`` means no limit.

    Returns:
        int: Number of frames for which ``cv2.imwrite`` reported success.
    """
    # Accept plain strings too; the "/" join below needs a Path.
    output_folder = Path(output_folder)
    cap = cv2.VideoCapture(str(video_path))
    frame_count = 0
    written = 0
    try:
        success, frame = cap.read()
        while success:
            if max_frames and frame_count >= max_frames:
                break
            frame_filename = output_folder / f"frame_{frame_count:04d}.jpg"
            # imwrite signals failure through its return value, so count the
            # successful writes instead of assuming every call saved a file.
            if cv2.imwrite(str(frame_filename), frame):
                written += 1
            success, frame = cap.read()
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
    return written

def get_all_video_paths(dataset_dir, subset="Train"):
    """Collect the video files stored under ``<dataset_dir>/<subset>``.

    The expected layout is ``<subset>/<person folder>/<video folder>/<file>``.
    Entries found directly in the subset or person folders are ignored.

    Args:
        dataset_dir: Root directory of the dataset.
        subset: Name of the sub-directory to scan.

    Returns:
        list[Path]: Every entry two folders deep whose name contains a dot
        and whose suffix is ``.avi`` or ``.mp4`` (case-insensitive), in
        directory-iteration order.
    """
    video_suffixes = (".avi", ".mp4")
    subset_root = Path(dataset_dir) / subset
    return [
        candidate
        for person_dir in subset_root.iterdir()
        if person_dir.is_dir()
        for clip_dir in person_dir.iterdir()
        if clip_dir.is_dir()
        for candidate in clip_dir.glob("*.*")
        if candidate.suffix.lower() in video_suffixes
    ]

def getFramesPerVideo(dataset_dir, subset="Train",max_frames_per_video=None, output_base="output_frames"):
    """Extract frames for every video of a dataset subset.

    The frames of each video go to ``<output_base>/<path of the video's
    folder relative to dataset_dir>``, which is created when missing.

    Args:
        dataset_dir: Root directory of the dataset.
        subset: Sub-directory of ``dataset_dir`` to process.
        max_frames_per_video: Forwarded to ``extract_frames_from_video`` as
            ``max_frames``.
        output_base: Root directory that receives the extracted frames.
    """
    video_paths = get_all_video_paths(dataset_dir, subset=subset)
    print(f"Procesando {len(video_paths)} videos del conjunto {subset}...")

    for source_video in tqdm(video_paths, desc="Extrayendo frames"):
        # Mirror the video's folder structure below the output root.
        frames_dir = Path(output_base) / source_video.relative_to(dataset_dir).parent
        frames_dir.mkdir(parents=True, exist_ok=True)
        extract_frames_from_video(
            source_video,
            frames_dir,
            max_frames=max_frames_per_video,
        )

In [24]:
subset = "Train"  # Change to "Test" or "Validation" as needed
getFramesPerVideo(
    dataset_dir="../Datasets/DaiSee/DAiSEE/DataSet/",          # Root path of the DAiSEE dataset
    subset=subset,
    #max_videos=6000,             # How many videos to process (disabled: getFramesPerVideo takes no max_videos argument)
    max_frames_per_video=75,       # Maximum frames per video (None = all)
    output_base="output_frames"   # Folder where the extracted frames are saved
)

Procesando 5481 videos del conjunto Train...


Extrayendo frames: 100%|██████████| 5481/5481 [1:03:58<00:00,  1.43it/s]


# 3. Only Engagement Level

In [26]:
# Reload the filtered Train labels and show the first rows plus (rows, columns).
# NOTE(review): the path is "../Labels/..." whereas the save cell writes "Labels/...";
# confirm the working directory makes both point at the same file.
df_train = pd.read_csv("../Labels/TrainLabels.csv")
print(df_train.head())
print(df_train.shape)

       ClipID  Boredom  Engagement  Confusion  Frustration
0  1100011002        0           2          0            0
1  1100011003        0           2          0            0
2  1100011004        0           3          0            0
3  1100011005        0           3          0            0
4  1100011006        0           3          0            0
(5481, 5)


In [27]:
# Select the ClipIDs with low engagement.
# Drop the other three affective-state columns.
df_train_filtered = df_train.drop(columns=["Boredom", "Confusion", "Frustration"])
# Binarize engagement: levels 0 and 1 become 0, levels 2 and 3 become 1.
df_train_filtered['Engagement'] = df_train_filtered['Engagement'].replace({0: 0, 1: 0, 2: 1, 3: 1})
# Keep the rows whose binarized engagement is 0, i.e. original levels 0 and 1.
df_train_filtered = df_train_filtered[df_train_filtered['Engagement'] == 0]

# Set with the selected ClipIDs, as strings
valid_clip_ids = set(df_train_filtered['ClipID'].astype(str))
# NOTE(review): displaying the set prints every id; its length would be a shorter summary.
valid_clip_ids

{'1100021003',
 '1100021055',
 '1100022005',
 '1100042023',
 '1100042026',
 '1100051007',
 '1100051016',
 '1100051030',
 '1100051031',
 '1100051053',
 '1100052014',
 '1100062008',
 '1100062045',
 '1100062049',
 '1100112002',
 '1100112006',
 '1100122056',
 '1100131017',
 '1100141013',
 '1100141027',
 '1100142033',
 '1100151011',
 '1100151057',
 '1100152010',
 '1100152017',
 '1100152031',
 '1100152055',
 '1100152070',
 '1100161053',
 '1100162005',
 '1100162016',
 '1100171004',
 '1100171008',
 '1100171059',
 '1100172012',
 '1100172017',
 '1100172033',
 '1100172034',
 '1100172043',
 '1100172058',
 '1100412018',
 '1100412033',
 '1100412039',
 '1110031010',
 '1110031025',
 '1110031027',
 '1110031033',
 '1110031038',
 '1110031056',
 '1110031063',
 '1110032014',
 '1110032027',
 '1110032043',
 '1813740138',
 '1813740184',
 '1813740185',
 '2000491077',
 '2000501006',
 '2000501030',
 '2000502053',
 '2000502065',
 '2000502081',
 '2026140257',
 '2026140264',
 '2026140273',
 '2056010134',
 '20560102

In [32]:
# Number of distinct clip ids kept by the engagement filter.
len(valid_clip_ids)

248

In [28]:
from pathlib import Path
import cv2
from tqdm import tqdm

def extract_frames_from_video(video_path, output_folder, max_frames=None):
    """Save the leading frames of a video as numbered JPEG files.

    Frames are written as ``frame_0000.jpg``, ``frame_0001.jpg``, ... inside
    ``output_folder``. The folder is not created here.

    Args:
        video_path: Video to read; converted with ``str`` before being handed
            to OpenCV.
        output_folder: Destination directory (``str`` or ``Path``).
        max_frames: Upper bound on the number of frames to save. ``None`` or
            ``0`` means no limit.

    Returns:
        int: Number of frames for which ``cv2.imwrite`` reported success.
    """
    # Accept plain strings too; the "/" join below needs a Path.
    output_folder = Path(output_folder)
    cap = cv2.VideoCapture(str(video_path))
    frame_count = 0
    written = 0
    try:
        success, frame = cap.read()
        while success:
            if max_frames and frame_count >= max_frames:
                break
            frame_filename = output_folder / f"frame_{frame_count:04d}.jpg"
            # imwrite signals failure through its return value, so count the
            # successful writes instead of assuming every call saved a file.
            if cv2.imwrite(str(frame_filename), frame):
                written += 1
            success, frame = cap.read()
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
    return written

def get_all_video_paths(dataset_dir, subset="Train"):
    """Collect the video files stored under ``<dataset_dir>/<subset>``.

    The expected layout is ``<subset>/<person folder>/<video folder>/<file>``.
    Entries found directly in the subset or person folders are ignored.

    Args:
        dataset_dir: Root directory of the dataset.
        subset: Name of the sub-directory to scan.

    Returns:
        list[Path]: Every entry two folders deep whose name contains a dot
        and whose suffix is ``.avi`` or ``.mp4`` (case-insensitive), in
        directory-iteration order.
    """
    video_suffixes = (".avi", ".mp4")
    subset_root = Path(dataset_dir) / subset
    return [
        candidate
        for person_dir in subset_root.iterdir()
        if person_dir.is_dir()
        for clip_dir in person_dir.iterdir()
        if clip_dir.is_dir()
        for candidate in clip_dir.glob("*.*")
        if candidate.suffix.lower() in video_suffixes
    ]

def getFramesPerVideo_filtered(dataset_dir, valid_clip_ids, subset="Train", max_frames_per_video=None, output_base="Train_Augmentation"):
    """Extract frames only for the videos whose clip folder is in ``valid_clip_ids``.

    Frames of a selected video go to ``<output_base>/<person_id>/<clip_id>``,
    which is created when missing. All videos of the subset are iterated,
    but the others are skipped.

    Args:
        dataset_dir: Root directory of the dataset.
        valid_clip_ids: Container of clip-folder names to keep; membership is
            tested with the folder name as a string.
        subset: Sub-directory of ``dataset_dir`` to scan.
        max_frames_per_video: Forwarded to ``extract_frames_from_video`` as
            ``max_frames``.
        output_base: Root directory that receives the extracted frames.
    """
    video_paths = get_all_video_paths(dataset_dir, subset=subset)
    print(f"Procesando {len(video_paths)} videos del conjunto {subset}...")

    for source_video in tqdm(video_paths, desc="Extrayendo frames"):
        # Relative layout: <subset>/<person_id>/<clip_id>/<video file>.
        path_parts = source_video.relative_to(dataset_dir).parts
        person_id = path_parts[1]
        clip_id = path_parts[2]

        if clip_id not in valid_clip_ids:
            continue

        # Target layout: <output_base>/<person_id>/<clip_id>/<frames>
        frames_dir = Path(output_base) / person_id / clip_id
        frames_dir.mkdir(parents=True, exist_ok=True)
        extract_frames_from_video(source_video, frames_dir, max_frames=max_frames_per_video)

In [31]:
# Extract up to 5 frames for each clip listed in valid_clip_ids.
getFramesPerVideo_filtered(
    dataset_dir="../../Datasets/DaiSee/DAiSEE/DataSet/",
    valid_clip_ids=valid_clip_ids,
    subset="Train",
    max_frames_per_video=5,  # or whatever number you need
    output_base="../output_frames/Train_Augmentation"
)

Procesando 5481 videos del conjunto Train...


Extrayendo frames: 100%|██████████| 5481/5481 [00:05<00:00, 1043.77it/s]


## Data Augmentation

In [None]:
# Define the transformations; this pipeline is the one applied to each image by
# augment_and_save_images.
# NOTE(review): no random seed is set in this notebook, so the augmented images
# presumably differ between runs - confirm whether that is intended.
augmentations = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.Rotate(limit=10, p=0.5),
    A.GaussianBlur(p=0.3),
    A.HueSaturationValue(p=0.3),
    A.RandomScale(scale_limit=0.1, p=0.5),
    A.GaussNoise(p=0.3),
])

def augment_and_save_images(input_dir, output_dir, augment_times=5):
    """Write ``augment_times`` augmented copies of every ``.jpg`` in ``input_dir``.

    Each copy is produced by the module-level ``augmentations`` pipeline and
    saved in ``output_dir`` as ``<original name without .jpg>_aug<i>.jpg``.
    ``output_dir`` is created when missing. Files that OpenCV cannot read
    are reported and skipped.

    Args:
        input_dir: Directory holding the source images.
        output_dir: Directory that receives the augmented images.
        augment_times: Number of augmented copies per source image.

    Returns:
        int: Number of augmented images for which ``cv2.imwrite`` reported
        success.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    written = 0
    # Sorted so the images are visited in a stable order.
    for img_name in sorted(os.listdir(input_dir)):
        if not img_name.lower().endswith(".jpg"):
            continue

        image = cv2.imread(str(input_dir / img_name))
        if image is None:
            # imread returns None for a missing or undecodable file instead of
            # raising; passing None on would fail inside the pipeline.
            print(f"No se pudo leer {img_name}; se omite.")
            continue

        base_name = img_name[:-4]  # strip the ".jpg" extension
        for i in range(augment_times):
            augmented = augmentations(image=image)["image"]
            aug_filename = output_dir / f"{base_name}_aug{i}.jpg"
            if cv2.imwrite(str(aug_filename), augmented):
                written += 1
    return written


In [None]:
from deepface import DeepFace
def load_labels(csv_path):
    """Read a label CSV and map each clip id to its four affective-state levels.

    Args:
        csv_path: Path of a CSV file with the columns ``ClipID``, ``Boredom``,
            ``Engagement``, ``Confusion`` and ``Frustration``.

    Returns:
        dict: ``{clip_id: [boredom, engagement, confusion, frustration]}``
        with the levels as Python ``int``. ``clip_id`` is the ``ClipID``
        value converted to ``str`` with any ``.avi`` / ``.mp4`` text removed.
        When a clip id occurs in several rows, the last row wins.
    """
    emotion_columns = ('Boredom', 'Engagement', 'Confusion', 'Frustration')
    table = pd.read_csv(csv_path)
    return {
        # str() first: ClipID may have been parsed as a number.
        str(record['ClipID']).replace('.avi', '').replace('.mp4', ''): [
            int(record[column]) for column in emotion_columns
        ]
        for _, record in table.iterrows()
    }

In [34]:
def extract_facenet_embedding(img_path):
    """Return the Facenet embedding DeepFace computes for an image.

    Args:
        img_path: Image location handed to ``DeepFace.represent``.

    Returns:
        numpy.ndarray | None: The ``'embedding'`` entry of the first result
        returned by ``DeepFace.represent``, as an array. ``None`` when any
        exception is raised; the error is printed.
    """
    try:
        representations = DeepFace.represent(
            img_path=img_path,
            model_name='Facenet',
            enforce_detection=False,
        )
        first_result = representations[0]
        return np.array(first_result['embedding'])
    except Exception as e:
        # Best effort: report the failing image and let the caller skip it.
        print(f"Error en {img_path}: {e}")
        return None

In [35]:
def process_dataset(frames_root_folder, labels_csv, max_videos=1250):
    """Build one averaged Facenet embedding and one label vector per video folder.

    Every ``<frames_root_folder>/*/*`` entry is treated as a video folder whose
    name is the clip id. For each folder found in the labels, the embeddings of
    its ``*.jpg`` frames are averaged into a single vector. Clip ids missing
    from the labels are appended to ``missing_clip_ids.txt`` and skipped.
    Folders that yield no embedding at all are reported and skipped.

    Args:
        frames_root_folder: Root directory holding ``<person>/<clip>`` folders.
        labels_csv: CSV path handed to ``load_labels``.
        max_videos: Only the first ``max_videos`` folders (in sorted order)
            are processed; ``None`` processes all of them.

    Returns:
        tuple: ``(X, y)`` as numpy arrays with one row per processed video.
        ``y`` has 16 columns: four consecutive slots per emotion in the order
        boredom, engagement, confusion, frustration, with a 1 at the slot of
        the clip's level.
    """
    labels = load_labels(labels_csv)
    X = []
    y = []

    # Collect all video folders. Sorted so the max_videos cut below selects the
    # same folders on every run instead of depending on directory order.
    frames_root_folder = Path(frames_root_folder)
    video_folders = sorted(frames_root_folder.glob("*/*"))
    print(f"Total de carpetas de video encontradas: {len(video_folders)}")

    # Limit to the first `max_videos`
    video_folders = video_folders[:max_videos]
    print(f"Procesando {len(video_folders)} carpetas de video...")

    # Process each video folder
    for video_folder in tqdm(video_folders, desc="Procesando videos"):
        clip_id = video_folder.name

        # Check that the clip_id has labels
        if clip_id not in labels:
            with open("missing_clip_ids.txt", "a") as f:
                f.write(clip_id + "\n")
            print(f"ClipID {clip_id} no encontrado en labels.")
            continue

        emotion_levels = labels[clip_id]  # list of 4 numbers

        # Build a 16-position vector: one one-hot group of 4 per emotion.
        # NOTE(review): assumes every level is in 0..3; a larger value would
        # land in the next emotion's group - confirm against the label files.
        label_vector = np.zeros(16)
        for i, level in enumerate(emotion_levels):  # i = 0 (boredom), 1 (engagement), 2 (confusion), 3 (frustration)
            index = i * 4 + level  # each emotion has 4 levels
            label_vector[index] = 1

        video_embeddings = []

        # Process the frames inside this folder
        for frame_path in video_folder.glob("*.jpg"):
            embedding = extract_facenet_embedding(str(frame_path))
            if embedding is not None:
                video_embeddings.append(embedding)

        if not video_embeddings:
            # The mean of an empty list is a NaN scalar, which would not stack
            # with the other embedding vectors; skip the video instead.
            print(f"ClipID {clip_id} sin embeddings válidos; se omite.")
            continue

        video_embedding = np.mean(video_embeddings, axis=0)

        X.append(video_embedding)
        y.append(label_vector)

    X = np.array(X)
    y = np.array(y)

    print(f"Dataset procesado: {X.shape[0]} ejemplos.")
    return X, y

In [41]:
# Process the train dataset
frames_root = "../output_frames/Train_Augmentation"
labels_csv = "../Labels/TrainLabels.csv"
# process_dataset returns (features, labels). Binding both to the same name kept
# only the labels and discarded the features, so the labels get their own name.
X_train_Augmentation, y_train_Augmentation = process_dataset(frames_root, labels_csv)

Total de carpetas de video encontradas: 248
Procesando 248 carpetas de video...


Procesando videos:  10%|█         | 25/248 [00:44<06:37,  1.78s/it]


KeyboardInterrupt: 