In [1]:
import os
import shutil
import pandas as pd
from tqdm import tqdm

In [2]:
def organize_by_label(
    video_root: str,
    labels_df: pd.DataFrame,
    split_name: str,
    out_root: str = "dataset/daisee-separated/",
) -> None:
    """
    Copy every labelled video found under ``video_root`` into
    ``out_root/{split_name}/{label}/filename``.

    Parameters
    ----------
    video_root : str
        Directory that is walked recursively. Only files ending in
        ``.mp4`` or ``.avi`` (compared case-insensitively) are considered.
    labels_df : pd.DataFrame
        Must provide a ``ClipID`` column (matched against the video file
        name) and an ``Engagement`` column (used as the label folder name).
        Both are converted to strings; whitespace around ``ClipID`` values
        is ignored.
    split_name : str
        Sub-directory of ``out_root`` that receives this split.
    out_root : str, optional
        Root directory of the organised copy. The default is the location
        this function always wrote to before the parameter existed.

    Returns
    -------
    None
        The function works purely through side effects: directories are
        created as needed and files are copied with ``shutil.copy2``.
        Videos whose file name has no entry in ``labels_df`` are reported
        on stdout and skipped.
    """
    destination = os.path.join(out_root, split_name)
    os.makedirs(destination, exist_ok=True)

    # build a lookup from ClipID -> label; strip the ids so stray whitespace
    # in the CSV cannot make a clip look unlabelled
    clip_ids = labels_df['ClipID'].astype(str).str.strip()
    label_map = dict(zip(clip_ids, labels_df['Engagement'].astype(str)))

    video_exts = ('.mp4', '.avi')

    # os.walk yields one item per directory, so the bar counts directories
    for dirpath, _, filenames in tqdm(os.walk(video_root), desc=split_name, unit="dir"):
        for fname in filenames:
            # lower() so that e.g. ".MP4" / ".AVI" files are not silently skipped
            if not fname.lower().endswith(video_exts):
                continue
            if fname not in label_map:
                print("no label found for this file:",fname)
                continue
            lbl = label_map[fname]
            dst_dir = os.path.join(destination, lbl)
            os.makedirs(dst_dir, exist_ok=True)

            src_path = os.path.join(dirpath, fname)
            dst_path = os.path.join(dst_dir, fname)
            shutil.copy2(src_path, dst_path)  # copy2 preserves metadata

In [3]:
# Read AllLabels.csv and keep just the clip id and its Engagement label,
# the only two columns organize_by_label looks at.
label_path = "dataset/DAiSEE/DAiSEE/Labels/AllLabels.csv"
labels_df = pd.read_csv(label_path)[['ClipID', 'Engagement']]

# Source directories holding the original videos, one per split.
train_videos = "dataset/DAiSEE/DAiSEE/DataSet/Train"
val_videos = "dataset/DAiSEE/DAiSEE/DataSet/Validation"
test_videos = "dataset/DAiSEE/DAiSEE/DataSet/Test"

# Copy each split into its label-separated layout: Train, Validation, Test.
for _split, _videos in (
    ("Train", train_videos),
    ("Validation", val_videos),
    ("Test", test_videos),
):
    organize_by_label(_videos, labels_df, split_name=_split)

5553it [02:11, 42.27it/s] 
1743it [00:39, 44.22it/s] 
276it [00:02, 265.15it/s]

no label found for this file: 9988260241.avi
no label found for this file: 9988260212.avi
no label found for this file: 9988260269.avi
no label found for this file: 9988260130.avi
no label found for this file: 9988260243.avi
no label found for this file: 9988260247.avi
no label found for this file: 9988260138.avi
no label found for this file: 9988260231.avi
no label found for this file: 9988260257.avi
no label found for this file: 9988260281.avi
no label found for this file: 9988260145.avi
no label found for this file: 9988260245.avi
no label found for this file: 9988260254.avi
no label found for this file: 9988260246.avi
no label found for this file: 9988260126.avi
no label found for this file: 9988260210.avi
no label found for this file: 9988260230.avi
no label found for this file: 9988260163.avi
no label found for this file: 9988260121.avi
no label found for this file: 9988260216.avi
no label found for this file: 9988260239.avi
no label found for this file: 998826017.avi
no label fo

1888it [00:35, 52.71it/s] 


In [7]:
# Report how many files each label folder of the separated Test split holds.
test_split_dir = "dataset/daisee-separated/Test"

# os.listdir returns entries in arbitrary order; sort so the report is stable
for label in sorted(os.listdir(test_split_dir)):
    # named label_dir (not label_path) so the CSV path stored in the
    # notebook-level `label_path` variable is not overwritten
    label_dir = os.path.join(test_split_dir, label)
    if not os.path.isdir(label_dir):
        # ignore stray files (e.g. OS metadata) sitting next to the label folders
        continue
    count = len(os.listdir(label_dir))

    print(label,count)

0 4
1 84
3 814
2 882
