In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# !pip install yt_dlp

Collecting yt_dlp
  Downloading yt_dlp-2025.4.30-py3-none-any.whl.metadata (173 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/173.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m163.8/173.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.3/173.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading yt_dlp-2025.4.30-py3-none-any.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yt_dlp
Successfully installed yt_dlp-2025.4.30


In [2]:
import os
import pandas as pd
from tqdm import tqdm
import cv2
import subprocess
import requests
import glob

### Combine CSV files

In [None]:
def load_annotations(folder_path):
    all_data = []

    for filename in tqdm(os.listdir(folder_path)):
        if filename.endswith("-activespeaker.csv"):
            file_path = os.path.join(folder_path, filename)
            video_id = filename.replace("-activespeaker.csv", "")
            df = pd.read_csv(file_path, header=None, sep=',')

            df.columns = ['video_id', 'timestamp', 'x1', 'y1', 'x2', 'y2', 'label', 'entity_id']
            df['video_file'] = video_id
            all_data.append(df)

    full_df = pd.concat(all_data, ignore_index=True)
    return full_df


In [None]:
dir = '/content/drive/MyDrive/aifarm/centerstage'
train_labels = load_annotations(f"{dir}/data/raw/label_train")
test_labels = load_annotations(f"{dir}/data/raw/label_test")

train_labels.head()

100%|██████████| 120/120 [00:21<00:00,  5.47it/s]
100%|██████████| 33/33 [00:13<00:00,  2.48it/s]


Unnamed: 0,video_id,timestamp,x1,y1,x2,y2,label,entity_id,video_file
0,20TAGRElvfE,1740.0,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE
1,20TAGRElvfE,1740.04,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE
2,20TAGRElvfE,1740.07,0.504016,0.180556,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE
3,20TAGRElvfE,1740.11,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE
4,20TAGRElvfE,1740.16,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE


In [None]:
train_labels.shape

(2676314, 9)

In [None]:
test_labels.shape

(768307, 9)

In [None]:
train_labels['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
NOT_SPEAKING,1969134
SPEAKING_AUDIBLE,682404
SPEAKING_NOT_AUDIBLE,24776


Since our system is vision-only (no audio), the distinction between audible and inaudible isn't something the model can directly observe. So we treat both speaking cases as speaking

In [None]:
filtered_df = train_labels[train_labels['label'].isin(['SPEAKING_AUDIBLE', 'SPEAKING_NOT_AUDIBLE', 'NOT_SPEAKING'])]

filtered_df['binary_label'] = filtered_df['label'].apply(
    lambda x: 1 if x in ['SPEAKING_AUDIBLE', 'SPEAKING_NOT_AUDIBLE'] else 0
)

In [None]:
# Check Distribution
print(filtered_df['binary_label'].value_counts())

binary_label
0    1969134
1     707180
Name: count, dtype: int64


In [None]:
BASE_DIR = '/content/drive/MyDrive/aifarm/centerstage'
os.makedirs(os.path.join(BASE_DIR, "data/processed"), exist_ok=True)

filtered_df.to_csv(f"{BASE_DIR}/data/processed/train_labels_clean.csv", index=False)
test_labels.to_csv(f"{BASE_DIR}/data/processed/test_labels_clean.csv", index=False)

In [None]:
filtered_df

Unnamed: 0,video_id,timestamp,x1,y1,x2,y2,label,entity_id,video_file,binary_label
0,20TAGRElvfE,1740.00,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE,0
1,20TAGRElvfE,1740.04,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE,0
2,20TAGRElvfE,1740.07,0.504016,0.180556,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE,0
3,20TAGRElvfE,1740.11,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE,0
4,20TAGRElvfE,1740.16,0.504016,0.183333,0.678715,0.463889,NOT_SPEAKING,20TAGRElvfE_1740_1800:1,20TAGRElvfE,0
...,...,...,...,...,...,...,...,...,...,...
2676309,x-6CtPWVi6E,958.22,0.500000,0.029167,0.705399,0.452083,NOT_SPEAKING,x-6CtPWVi6E_0900_0960:54,x-6CtPWVi6E,0
2676310,x-6CtPWVi6E,958.25,0.500000,0.031250,0.705399,0.454167,NOT_SPEAKING,x-6CtPWVi6E_0900_0960:54,x-6CtPWVi6E,0
2676311,x-6CtPWVi6E,958.29,0.500000,0.031250,0.705399,0.454167,NOT_SPEAKING,x-6CtPWVi6E_0900_0960:54,x-6CtPWVi6E,0
2676312,x-6CtPWVi6E,958.32,0.500000,0.031250,0.705399,0.454167,NOT_SPEAKING,x-6CtPWVi6E_0900_0960:54,x-6CtPWVi6E,0


### Downloading Videos

In [None]:
BASE_DIR = '/content/drive/MyDrive/aifarm/centerstage'
filtered_df = pd.read_csv(f"{BASE_DIR}/data/processed/train_labels_clean.csv")
test_labels = pd.read_csv(f"{BASE_DIR}/data/processed/test_labels_clean.csv")

In [None]:
# === CONFIG ===
VIDEO_LIST_FILE = os.path.join(BASE_DIR, "data/raw/ava_video_list.txt")
DOWNLOAD_DIR = os.path.join(BASE_DIR, "data/videos")
S3_URL_PREFIX = "https://s3.amazonaws.com/ava-dataset/trainval"

os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Load available video filenames
with open(VIDEO_LIST_FILE, "r") as f:
    available_files = set(line.strip() for line in f.readlines())

# Get unique video IDs from both training and test sets
train_video_ids = set(filtered_df['video_id'].unique())
test_video_ids = set(test_labels['video_id'].unique())

# Union of both sets
required_ids = train_video_ids.union(test_video_ids)

# Match with available video filenames
video_files_to_download = []
for file_name in available_files:
    vid = os.path.splitext(file_name)[0]
    if vid in required_ids:
        video_files_to_download.append(file_name)

print(f"Found {len(video_files_to_download)} downloadable videos out of {len(required_ids)}")

Found 153 downloadable videos out of 153


In [None]:
# === DOWNLOAD FUNCTION ===
def download_video(filename, save_dir):
    url = f"{S3_URL_PREFIX}/{filename}"
    local_path = os.path.join(save_dir, filename)

    if os.path.exists(local_path):
        print(f"Already downloaded: {filename}")
        return

    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()

        with open(local_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"Downloaded: {filename}")
    except Exception as e:
        print(f"Failed to download {filename}: {e}")

# === BATCH DOWNLOAD ===
for filename in tqdm(video_files_to_download):
    download_video(filename, DOWNLOAD_DIR)

  1%|          | 1/153 [00:10<27:21, 10.80s/it]

Downloaded: fD6VkIRlIRI.mkv


  1%|▏         | 2/153 [00:16<20:18,  8.07s/it]

Downloaded: 9mLYmkonWZQ.mkv


  2%|▏         | 3/153 [01:28<1:32:23, 36.95s/it]

Downloaded: sUVhd0YTKgw.mkv


  3%|▎         | 4/153 [01:33<1:00:37, 24.41s/it]

Downloaded: tghXjom3120.mkv


  3%|▎         | 5/153 [01:37<41:45, 16.93s/it]  

Downloaded: -IELREHX_js.mp4


  4%|▍         | 6/153 [01:54<41:45, 17.04s/it]

Downloaded: Ekwy7wzLfjc.mkv


  5%|▍         | 7/153 [02:00<32:37, 13.41s/it]

Downloaded: OfMdakd4bHI.mkv


  5%|▌         | 8/153 [02:15<33:26, 13.84s/it]

Downloaded: TzaVHtLXOzY.mkv


  6%|▌         | 9/153 [02:21<27:55, 11.64s/it]

Downloaded: 7nHkh4sP5Ks.mkv


  7%|▋         | 10/153 [03:22<1:03:53, 26.81s/it]

Downloaded: xmqSaQPzL1E.mkv


  7%|▋         | 11/153 [03:28<48:22, 20.44s/it]  

Downloaded: UrsCy6qIGoo.mkv


  8%|▊         | 12/153 [03:40<42:00, 17.88s/it]

Downloaded: J1jDc2rTJlg.mkv


  8%|▊         | 13/153 [04:23<59:12, 25.37s/it]

Downloaded: 2fwni_Kjf2M.mkv


  9%|▉         | 14/153 [04:52<1:01:18, 26.46s/it]

Downloaded: rJKeqfTlAeY.mkv


 10%|▉         | 15/153 [04:57<46:28, 20.21s/it]  

Downloaded: vfjywN5CN0Y.mkv


 10%|█         | 16/153 [05:10<40:45, 17.85s/it]

Downloaded: 0f39OWEqJ24.mp4


 11%|█         | 17/153 [05:30<42:01, 18.54s/it]

Downloaded: P60OxWahxBQ.mkv


 12%|█▏        | 18/153 [05:39<35:29, 15.78s/it]

Downloaded: qx2vAO5ofmo.mp4


 12%|█▏        | 19/153 [05:54<34:44, 15.55s/it]

Downloaded: zR725veL-DI.mkv


 13%|█▎        | 20/153 [06:08<33:29, 15.11s/it]

Downloaded: kMy-6RtoOVU.mkv


 14%|█▎        | 21/153 [06:22<32:04, 14.58s/it]

Downloaded: Ma2hgTmveKQ.mkv


 14%|█▍        | 22/153 [06:32<28:41, 13.14s/it]

Downloaded: P90hF2S1JzA.mkv


 15%|█▌        | 23/153 [07:13<47:04, 21.72s/it]

Downloaded: uPJPNPbWMFk.mp4


 16%|█▌        | 24/153 [07:23<39:11, 18.23s/it]

Downloaded: rUYsoIIE37A.mp4


 16%|█▋        | 25/153 [07:29<31:03, 14.56s/it]

Downloaded: 914yZXz-iRs.mkv


 17%|█▋        | 26/153 [07:56<38:35, 18.23s/it]

Downloaded: BCiuXAuCKAU.mp4


 18%|█▊        | 27/153 [08:06<32:55, 15.68s/it]

Downloaded: Gvp-cj3bmIY.webm


 18%|█▊        | 28/153 [08:27<35:53, 17.23s/it]

Downloaded: QCLQYnt3aMo.webm


 19%|█▉        | 29/153 [08:37<30:58, 14.99s/it]

Downloaded: 5YPjcdLbs5g.mkv


 20%|█▉        | 30/153 [08:45<26:49, 13.09s/it]

Downloaded: E7JcKooKVsM.mp4


 20%|██        | 31/153 [08:57<25:42, 12.64s/it]

Downloaded: LgBQlW6OTr0.mp4


 21%|██        | 32/153 [09:03<21:52, 10.85s/it]

Downloaded: uzPI7FcF79U.mkv


 22%|██▏       | 33/153 [09:38<36:04, 18.04s/it]

Downloaded: Ag-pXiLrd48.mp4


 22%|██▏       | 34/153 [09:57<36:05, 18.20s/it]

Downloaded: phVLLTMzmKk.mkv


 23%|██▎       | 35/153 [10:06<30:27, 15.49s/it]

Downloaded: tjqCzVjojCo.mkv


 24%|██▎       | 36/153 [10:23<31:22, 16.09s/it]

Downloaded: j5jmjhGBW44.mkv


 24%|██▍       | 37/153 [10:52<38:35, 19.96s/it]

Downloaded: z-fsLpGHq6o.mkv


 25%|██▍       | 38/153 [11:01<31:43, 16.55s/it]

Downloaded: rXFlJbXyZyc.mkv


 25%|██▌       | 39/153 [11:14<29:36, 15.58s/it]

Downloaded: KVq6If6ozMY.mkv


 26%|██▌       | 40/153 [11:23<25:10, 13.37s/it]

Downloaded: HV0H6oc4Kvs.mkv


 27%|██▋       | 41/153 [11:29<20:59, 11.25s/it]

Downloaded: 053oq2xB3oU.mkv


 27%|██▋       | 42/153 [11:42<21:42, 11.73s/it]

Downloaded: UOyyTUX5Vo4.mkv


 28%|██▊       | 43/153 [11:48<18:13,  9.94s/it]

Downloaded: xO4ABy2iOQA.mp4


 29%|██▉       | 44/153 [12:00<19:17, 10.62s/it]

Downloaded: _a9SWtcaNj8.mkv


 29%|██▉       | 45/153 [12:13<20:38, 11.47s/it]

Downloaded: fpprSy6AzKk.mkv


 30%|███       | 46/153 [12:23<19:25, 10.90s/it]

Downloaded: 8aMv-ZGD4ic.mkv


 31%|███       | 47/153 [12:33<18:54, 10.70s/it]

Downloaded: 26V9UzqSguo.mp4


 31%|███▏      | 48/153 [12:43<18:09, 10.38s/it]

Downloaded: yMtGmGa8KZ0.mkv


 32%|███▏      | 49/153 [12:56<19:40, 11.35s/it]

Downloaded: KWoSGtglCms.mkv


 33%|███▎      | 50/153 [13:02<16:26,  9.58s/it]

Downloaded: IzvOYVMltkI.mp4


 33%|███▎      | 51/153 [13:14<17:50, 10.49s/it]

Downloaded: 6d5u6FHvz7Q.mkv


 34%|███▍      | 52/153 [13:20<15:12,  9.04s/it]

Downloaded: 2qQs3Y9OJX0.mkv


 35%|███▍      | 53/153 [13:47<24:11, 14.51s/it]

Downloaded: F3dPH6Xqf5M.mp4


 35%|███▌      | 54/153 [13:53<19:28, 11.80s/it]

Downloaded: -5KQ66BBWC4.mkv


 36%|███▌      | 55/153 [14:09<21:41, 13.28s/it]

Downloaded: WlgxRNCHQzw.mkv


 37%|███▋      | 56/153 [14:28<24:09, 14.94s/it]

Downloaded: Ksd1JQFHYWA.mp4


 37%|███▋      | 57/153 [14:40<22:14, 13.90s/it]

Downloaded: g1wyIcLPbq0.mp4


 38%|███▊      | 58/153 [14:59<24:23, 15.40s/it]

Downloaded: Di1MG6auDYo.mkv


 39%|███▊      | 59/153 [15:17<25:23, 16.21s/it]

Downloaded: Hi8QeP_VPu0.mkv


 39%|███▉      | 60/153 [15:25<21:28, 13.86s/it]

Downloaded: _7oWZq_s_Sk.mkv


 40%|███▉      | 61/153 [15:39<21:21, 13.93s/it]

Downloaded: QMwT7DFA5O4.mkv


 41%|████      | 62/153 [15:45<17:24, 11.48s/it]

Downloaded: N1K2bEZLL_A.mkv


 41%|████      | 63/153 [16:01<19:27, 12.97s/it]

Downloaded: iSlDMboCSao.mkv


 42%|████▏     | 64/153 [16:08<16:27, 11.10s/it]

Downloaded: BY3sZmvUp-0.mp4


 42%|████▏     | 65/153 [16:12<13:19,  9.08s/it]

Downloaded: oq_bufAhyl8.mkv


 43%|████▎     | 66/153 [16:28<15:53, 10.96s/it]

Downloaded: l2XO3tQk8lI.mkv


 44%|████▍     | 67/153 [16:34<13:51,  9.67s/it]

Downloaded: 2bxKkUgcqpk.mp4


 44%|████▍     | 68/153 [16:58<19:34, 13.82s/it]

Downloaded: 7YpF6DntOYw.mkv


 45%|████▌     | 69/153 [17:02<15:13, 10.87s/it]

Downloaded: zC5Fh2tTS1U.mp4


 46%|████▌     | 70/153 [17:06<12:09,  8.79s/it]

Downloaded: 2DUITARAsWQ.mp4


 46%|████▋     | 71/153 [17:29<17:42, 12.96s/it]

Downloaded: o4xQ-BEa3Ss.mkv


 47%|████▋     | 72/153 [17:43<17:53, 13.26s/it]

Downloaded: skiZueh4lfY.mkv


 48%|████▊     | 73/153 [17:54<16:53, 12.66s/it]

Downloaded: ax3q-RkVIt4.mp4


 48%|████▊     | 74/153 [18:03<15:15, 11.59s/it]

Downloaded: UgZFdrNT6W0.mkv


 49%|████▉     | 75/153 [18:20<17:14, 13.27s/it]

Downloaded: phrYEKv0rmw.mkv


 50%|████▉     | 76/153 [18:26<14:05, 10.98s/it]

Downloaded: 1j20qq1JyX4.mp4


 50%|█████     | 77/153 [18:37<14:06, 11.14s/it]

Downloaded: Riu4ZKk4YdQ.webm


 51%|█████     | 78/153 [18:43<12:01,  9.61s/it]

Downloaded: 32HR3MnDZ8g.mp4


 52%|█████▏    | 79/153 [18:55<12:32, 10.17s/it]

Downloaded: HKjR70GCRPE.mp4


 52%|█████▏    | 80/153 [19:01<10:59,  9.03s/it]

Downloaded: cKA-qeZuH_w.mkv


 53%|█████▎    | 81/153 [19:07<09:47,  8.17s/it]

Downloaded: S0tkhGJjwLA.mkv


 54%|█████▎    | 82/153 [19:19<10:52,  9.20s/it]

Downloaded: IKdBLciu_-A.mp4


 54%|█████▍    | 83/153 [19:29<10:58,  9.41s/it]

Downloaded: rk8Xm0EAOWs.mkv


 55%|█████▍    | 84/153 [20:04<19:47, 17.22s/it]

Downloaded: uwW0ejeosmk.mkv


 56%|█████▌    | 85/153 [20:11<16:02, 14.15s/it]

Downloaded: _mAfwH6i90E.mkv


 56%|█████▌    | 86/153 [20:21<14:22, 12.87s/it]

Downloaded: 5milLu-6bWI.mp4


 57%|█████▋    | 87/153 [20:40<16:17, 14.81s/it]

Downloaded: rFgb2ECMcrY.mkv


 58%|█████▊    | 88/153 [20:57<16:42, 15.42s/it]

Downloaded: oD_wxyTHJ2I.mp4


 58%|█████▊    | 89/153 [21:01<12:41, 11.90s/it]

Downloaded: iK4Y-JKRRAc.mkv


 59%|█████▉    | 90/153 [21:26<16:29, 15.70s/it]

Downloaded: 4ZpjKfu6Cl8.mkv


 59%|█████▉    | 91/153 [21:37<14:51, 14.38s/it]

Downloaded: PmElx9ZVByw.mp4


 60%|██████    | 92/153 [21:56<16:03, 15.80s/it]

Downloaded: WwoTG3_OjUg.mp4


 61%|██████    | 93/153 [22:02<12:56, 12.94s/it]

Downloaded: 9bK05eBt1GM.mp4


 61%|██████▏   | 94/153 [22:15<12:48, 13.03s/it]

Downloaded: CZ2NP8UsPuE.mkv


 62%|██████▏   | 95/153 [22:33<13:52, 14.35s/it]

Downloaded: Db19rWN5BGo.mkv


 63%|██████▎   | 96/153 [23:15<21:34, 22.72s/it]

Downloaded: XV_FF3WC7kA.mkv


 63%|██████▎   | 97/153 [23:20<16:16, 17.43s/it]

Downloaded: O_NYCUhZ9zw.mp4


 64%|██████▍   | 98/153 [23:35<15:06, 16.49s/it]

Downloaded: CrlfWnsS7ac.mkv


 65%|██████▍   | 99/153 [23:51<14:57, 16.62s/it]

Downloaded: U6m3kNFjdTs.mkv


 65%|██████▌   | 100/153 [23:59<12:17, 13.92s/it]

Downloaded: fNcxxBjEOgw.mkv


 66%|██████▌   | 101/153 [24:07<10:26, 12.05s/it]

Downloaded: JNb4nWexD0I.mkv


 67%|██████▋   | 102/153 [24:13<08:40, 10.21s/it]

Downloaded: 55Ihr6uVIDA.mkv


 67%|██████▋   | 103/153 [24:29<10:03, 12.07s/it]

Downloaded: KHHgQ_Pe4cI.mkv


 68%|██████▊   | 104/153 [24:41<09:43, 11.90s/it]

Downloaded: y7ncweROe9U.mkv


 69%|██████▊   | 105/153 [24:55<10:06, 12.64s/it]

Downloaded: 4gVsDd8PV9U.mp4


 69%|██████▉   | 106/153 [25:15<11:33, 14.75s/it]

Downloaded: Kb1fduj-jdY.mp4


 70%|██████▉   | 107/153 [25:20<09:15, 12.07s/it]

Downloaded: gjdgj04FzR0.mp4


 71%|███████   | 108/153 [25:26<07:36, 10.14s/it]

Downloaded: plkJ45_-pMk.mp4


 71%|███████   | 109/153 [25:39<07:56, 10.83s/it]

Downloaded: K_SpqDJnlps.mkv


 72%|███████▏  | 110/153 [25:48<07:22, 10.30s/it]

Downloaded: VsYPP2I0aUQ.mkv


 73%|███████▎  | 111/153 [25:54<06:18,  9.00s/it]

Downloaded: lDmLcWWBp1E.mkv


 73%|███████▎  | 112/153 [26:10<07:46, 11.37s/it]

Downloaded: x-6CtPWVi6E.mkv


 74%|███████▍  | 113/153 [26:19<06:56, 10.41s/it]

Downloaded: G5Yr20A5z_Q.mkv


 75%|███████▍  | 114/153 [26:23<05:39,  8.70s/it]

Downloaded: bhlFavrh7WU.mkv


 75%|███████▌  | 115/153 [26:37<06:27, 10.18s/it]

Downloaded: xp67EC-Hvwk.mkv


 76%|███████▌  | 116/153 [26:55<07:44, 12.55s/it]

Downloaded: B1MAUxpKaV8.mkv


 76%|███████▋  | 117/153 [27:36<12:43, 21.21s/it]

Downloaded: 9F2voT6QWvQ.mkv


 77%|███████▋  | 118/153 [28:10<14:31, 24.90s/it]

Downloaded: tt0t_a1EDCE.mkv


 78%|███████▊  | 119/153 [28:21<11:46, 20.77s/it]

Downloaded: OGNnUvJq9RI.mkv


 78%|███████▊  | 120/153 [28:36<10:29, 19.07s/it]

Downloaded: yn9WN9lsHRE.mkv


 79%|███████▉  | 121/153 [28:42<08:03, 15.12s/it]

Downloaded: 9Y_l9NsnYE0.mp4


 80%|███████▉  | 122/153 [28:45<05:59, 11.60s/it]

Downloaded: a5mEmM6w_ks.mkv


 80%|████████  | 123/153 [28:56<05:40, 11.35s/it]

Downloaded: -FaXLcSFjUI.mp4


 81%|████████  | 124/153 [29:04<04:58, 10.30s/it]

Downloaded: yo-Kg2YxlZs.mkv


 82%|████████▏ | 125/153 [29:13<04:33,  9.76s/it]

Downloaded: l-jxh8gpxuY.mkv


 82%|████████▏ | 126/153 [29:49<07:56, 17.65s/it]

Downloaded: AYebXQ8eUkM.mkv


 83%|████████▎ | 127/153 [29:53<05:55, 13.66s/it]

Downloaded: 2PpxiG0WU18.mkv


 84%|████████▎ | 128/153 [30:06<05:36, 13.46s/it]

Downloaded: b5pRYl_djbs.mp4


 84%|████████▍ | 129/153 [30:15<04:47, 12.00s/it]

Downloaded: c9pEMjPT16M.webm


 85%|████████▍ | 130/153 [30:31<05:09, 13.45s/it]

Downloaded: sADELCyj10I.mkv


 86%|████████▌ | 131/153 [30:38<04:08, 11.29s/it]

Downloaded: 20TAGRElvfE.mkv


 86%|████████▋ | 132/153 [31:23<07:28, 21.37s/it]

Downloaded: C25wkwAMB-w.mkv


 87%|████████▋ | 133/153 [31:42<06:52, 20.65s/it]

Downloaded: 5BDj0ow5hnA.mp4


 88%|████████▊ | 134/153 [31:51<05:29, 17.32s/it]

Downloaded: 2XeFK-DTSZk.mkv


 88%|████████▊ | 135/153 [32:01<04:29, 14.95s/it]

Downloaded: PNZQ2UJfyQE.mp4


 89%|████████▉ | 136/153 [32:11<03:53, 13.75s/it]

Downloaded: N0Dt9i9IUNg.mkv


 90%|████████▉ | 137/153 [32:15<02:48, 10.54s/it]

Downloaded: N5UD8FGzDek.mkv


 90%|█████████ | 138/153 [32:31<03:03, 12.21s/it]

Downloaded: J4bt4y9ShTA.mkv


 91%|█████████ | 139/153 [32:40<02:37, 11.27s/it]

Downloaded: qrkff49p4E4.mp4


 92%|█████████▏| 140/153 [33:04<03:18, 15.24s/it]

Downloaded: jgAwJ0RqmYg.mp4


 92%|█████████▏| 141/153 [33:10<02:28, 12.34s/it]

Downloaded: u1ltv6r14KQ.mkv


 93%|█████████▎| 142/153 [33:16<01:56, 10.58s/it]

Downloaded: 8nO5FFbIAog.webm


 93%|█████████▎| 143/153 [33:38<02:20, 14.05s/it]

Downloaded: Ov0za6Xb1LM.mkv


 94%|█████████▍| 144/153 [34:09<02:51, 19.01s/it]

Downloaded: xJmRNZVDDCY.mkv


 95%|█████████▍| 145/153 [34:27<02:30, 18.84s/it]

Downloaded: lWXhqIAvarw.mkv


 95%|█████████▌| 146/153 [34:43<02:05, 17.89s/it]

Downloaded: U_WzY2k8IBM.mkv


 96%|█████████▌| 147/153 [35:00<01:45, 17.58s/it]

Downloaded: uNT6HrrnqPU.webm


 97%|█████████▋| 148/153 [35:10<01:16, 15.36s/it]

Downloaded: T-Fc9ctuNVI.mkv


 97%|█████████▋| 149/153 [35:19<00:53, 13.49s/it]

Downloaded: cWYJHb25EVs.mp4


 98%|█████████▊| 150/153 [35:27<00:35, 11.80s/it]

Downloaded: hHgg9WI8dTk.mkv


 99%|█████████▊| 151/153 [35:59<00:35, 17.73s/it]

Downloaded: t1LXrJOvPDg.mkv


 99%|█████████▉| 152/153 [36:04<00:14, 14.12s/it]

Downloaded: AN07xQokfiE.mp4


100%|██████████| 153/153 [36:17<00:00, 14.23s/it]

Downloaded: tNpZtigMc4g.mkv





### Extracting Faces from Videos

In [3]:
BASE_DIR = '/content/drive/MyDrive/aifarm/centerstage'
filtered_df = pd.read_csv(f"{BASE_DIR}/data/processed/train_labels_clean.csv")
test_labels = pd.read_csv(f"{BASE_DIR}/data/processed/test_labels_clean.csv")

In [None]:
LABEL_TO_FOLDER = {
    'SPEAKING_AUDIBLE': 'speaking',
    'SPEAKING_NOT_AUDIBLE': 'speaking',
    'NOT_SPEAKING': 'not_speaking'
}

os.makedirs(SAVE_ROOT, exist_ok=True)

def extract_faces_from_video(video_path, video_df, save_root, split):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening {video_path}")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        print(f"Invalid FPS for {video_path}, skipping.")
        return

    h, w = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_id = os.path.splitext(os.path.basename(video_path))[0]
    video_annots = video_df[video_df['video_id'] == video_id]

    for _, row in video_annots.iterrows():
        frame_idx = int(float(row['timestamp']) * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        success, frame = cap.read()
        if not success:
            continue

        x1 = int(float(row['x1']) * w)
        y1 = int(float(row['y1']) * h)
        x2 = int(float(row['x2']) * w)
        y2 = int(float(row['y2']) * h)

        face = frame[y1:y2, x1:x2]
        if face.size == 0:
            continue

        label = row['label']
        class_folder = LABEL_TO_FOLDER.get(label)
        if class_folder is None:
            continue

        # === Subfolder by video ID ===
        save_dir = os.path.join(save_root, split, class_folder, video_id)
        os.makedirs(save_dir, exist_ok=True)

        # Filename: frame number + entity ID
        filename = f"{frame_idx}_{row['entity_id']}.jpg"
        save_path = os.path.join(save_dir, filename)
        cv2.imwrite(save_path, face)

    cap.release()

In [None]:
SAVE_ROOT = "/content/drive/MyDrive/aifarm/centerstage/data/faces"
VIDEO_DIR = "/content/drive/MyDrive/aifarm/centerstage/data/videos"

def find_video_path(video_id, video_dir=VIDEO_DIR):
    matches = glob.glob(os.path.join(video_dir, f"{video_id}.*"))
    return matches[0] if matches else None

def is_video_already_extracted(video_id, save_root, split):
    speaking_path = os.path.join(save_root, split, "speaking", video_id)
    nonspeaking_path = os.path.join(save_root, split, "not_speaking", video_id)

    # Check if folder exists and contains any .jpg files
    has_speaking = os.path.exists(speaking_path) and glob.glob(os.path.join(speaking_path, "*.jpg"))
    has_nonspeaking = os.path.exists(nonspeaking_path) and glob.glob(os.path.join(nonspeaking_path, "*.jpg"))

    return has_speaking or has_nonspeaking

# Train set
for vid in tqdm(filtered_df['video_id'].unique(), desc="Train Set"):
    if is_video_already_extracted(vid, SAVE_ROOT, split="train"):
        print(f"Video {vid} already processed. Skipping.")
        continue

    path = find_video_path(vid)
    if path:
        print(f" Extracting faces from video {vid}")
        extract_faces_from_video(path, filtered_df, SAVE_ROOT, split="train")

# Test set
for vid in tqdm(test_labels['video_id'].unique(), desc="Test Set"):
    if is_video_already_extracted(vid, SAVE_ROOT, split="test"):
        print(f"Video {vid} already processed. Skipping.")
        continue

    path = find_video_path(vid)
    if path:
        print(f"\nExtracting faces from video {vid}")
        extract_faces_from_video(path, test_labels, SAVE_ROOT, split="test")

Train Set:   1%|          | 1/120 [00:56<1:52:19, 56.63s/it]

Video 20TAGRElvfE already processed. Skipping.


Train Set:   2%|▏         | 2/120 [01:42<1:38:29, 50.08s/it]

Video 4gVsDd8PV9U already processed. Skipping.


Train Set:   2%|▎         | 3/120 [02:16<1:23:41, 42.92s/it]

Video 32HR3MnDZ8g already processed. Skipping.


Train Set:   3%|▎         | 4/120 [02:53<1:18:48, 40.76s/it]

Video 2bxKkUgcqpk already processed. Skipping.


Train Set:   4%|▍         | 5/120 [03:00<54:39, 28.51s/it]  

Video 55Ihr6uVIDA already processed. Skipping.


Train Set:   5%|▌         | 6/120 [03:19<48:03, 25.29s/it]

Video 2PpxiG0WU18 already processed. Skipping.


Train Set:   6%|▌         | 7/120 [03:23<34:19, 18.22s/it]

Video 26V9UzqSguo already processed. Skipping.


Train Set:   7%|▋         | 8/120 [03:33<29:19, 15.71s/it]

Video 2fwni_Kjf2M already processed. Skipping.


Train Set:   8%|▊         | 9/120 [03:34<20:09, 10.90s/it]

Video 0f39OWEqJ24 already processed. Skipping.


Train Set:   8%|▊         | 10/120 [03:35<14:31,  7.92s/it]

Video 2XeFK-DTSZk already processed. Skipping.


Train Set:   9%|▉         | 11/120 [03:42<13:50,  7.62s/it]

Video 9Y_l9NsnYE0 already processed. Skipping.


Train Set:  10%|█         | 12/120 [03:42<09:40,  5.37s/it]

Video 914yZXz-iRs already processed. Skipping.


Train Set:  11%|█         | 13/120 [03:42<06:49,  3.82s/it]

Video 9mLYmkonWZQ already processed. Skipping.


Train Set:  12%|█▏        | 14/120 [03:43<04:50,  2.74s/it]

Video 6d5u6FHvz7Q already processed. Skipping.


Train Set:  12%|█▎        | 15/120 [03:43<03:31,  2.02s/it]

Video 5YPjcdLbs5g already processed. Skipping.


Train Set:  13%|█▎        | 16/120 [03:43<02:37,  1.51s/it]

Video 5BDj0ow5hnA already processed. Skipping.


Train Set:  14%|█▍        | 17/120 [03:44<01:56,  1.14s/it]

Video 9bK05eBt1GM already processed. Skipping.


Train Set:  15%|█▌        | 18/120 [03:44<01:27,  1.16it/s]

Video -5KQ66BBWC4 already processed. Skipping.


Train Set:  16%|█▌        | 19/120 [03:45<01:36,  1.05it/s]

Video 9F2voT6QWvQ already processed. Skipping.
 Extracting faces from video 8aMv-ZGD4ic


Train Set:  17%|█▋        | 20/120 [36:43<16:31:11, 594.72s/it]

 Extracting faces from video 7nHkh4sP5Ks


Train Set:  18%|█▊        | 21/120 [1:06:57<26:24:50, 960.51s/it]

 Extracting faces from video BY3sZmvUp-0


Train Set:  18%|█▊        | 22/120 [1:34:49<31:57:45, 1174.14s/it]

 Extracting faces from video CrlfWnsS7ac


Train Set:  19%|█▉        | 23/120 [2:31:49<49:47:51, 1848.16s/it]

 Extracting faces from video cWYJHb25EVs


Train Set:  20%|██        | 24/120 [2:52:26<44:23:31, 1664.70s/it]

 Extracting faces from video B1MAUxpKaV8


Train Set:  21%|██        | 25/120 [4:11:31<68:19:11, 2588.96s/it]

 Extracting faces from video b5pRYl_djbs


Train Set:  22%|██▏       | 26/120 [5:28:55<83:42:02, 3205.56s/it]

 Extracting faces from video bhlFavrh7WU


Train Set:  22%|██▎       | 27/120 [5:41:16<63:42:13, 2465.95s/it]

 Extracting faces from video cKA-qeZuH_w


Train Set:  23%|██▎       | 28/120 [6:03:37<54:23:51, 2128.60s/it]

 Extracting faces from video AN07xQokfiE


Train Set:  24%|██▍       | 29/120 [6:43:28<55:47:43, 2207.29s/it]

 Extracting faces from video _a9SWtcaNj8


Train Set:  25%|██▌       | 30/120 [7:12:41<51:46:14, 2070.83s/it]

 Extracting faces from video AYebXQ8eUkM


### Analyse if the extracted data is accurate

In [7]:
def count_images_in_video_subfolder(video_id, folder_path):
    subfolder = os.path.join(folder_path, video_id)
    if not os.path.exists(subfolder):
        return 0
    pattern = os.path.join(subfolder, "*.jpg")
    return len(glob.glob(pattern))

folder = f"{BASE_DIR}/data/faces/train/speaking"  # or test/not_speaking, etc.
video_id = "2PpxiG0WU18"

count = count_images_in_video_subfolder(video_id, folder)
print(f"Video '{video_id}' has {count} image(s) in '{folder}'")

Video '2PpxiG0WU18' has 3538 image(s) in '/content/drive/MyDrive/aifarm/centerstage/data/faces/train/speaking'


In [9]:
folder = f"{BASE_DIR}/data/faces/train/not_speaking"  # or test/not_speaking, etc.
video_id = "2PpxiG0WU18"

count = count_images_in_video_subfolder(video_id, folder)
print(f"Video '{video_id}' has {count} image(s) in '{folder}'")

Video '2PpxiG0WU18' has 18023 image(s) in '/content/drive/MyDrive/aifarm/centerstage/data/faces/train/not_speaking'


In [8]:
filtered_df[(filtered_df['binary_label'] == 1) & (filtered_df['video_id'] == '2PpxiG0WU18')].shape

(3710, 10)

In [10]:
filtered_df[(filtered_df['binary_label'] == 0) & (filtered_df['video_id'] == '2PpxiG0WU18')].shape

(19148, 10)

In [None]:
import cv2

cap = cv2.VideoCapture(f"{BASE_DIR}/data/videos/7nHkh4sP5Ks.mkv")
fps = cap.get(cv2.CAP_PROP_FPS)
print("FPS:", fps)
cap.release()