In [None]:
# Mount Google Drive at /content/drive (Colab-only API). The dataset paths used by
# later cells all live under /content/drive/MyDrive, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install opencv-python mediapipe numpy Pillow tqdm av torch torchvision pytorchvideo

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting av
  Downloading av-14.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.6 kB)
Collecting pytorchvideo
  Downloading pytorchvideo-0.1.5.tar.gz (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.7/132.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting numpy
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.2-py3-none-any.whl.metadata (1.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==

In [None]:
!pip uninstall -y numpy
!pip install numpy==1.26.4 --force-reinstall

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydf 0.11.0 requires protobuf<6.0.0,>=5.29.1, but you have protobuf 4.25.7 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.26.4


In [None]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.140-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.140-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.140 ultralytics-thop-2.0.14


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from ultralytics import YOLO
import mediapipe as mp

# === PATHS ===
# Root folder that is walked for .mp4 files; each video's parent folder name becomes its category.
BASE_PATH = "/content/drive/MyDrive/FINAL DATASET"
# Destination CSV for the per-frame feature rows.
CSV_OUTPUT_PATH = "/content/drive/MyDrive/FINAL DATASET/frame_level_feature_data.csv"

# === Setup Models ===
mp_pose = mp.solutions.pose
# NOTE(review): a single Pose instance with static_image_mode=False is reused for every video,
# so presumably its tracking state can carry over from one video to the next — confirm whether
# it should be re-created per video.
pose_model = mp_pose.Pose(static_image_mode=False)
# Detector loaded from the "yolov8n.pt" weights; process_video keeps only boxes with class id 0.
yolo = YOLO("yolov8n.pt")

# === Feature Extraction ===
# Accumulates one dict per processed frame; converted to a DataFrame once all videos are done.
frame_level_data = []

def process_video(video_path, category):
    """Extract per-frame person-box and pose features from one video.

    Every decoded frame produces one dict that is appended to the module-level
    ``frame_level_data`` list. Each dict holds the category, video file name,
    frame index, frame geometry, the first detected person box (area, centre,
    distance of that centre from the frame centre) and 66 ``pose_<i>`` values
    (x, y for each of the 33 pose landmarks, in pixels).

    Args:
        video_path: Path of the video file to read.
        category: Label stored in the ``category`` column of every row.

    Returns:
        None. Results are accumulated in ``frame_level_data``.

    Notes:
        * When no person is detected the box area is 0, the centre falls back
          to the frame centre and the distance is 0.
        * When no pose is detected all 66 pose values are 0.
        * A video that cannot be opened is reported and contributes no rows.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # An unreadable file used to be skipped without any message.
        cap.release()
        print(f"[Video Error] Could not open: {video_path}")
        return

    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_area = frame_width * frame_height
        frame_center_x = frame_width // 2
        frame_center_y = frame_height // 2
        video_name = os.path.basename(video_path)

        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # --- YOLO Person Detection ---
            results = yolo.predict(frame, verbose=False)[0]
            # Class id 0 is the one this notebook treats as "person".
            person_boxes = [b for b in results.boxes if int(b.cls[0]) == 0]
            if person_boxes:
                # Only the first person box is used.
                # NOTE(review): presumably the highest-confidence one — confirm ordering.
                x1, y1, x2, y2 = map(int, person_boxes[0].xyxy[0].cpu().numpy())
                bbox_area = (x2 - x1) * (y2 - y1)
                center_x = (x1 + x2) // 2
                center_y = (y1 + y2) // 2
                distance_to_center = float(
                    np.linalg.norm([center_x - frame_center_x, center_y - frame_center_y])
                )
            else:
                bbox_area = 0
                # Fall back to the true frame centre. The vertical coordinate must
                # come from the frame height, not the width.
                center_x = frame_center_x
                center_y = frame_center_y
                distance_to_center = 0

            # --- Pose Keypoints ---
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = pose_model.process(frame_rgb)

            if result.pose_landmarks:
                keypoints = []
                for lm in result.pose_landmarks.landmark:
                    # Landmark coordinates are scaled by the frame size to get pixels.
                    keypoints.extend([int(lm.x * frame_width), int(lm.y * frame_height)])
            else:
                keypoints = [0] * 66  # 33 keypoints × 2

            frame_level_data.append({
                "category": category,
                "video_name": video_name,
                "frame": frame_index,
                "frame_area": frame_area,
                "frame_width": frame_width,
                "frame_height": frame_height,
                "bbox_area": bbox_area,
                "center_x": center_x,
                "center_y": center_y,
                "distance_to_center": distance_to_center,
                **{f"pose_{i}": v for i, v in enumerate(keypoints)}
            })

            frame_index += 1
    finally:
        # Release the capture handle even if detection or pose inference raises.
        cap.release()

# Traverse dataset and extract frame-level features.
# Collect every video path first so the run shows ONE progress bar over all videos
# (instead of a separate bar per directory) and visits them in a reproducible order.
video_paths = []
for root, dirs, files in os.walk(BASE_PATH):
    dirs.sort()  # in-place sort makes os.walk descend into sub-folders in a stable order
    video_paths.extend(
        os.path.join(root, file)
        for file in sorted(files)
        if file.lower().endswith(".mp4")  # also accept ".MP4"
    )

for video_path in tqdm(video_paths, desc="Extracting frame features"):
    # The immediate parent folder name is the video's category label.
    category = os.path.basename(os.path.dirname(video_path))
    process_video(video_path, category)

# Save frame-level features to CSV
frame_df = pd.DataFrame(frame_level_data)
frame_df.to_csv(CSV_OUTPUT_PATH, index=False)
print(f"✅ Frame-level data saved to: {CSV_OUTPUT_PATH}")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 108MB/s]
100%|██████████| 2/2 [00:00<00:00, 14952.96it/s]
100%|██████████| 20/20 [02:20<00:00,  7.01s/it]
100%|██████████| 20/20 [02:02<00:00,  6.11s/it]
100%|██████████| 20/20 [02:37<00:00,  7.87s/it]
100%|██████████| 20/20 [02:19<00:00,  6.99s/it]
100%|██████████| 20/20 [02:15<00:00,  6.76s/it]
100%|██████████| 20/20 [02:10<00:00,  6.52s/it]
100%|██████████| 20/20 [02:17<00:00,  6.85s/it]
100%|██████████| 20/20 [01:30<00:00,  4.53s/it]
100%|██████████| 20/20 [03:29<00:00, 10.46s/it]
100%|██████████| 20/20 [03:45<00:00, 11.30s/it]
100%|██████████| 20/20 [04:20<00:00, 13.00s/it]
100%|██████████| 20/20 [04:28<00:00, 13.44s/it]
100%|██████████| 20/20 [04:01<00:00, 12.09s/it]
100%|██████████| 20/20 [05:03<00:00, 15.19s/it]
100%|██████████| 20/20 [04:41<00:00, 14.07s/it]
100%|██████████| 20/20 [05:34<00:00, 16.74s/it]
100%|██████████| 20/20 [03:31<00:00, 10.58s/it]
100%|██████████| 20/20 [04:26<00:00, 13.33s/it]
100%|██████████| 20/20 [05:18<00:00

✅ Frame-level data saved to: /content/drive/MyDrive/FINAL DATASET/frame_level_feature_data.csv


In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import av
import torch
from torchvision import transforms
from pytorchvideo.models.hub import slowfast_r50

# === PATHS ===
# Dataset root that is scanned for videos, and the CSV that receives one embedding row per video.
BASE_PATH = "/content/drive/MyDrive/FINAL DATASET"
CSV_OUTPUT_PATH = "/content/drive/MyDrive/FINAL DATASET/video_level_embedding_data.csv"

# === Setup SlowFast Model ===
# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained network, rebuild it from every block except the last one,
# then move it to the target device in inference mode.
sf_model = slowfast_r50(pretrained=True)
sf_model = torch.nn.Sequential(*sf_model.blocks[:-1])
sf_model = sf_model.to(device).eval()

# Per-frame preprocessing: resize, centre-crop, convert to tensor, normalise each channel.
transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.45, 0.45, 0.45], std=[0.225, 0.225, 0.225]),
])

def prepare_slowfast_inputs(frames):
    """Build the two-pathway input list for the SlowFast model.

    Each frame is run through the module-level ``transform`` and the results
    are stacked along a new dimension 1 to form the fast clip. The slow clip
    keeps every 4th entry along that dimension. Both clips get a leading
    batch dimension and are moved to ``device``.

    Args:
        frames: Sequence of images accepted by ``transform``.

    Returns:
        ``[slow, fast]`` — a two-element list of tensors on ``device``.
    """
    per_frame = [transform(image) for image in frames]
    fast_clip = torch.stack(per_frame, dim=1)
    slow_clip = fast_clip[:, ::4]  # temporal stride of 4 for the slow pathway
    return [clip.unsqueeze(0).to(device) for clip in (slow_clip, fast_clip)]

def extract_slowfast_embedding(video_path, num_frames=32):
    """Compute a single SlowFast feature vector for one video.

    All frames of the first video stream are decoded. The clip is then brought
    to exactly ``num_frames`` frames: shorter videos are padded by repeating
    the last frame, longer ones are sampled at evenly spaced indices. The clip
    is passed through ``sf_model`` and the squeezed output is returned.

    Args:
        video_path: Path of the video file to read.
        num_frames: Number of frames fed to the model (default 32).

    Returns:
        A NumPy array with the squeezed model output, or ``None`` when the
        video has no decodable frames or any step raises (the error is printed).
    """
    try:
        # Context manager closes the container (and its file handle) even when
        # decoding fails; previously it was left open for every video.
        with av.open(video_path) as container:
            frames = [
                Image.fromarray(f.to_ndarray(format="rgb24"))
                for f in container.decode(video=0)
            ]
        if not frames:
            return None

        if len(frames) < num_frames:
            # Pad short clips by repeating the final frame.
            frames += [frames[-1]] * (num_frames - len(frames))
        else:
            # Evenly spaced sample covering the first through the last frame.
            idxs = np.linspace(0, len(frames) - 1, num_frames).astype(int)
            frames = [frames[i] for i in idxs]

        inputs = prepare_slowfast_inputs(frames)
        with torch.no_grad():
            features = sf_model(inputs).squeeze().cpu().numpy()
        return features
    except Exception as e:
        # Best-effort: report the failing video and let the caller skip it.
        print(f"[SlowFast Error] {video_path}: {e}")
        return None

# Extract and store video-level embeddings
embedding_data = []

# Collect every video path first so the run shows ONE progress bar over all videos
# (instead of a separate bar per directory) and visits them in a reproducible order.
video_paths = []
for root, dirs, files in os.walk(BASE_PATH):
    dirs.sort()  # in-place sort makes os.walk descend into sub-folders in a stable order
    video_paths.extend(
        os.path.join(root, file)
        for file in sorted(files)
        if file.lower().endswith(".mp4")  # also accept ".MP4"
    )

for video_path in tqdm(video_paths, desc="Extracting SlowFast embeddings"):
    # The immediate parent folder name is the video's category label.
    category = os.path.basename(os.path.dirname(video_path))
    video_name = os.path.basename(video_path)
    features = extract_slowfast_embedding(video_path)
    if features is None:
        # Extraction failed or the video had no frames; the error was already printed.
        continue
    row = {"category": category, "video_name": video_name, **{f"embed_{i}": v for i, v in enumerate(features)}}
    embedding_data.append(row)

# Save to CSV
embedding_df = pd.DataFrame(embedding_data)
embedding_df.to_csv(CSV_OUTPUT_PATH, index=False)
print(f"Video-level embeddings saved to: {CSV_OUTPUT_PATH}")

100%|██████████| 1/1 [00:00<00:00, 3032.76it/s]
100%|██████████| 20/20 [00:08<00:00,  2.28it/s]
100%|██████████| 20/20 [00:08<00:00,  2.44it/s]
100%|██████████| 20/20 [00:09<00:00,  2.21it/s]
100%|██████████| 20/20 [00:09<00:00,  2.19it/s]
100%|██████████| 20/20 [00:08<00:00,  2.48it/s]
100%|██████████| 20/20 [00:09<00:00,  2.12it/s]
100%|██████████| 20/20 [00:09<00:00,  2.13it/s]
100%|██████████| 20/20 [00:07<00:00,  2.63it/s]
100%|██████████| 20/20 [00:11<00:00,  1.78it/s]
100%|██████████| 20/20 [00:10<00:00,  1.98it/s]
100%|██████████| 20/20 [00:10<00:00,  1.87it/s]
100%|██████████| 20/20 [00:11<00:00,  1.68it/s]
100%|██████████| 20/20 [00:08<00:00,  2.44it/s]
100%|██████████| 20/20 [00:13<00:00,  1.48it/s]
100%|██████████| 20/20 [00:12<00:00,  1.54it/s]
100%|██████████| 20/20 [00:12<00:00,  1.57it/s]
100%|██████████| 20/20 [00:10<00:00,  1.86it/s]
100%|██████████| 20/20 [00:12<00:00,  1.58it/s]
100%|██████████| 20/20 [00:13<00:00,  1.50it/s]
100%|██████████| 20/20 [00:10<00:00,  1.

Video-level embeddings saved to: /content/drive/MyDrive/FINAL DATASET/video_level_embedding_data.csv
