In [1]:
pip install opencv-python numpy pandas tqdm

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [16]:
pip install scikit-image

Collecting scikit-image
  Downloading scikit_image-0.25.1-cp312-cp312-win_amd64.whl.metadata (14 kB)
Collecting tifffile>=2022.8.12 (from scikit-image)
  Downloading tifffile-2025.1.10-py3-none-any.whl.metadata (31 kB)
Collecting lazy-loader>=0.4 (from scikit-image)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Downloading scikit_image-0.25.1-cp312-cp312-win_amd64.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.9 MB 1.3 MB/s eta 0:00:11
   ---------------------------------------- 0.1/12.9 MB 812.7 kB/s eta 0:00:16
   ---------------------------------------- 0.1/12.9 MB 1.0 MB/s eta 0:00:13
   ---------------------------------------- 0.2/12.9 MB 1.0 MB/s eta 0:00:13
    --------------------------------------- 0.3/12.9 MB 1.3 MB/s eta 0:00:10
   - -------------------------------------- 0.3/12.9 MB 1.3 MB/s eta 0:00:10
   - -------------------------------------- 0.4/12.9 MB 1.4 MB/s e


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import cv2
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

In [17]:
from skimage.metrics import structural_similarity as ssim

In [5]:
# Root folder holding the reel videos; the CSV and the frame output
# folder both live directly underneath it.
video_dir = "E:\\SafeReels"
csv_file = video_dir + "\\final_data.csv"
output_dir = video_dir + "\\Frames"

# Create the frame output folder up front; an already existing folder is fine.
os.makedirs(name=output_dir, exist_ok=True)

In [6]:
# Load the reel metadata. reel_id is read as text explicitly: the next cell
# calls .strip("'") on every value, which would fail if pandas inferred a
# numeric dtype for the column.
df = pd.read_csv(csv_file, dtype={"reel_id": str})
df

Unnamed: 0,reel_id,UnsafeType,isSafe
0,'1000188341415358,Safe,1
1,'1006146964533997,Safe,1
2,'1007123741459823,Adult,0
3,'1007420607202244,Safe,1
4,'1008519870870927,Safe,1
...,...,...,...
661,'997262608752854,Adult,0
662,'997300345735398,Harmful,0
663,'998187401354206,Safe,1
664,'999558344609179,Safe,1


In [8]:
# Container for the video file names derived from the reel IDs.
video_list = list()

In [9]:
# Build the list in a single assignment so that re-running this cell replaces
# the list instead of appending a second copy of every file name.
# Each reel_id is stripped of surrounding apostrophes and given an ".mp4" suffix.
video_list = [reel_id.strip("'") + ".mp4" for reel_id in df['reel_id']]

In [11]:
# Display how many video file names were collected.
len(video_list)

666

In [13]:
# Settings consumed by extract_frames and the processing loop.
L = 5                        # frames sampled per video
frame_size = 224, 224        # size passed to cv2.resize for every frame
similarity_threshold = 0.9   # SSIM above this counts as "too similar" to the previous frame
max_search_range = 4         # how many following frames are tried as a replacement

In [18]:
def get_frame_similarity(frame1, frame2):
    """Return the SSIM score of two BGR frames, compared in grayscale.

    Both frames are converted with cv2.COLOR_BGR2GRAY and the resulting
    images are passed to skimage's structural_similarity.
    """
    grayscale_pair = [cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (frame1, frame2)]
    return ssim(*grayscale_pair)

In [19]:
def extract_frames(video_path, save_dir, L=5, frame_size=(224, 224),
                   threshold=None, search_range=None):
    """Sample up to L evenly spaced frames from a video and save them as JPEGs.

    The video is split into L windows of equal length and the first frame of
    each window is read and resized to ``frame_size``. If that frame is too
    similar (SSIM above ``threshold``) to the previously kept frame, up to
    ``search_range`` following frames are tried; the first one whose SSIM
    falls below the threshold is used, otherwise the last frame that could be
    read during the search is used. Every kept frame is written to
    ``save_dir`` as ``<video file name>_frame<i>.jpg``.

    Parameters
    ----------
    video_path : str
        Path of the video file to read.
    save_dir : str
        Existing directory that receives the JPEG files.
    L : int
        Number of frames to sample.
    frame_size : tuple
        Size passed to ``cv2.resize`` for every frame.
    threshold : float, optional
        SSIM limit; defaults to the notebook-level ``similarity_threshold``.
    search_range : int, optional
        Number of following frames to try; defaults to the notebook-level
        ``max_search_range``.

    Returns
    -------
    list
        The kept (resized) frames in sampling order. The list is empty when
        the video cannot be opened, and shorter than L when a sampled
        position cannot be read.
    """
    # Fall back to the notebook-level settings so existing calls behave as before.
    if threshold is None:
        threshold = similarity_threshold
    if search_range is None:
        search_range = max_search_range

    extracted_frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video: {video_path}")
            return extracted_frames

        N = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        last_idx = max(N - 1, 0)  # never seek to a negative index if the count is reported as 0
        W = max(N // L, 1)
        prev_frame = None

        for i in range(L):
            frame_idx = min(i * W, last_idx)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, frame_size)

            if prev_frame is not None and get_frame_similarity(prev_frame, frame) > threshold:
                # Look at the next few frames for one that differs enough.
                # `candidate` only ever holds a frame that was read successfully,
                # so a failed read can no longer turn the kept frame into None.
                candidate = None
                for j in range(1, search_range + 1):
                    search_idx = min(frame_idx + j, last_idx)
                    cap.set(cv2.CAP_PROP_POS_FRAMES, search_idx)
                    ret, new_frame = cap.read()
                    if not ret:
                        break

                    candidate = cv2.resize(new_frame, frame_size)
                    if get_frame_similarity(prev_frame, candidate) < threshold:
                        break

                # Use the differing frame if one was found, else the last frame
                # read during the search; keep the original frame if nothing
                # could be read at all.
                if candidate is not None:
                    frame = candidate

            extracted_frames.append(frame)
            frame_filename = os.path.join(save_dir, f"{os.path.basename(video_path)}_frame{i}.jpg")
            if not cv2.imwrite(frame_filename, frame):
                print(f"Error writing frame: {frame_filename}")

            prev_frame = frame
    finally:
        # Release the capture even if reading, resizing or writing raised.
        cap.release()

    return extracted_frames

In [20]:
# Extract frames for every listed video into its own sub-folder of output_dir,
# named after the video file without its extension.
for video_file in tqdm(video_list, desc="Processing videos"):
    video_save_dir = os.path.join(output_dir, os.path.splitext(video_file)[0])
    os.makedirs(video_save_dir, exist_ok=True)

    video_path = os.path.join(video_dir, video_file)
    extract_frames(video_path, video_save_dir, L=L, frame_size=frame_size)

Processing videos: 100%|█████████████████████████████████████████████████████████████| 666/666 [07:12<00:00,  1.54it/s]
