In [1]:
import os
import cv2
import torch
import numpy as np
from torchvision import models, transforms
from PIL import Image
from sklearn.cluster import KMeans
import random

In [2]:
resnet = models.resnet50(pretrained=True)
resnet = torch.nn.Sequential(*list(resnet.children())[:-1])  
resnet.eval()



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [3]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [4]:
def extract_features(video_path, sample_rate):
    cap = cv2.VideoCapture(video_path)
    features, frames = [], []
    idx = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if idx % sample_rate == 0:  
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(rgb)
            tensor = transform(img).unsqueeze(0)  # [1,3,224,224]

            with torch.no_grad():
                feat = resnet(tensor)  # [1,2048,1,1]
                feat = feat.squeeze().cpu().numpy()
            features.append(feat)
            frames.append(frame)
        idx += 1
    cap.release()
    return np.array(features), frames

In [18]:
import matplotlib.pyplot as plt
from scipy.signal import argrelextrema

def extract_keyframes_from_frames(frames, step=10):
    """
    Extract keyframes from a list of frames using motion-based key moment detection.
    Parameters:
        frames (list): List of consecutive video frames (as numpy arrays).
        step (int): Process every nth frame for speed.
    Returns:
        keyframes (list): List of selected keyframes.
        key_indices (list): Frame indices of selected keyframes.
    """

    if len(frames) < 2:
        print("Not enough frames for motion analysis.")
        return [], []

    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
    pdm_values = []
    processed_frames = []
    idx = 0

    for frame in frames[1:]:
        if idx % step != 0:
            idx += 1
            continue

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Optical flow (motion vectors between frames)
        flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)

        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        AM = np.mean(mag) + 1e-6  # average movement

        # Direction bins (8 bins of 45° each)
        bins = np.zeros(8)
        bin_indices = np.floor(ang * 8 / (2 * np.pi)).astype(int) % 8
        for b in range(8):
            bins[b] = np.sum(mag[bin_indices == b])

        # Perceived Dominant Movement (PDM)
        PDM = np.max(bins) / AM
        pdm_values.append(PDM)
        processed_frames.append((idx, frame))

        prev_gray = gray
        idx += 1

    # Convert to numpy for easier math
    pdm_arr = np.array(pdm_values)

    # Find local maxima & minima
    maxima = argrelextrema(pdm_arr, np.greater)[0]
    minima = argrelextrema(pdm_arr, np.less)[0]

    keyframes = []
    key_indices = []

    # Extract key moments (maxima between minima)
    for i in range(len(minima) - 1):
        seg_min1, seg_min2 = minima[i], minima[i + 1]
        seg_max = [m for m in maxima if seg_min1 < m < seg_min2]
        if seg_max:  # if maxima exists between minima
            best_idx = seg_max[np.argmax(pdm_arr[seg_max])]
            frame_no, key_frame = processed_frames[best_idx]
            keyframes.append(key_frame)
            key_indices.append(frame_no)

    return keyframes, key_indices

In [19]:
video_folder = "videos/"
output_root = "keyframes1/"
os.makedirs(output_root, exist_ok=True)

In [20]:
video_files = [f for f in os.listdir(video_folder) if f.endswith((".mp4", ".avi", ".mov"))]

for n,video_file in enumerate(video_files):
    video_path = os.path.join(video_folder, video_file)
    print(f"Processing {video_file}...")

    
    features, frames = extract_features(video_path, sample_rate=10)
    if len(frames) == 0:
        print(f"⚠️ Skipped {video_file} (too short)")
        continue

    # K = int(random.uniform(0.05,0.15) * len(frames))
    keyframes, indices = extract_keyframes_from_frames(frames)

    
    base_name = str(n+1)
    out_dir = os.path.join(output_root, base_name)
    os.makedirs(out_dir, exist_ok=True)

    
    for i, (frame, idx) in enumerate(zip(keyframes, indices)):
        out_path = os.path.join(out_dir, f"keyframe_{i+1}_frame{idx}.jpg")
        cv2.imwrite(out_path, frame)
        print(f"Saved {out_path}")

   
    txt_path = os.path.join(out_dir, f"{n+1}.txt")
    with open(txt_path, "w") as f:
        for idx in indices:
            f.write(str(idx) + "\n")

    print(f"Saved indices to {txt_path}")

Processing Air_Force_One.mp4...
Saved keyframes1/1\keyframe_1_frame30.jpg
Saved keyframes1/1\keyframe_2_frame90.jpg
Saved keyframes1/1\keyframe_3_frame120.jpg
Saved keyframes1/1\keyframe_4_frame140.jpg
Saved keyframes1/1\keyframe_5_frame160.jpg
Saved keyframes1/1\keyframe_6_frame200.jpg
Saved keyframes1/1\keyframe_7_frame220.jpg
Saved keyframes1/1\keyframe_8_frame250.jpg
Saved keyframes1/1\keyframe_9_frame270.jpg
Saved keyframes1/1\keyframe_10_frame310.jpg
Saved keyframes1/1\keyframe_11_frame330.jpg
Saved keyframes1/1\keyframe_12_frame360.jpg
Saved keyframes1/1\keyframe_13_frame390.jpg
Saved indices to keyframes1/1\1.txt
Processing Base jumping.mp4...
Saved keyframes1/2\keyframe_1_frame40.jpg
Saved keyframes1/2\keyframe_2_frame70.jpg
Saved keyframes1/2\keyframe_3_frame100.jpg
Saved keyframes1/2\keyframe_4_frame130.jpg
Saved keyframes1/2\keyframe_5_frame170.jpg
Saved keyframes1/2\keyframe_6_frame200.jpg
Saved keyframes1/2\keyframe_7_frame240.jpg
Saved keyframes1/2\keyframe_8_frame280.jp