# **Extract keyframes from videos with scene detection support from TransNetV2**

## **1. Install required packages**

In [1]:
# Work-sharing configuration: the list of videos is split into `num_member`
# chunks, and this notebook instance only runs shot detection on the chunk
# selected by `MEMBER_ID`.
MEMBER_ID = 0    # valid values: 0 .. num_member - 1
num_member = 5

# Fail fast on an impossible configuration instead of hitting a KeyError later,
# after the packages are installed and the model has been loaded.
if num_member < 1 or not 0 <= MEMBER_ID < num_member:
    raise ValueError(
        f"num_member must be >= 1 and MEMBER_ID must be in [0, num_member - 1]; "
        f"got MEMBER_ID={MEMBER_ID}, num_member={num_member}"
    )

In [2]:
!pip install ffmpeg-python pillow
!git clone https://github.com/soCzech/TransNetV2.git
%cd TransNetV2/inference

import os
import cv2
import json
import glob
import torch
import ffmpeg
import numpy as np
from tqdm.auto import tqdm
from transnetv2 import TransNetV2
from timeit import default_timer as timer

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Cloning into 'TransNetV2'...
remote: Enumerating objects: 362, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 362 (delta 70), reused 70 (delta 70), pack-reused 278 (from 1)[K
Receiving objects: 100% (362/362), 95.27 KiB | 8.66 MiB/s, done.
Resolving deltas: 100% (210/210), done.
/kaggle/working/TransNetV2/inference


2024-08-21 07:24:06.288970: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-21 07:24:06.289095: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-21 07:24:06.417838: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## **2. Parse video info**

In [3]:
videos_dir = '/kaggle/input/aic2024-videos-part1'

# One (initially empty) mapping per data part. The part name is the text after
# the last underscore of each entry under `videos_dir` (e.g. "L01", "L02").
all_video_paths = {
    entry.split('_')[-1]: dict()
    for entry in sorted(os.listdir(videos_dir))
}

# Fill every part with {video_id: full path}; the video id is the text after
# the last underscore of the file name, with ".mp4" removed.
for data_part in sorted(all_video_paths):
    data_part_path = f'{videos_dir}/Videos_{data_part}/video'
    for file_name in sorted(os.listdir(data_part_path)):
        video_id = file_name.replace('.mp4', '').split('_')[-1]
        all_video_paths[data_part][video_id] = f'{data_part_path}/{file_name}'

In [4]:
num_batch = 1
BATCH_ID = 0

# Flatten {data_part: {video_id: path}} into one list of video paths.
all_videos = [x for v in all_video_paths.values() for x in v.values()]

# Nominal chunk size. When the number of videos is not a multiple of
# num_batch * num_member, the leftover videos are appended to the very last
# chunk (see below) instead of being silently dropped; every other chunk keeps
# exactly the videos it had with plain floor division.
batch_len = len(all_videos) // (num_batch * num_member)

# all_batches_info[batch][member] -> list of video paths. The nesting is used
# even when num_member == 1, because the shot-extraction cell always indexes
# all_batches_info[BATCH_ID][MEMBER_ID].
all_batches_info = {n: {} for n in range(num_batch)}
start = 0
for n in range(num_batch):
    for m in range(num_member):
        is_last_chunk = (n == num_batch - 1) and (m == num_member - 1)
        end = len(all_videos) if is_last_chunk else start + batch_len
        all_batches_info[n][m] = all_videos[start:end]
        start = end

# Every video must have been assigned to exactly one chunk.
assert start == len(all_videos), "partitioning lost some videos"

# Note: json turns the integer batch/member keys into strings in the file.
with open("/kaggle/working/batch_info.json", 'w') as f:
    json.dump(all_batches_info, f)

## **3. Extract shots**

In [5]:
# Build the TransNetV2 shot-boundary detector once; the same instance is reused
# for every video in the shot-extraction loop below.
model = TransNetV2()

[TransNetV2] Using weights from /kaggle/working/TransNetV2/inference/transnetv2-weights/.


In [6]:
%%time

save_dir = '/kaggle/working/scenes'
os.makedirs(save_dir, exist_ok=True)

for video_path in all_batches_info[BATCH_ID][MEMBER_ID]:
    video_batch, video_name = video_path.split('/')[-1].split('_')
    video_name = video_name.replace('.mp4', '')
    os.makedirs(os.path.join(save_dir, video_batch), exist_ok = True)

    _, single_frame_predictions, _ = model.predict_video(video_path)
    scenes = model.predictions_to_scenes(single_frame_predictions)
    
    with open(f"{save_dir}/{video_batch}/{video_name}.json", 'w') as f:
        json.dump(scenes.tolist(), f)

[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V001.mp4
[TransNetV2] Processing video frames 26542/26542
[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V002.mp4
[TransNetV2] Processing video frames 28065/28065
[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V003.mp4
[TransNetV2] Processing video frames 26629/26629
[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V004.mp4
[TransNetV2] Processing video frames 32688/32688
[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V005.mp4
[TransNetV2] Processing video frames 27074/27074
[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V006.mp4
[TransNetV2] Processing video frames 29282/29282
[TransNetV2] Extracting frames from /kaggle/input/aic2024-videos-part1/Videos_L07/video/L07_V007.mp4

## **4. Extract frames from scenes**

In [7]:
def save_frames(video_path: str, frame_numbers: np.ndarray, save_dir: str):
    """
    Extract selected frames from a video with OpenCV and save them as JPEG files.

    The video is decoded sequentially from frame 0 up to the largest requested
    frame number. The frame requested at position ``k`` of ``frame_numbers`` is
    written to ``{save_dir}/{k:04d}.jpg``, so output files line up with the
    input order. A frame number that appears several times is written once per
    occurrence, which keeps that correspondence intact.

    Args:
        video_path (str): Path to the video file.
        frame_numbers (np.ndarray): 0-based frame numbers to extract. May be
            empty, may contain duplicates and does not need to be sorted.
        save_dir (str): Directory that receives the JPEG files. It is not
            created here.

    Returns:
        None. Frames are only written to disk. A video that cannot be opened,
        a frame that cannot be read and a file that cannot be written are
        reported with ``print`` and skipped.
    """
    if len(frame_numbers) == 0:
        return

    # frame number -> every output index that asked for it. Looking frames up
    # here (instead of walking a single cursor through `frame_numbers`) means a
    # repeated or unreadable frame cannot stall the extraction of later ones.
    wanted = {}
    for out_idx, frame_no in enumerate(frame_numbers):
        wanted.setdefault(int(frame_no), []).append(out_idx)
    num_frame = max(wanted) + 1

    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            print(f"Cannot open video {video_path}")
            return

        for i in tqdm(range(num_frame)):
            # Every frame has to be read to move the decoder forward, even the
            # ones that are not saved.
            ret, frame = video.read()

            out_indices = wanted.get(i)
            if out_indices is None:
                continue
            if not ret:
                print(f"Frame {i} cannot be retrieved")
                continue

            for out_idx in out_indices:
                filename = "{}/{:0>4d}.jpg".format(save_dir, out_idx)
                if not cv2.imwrite(filename, frame):
                    print(f"Cannot save {filename}")
    finally:
        # Release the capture even if decoding or writing raised.
        video.release()

In [12]:
save_dir_all = '/kaggle/working/keyframes'
os.makedirs(save_dir_all, exist_ok=True)
scene_json_dirs = '/kaggle/working/scenes'

# For every video that has a scene JSON, save three keyframes per scene
# (first, middle and last frame) under {save_dir_all}/{key}_extra/{video_id}/.
for key in all_video_paths.keys():
    video_paths_dict = all_video_paths[key]
    video_ids = sorted(video_paths_dict.keys())

    save_dir = f'{save_dir_all}/{key}_extra'

    for video_id in tqdm(video_ids):
        scene_path = f'{scene_json_dirs}/{key}/{video_id}.json'
        if not os.path.exists(scene_path):
            # No shots were detected for this video in this session (it may
            # belong to another member's chunk). Skip it rather than aborting:
            # stopping at the first gap would extract nothing whenever this
            # member's videos are not at the very start of the list.
            continue
        with open(scene_path) as f:
            scenes = json.load(f)

        # Rows of [start_frame, end_frame]; reshape keeps an empty list 2-D so
        # the column indexing below cannot fail.
        scenes = np.asarray(scenes, dtype=int).reshape(-1, 2)
        if len(scenes) == 0:
            print(f'No scenes found for {key}_{video_id}')
            continue

        # Create the output directory only for videos that are really
        # processed, so skipped videos do not leave empty folders behind.
        save_frames_dir = f'{save_dir}/{video_id}'
        os.makedirs(save_frames_dir, exist_ok=True)

        # Interleaved as start, middle, end for each scene. For scenes of one
        # or two frames the middle equals the start, so repeats are possible.
        frame_numbers = np.zeros(len(scenes) * 3, dtype=int)
        frame_numbers[0::3] = scenes[:, 0]  # start frames
        frame_numbers[1::3] = (scenes[:, 0] + scenes[:, 1]) // 2
        frame_numbers[2::3] = scenes[:, 1]  # end frames

        print(f'Extracting data from {key}_{video_id}')
        video_path = video_paths_dict[video_id]
        save_frames(video_path, frame_numbers, save_frames_dir)

  0%|          | 0/31 [00:00<?, ?it/s]

Extracting data from L07_V001


  0%|          | 0/26542 [00:00<?, ?it/s]

Extracting data from L07_V002


  0%|          | 0/28065 [00:00<?, ?it/s]

Extracting data from L07_V003


  0%|          | 0/26629 [00:00<?, ?it/s]

Extracting data from L07_V004


  0%|          | 0/32688 [00:00<?, ?it/s]

Extracting data from L07_V005


  0%|          | 0/27074 [00:00<?, ?it/s]

Extracting data from L07_V006


  0%|          | 0/29282 [00:00<?, ?it/s]

Extracting data from L07_V007


  0%|          | 0/25624 [00:00<?, ?it/s]

Extracting data from L07_V008


  0%|          | 0/29665 [00:00<?, ?it/s]

Extracting data from L07_V009


  0%|          | 0/29093 [00:00<?, ?it/s]

Extracting data from L07_V010


  0%|          | 0/28461 [00:00<?, ?it/s]

Extracting data from L07_V011


  0%|          | 0/32664 [00:00<?, ?it/s]

Extracting data from L07_V012


  0%|          | 0/32510 [00:00<?, ?it/s]

Extracting data from L07_V013


  0%|          | 0/27799 [00:00<?, ?it/s]

Extracting data from L07_V014


  0%|          | 0/26539 [00:00<?, ?it/s]

Extracting data from L07_V015


  0%|          | 0/24831 [00:00<?, ?it/s]

Extracting data from L07_V016


  0%|          | 0/30824 [00:00<?, ?it/s]

Extracting data from L07_V017


  0%|          | 0/25569 [00:00<?, ?it/s]

Extracting data from L07_V018


  0%|          | 0/27549 [00:00<?, ?it/s]

Extracting data from L07_V019


  0%|          | 0/30661 [00:00<?, ?it/s]

Extracting data from L07_V020


  0%|          | 0/27307 [00:00<?, ?it/s]

Extracting data from L07_V021


  0%|          | 0/25387 [00:00<?, ?it/s]

Extracting data from L07_V022


  0%|          | 0/27745 [00:00<?, ?it/s]

Extracting data from L07_V023


  0%|          | 0/30142 [00:00<?, ?it/s]

Extracting data from L07_V024


  0%|          | 0/23551 [00:00<?, ?it/s]

Extracting data from L07_V025


  0%|          | 0/29725 [00:00<?, ?it/s]

Extracting data from L07_V026


  0%|          | 0/29551 [00:00<?, ?it/s]

Extracting data from L07_V027


  0%|          | 0/27824 [00:00<?, ?it/s]

Extracting data from L07_V028


  0%|          | 0/29633 [00:00<?, ?it/s]

Extracting data from L07_V029


  0%|          | 0/28539 [00:00<?, ?it/s]

Extracting data from L07_V030


  0%|          | 0/29171 [00:00<?, ?it/s]

Extracting data from L07_V031


  0%|          | 0/32347 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Extracting data from L08_V001


  0%|          | 0/32438 [00:00<?, ?it/s]

Extracting data from L08_V002


  0%|          | 0/28666 [00:00<?, ?it/s]

Extracting data from L08_V003


  0%|          | 0/31747 [00:00<?, ?it/s]

Extracting data from L08_V004


  0%|          | 0/30387 [00:00<?, ?it/s]

In [13]:
!rm -r /kaggle/working/TransNetV2
!rm /kaggle/working/batch_info.json

shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
