In [None]:
!pip -q install scenedetect

In [None]:
import os
import cv2
import time
import math
from PIL import Image
from datetime import timedelta
from scenedetect import detect, ContentDetector
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
#Helper function

def bytes_to_mb(bytes):
    """Convert a size given in bytes to megabytes (1 MB = 1024 * 1024 bytes)."""
    bytes_per_mb = 1024 * 1024
    return bytes / bytes_per_mb

def get_folder_size(folder_path):
    """Return the combined size, in MB, of the files found below ``folder_path``.

    The tree is traversed with ``os.walk``; files that are symbolic links are
    left out of the total.
    """
    size_in_bytes = 0
    for parent_dir, _subdirs, file_names in os.walk(folder_path):
        candidates = (os.path.join(parent_dir, name) for name in file_names)
        # Symbolic links do not contribute to the total.
        size_in_bytes += sum(
            os.path.getsize(path) for path in candidates if not os.path.islink(path)
        )
    return bytes_to_mb(size_in_bytes)

In [None]:
# Kaggle input folders holding the videos, numbered l01 through l12.
main_path = [f'/kaggle/input/videos-l{batch_no:02d}-zip' for batch_no in range(1, 13)]

In [None]:
# Resolve every location first, then report them in the same order as before.
cwd = os.getcwd()
current_path = '/kaggle/input/videos-l07-zip'  # Change this to process a different folder
current_child_path = os.path.join(current_path, 'video')

print(f'Current working directory: {cwd}')
print(f'Current path: {current_path}')
print(f'Current child path: {current_child_path}')

In [None]:
# Calculate the total size (in MB) of the selected video folder
folder_size = get_folder_size(current_child_path)
print(f"Total size of the folder is: {folder_size} MB")

In [None]:
# Derive the output locations from the selected input folder: the folder name
# without '-zip', the keyframe folder under the working directory, and the
# name of the zip archive for that folder.
video_folder_name = os.path.basename(current_path).replace('-zip', '')
key_frame_folder = os.path.join(cwd, 'key_frame_folder_' + video_folder_name)
key_frame_folder_zip = key_frame_folder + '.zip'

print(f'Video folder name: {video_folder_name}')
print(f'Keyframes folder: {key_frame_folder}')
print(f'Keyframe folder (zip): {key_frame_folder_zip}')

In [None]:
def get_frame_at_timestamp(video_path, key_frame_no, timestamp, dest):
    """Save the frame found at ``timestamp`` in ``video_path`` as a JPEG in ``dest``.

    Args:
        video_path: Path of the video file to read from.
        key_frame_no: Integer frame number; zero-padded to 7 digits in the file name.
        timestamp: ``datetime.timedelta`` offset of the wanted frame.
        dest: Output directory; created when a frame is about to be written.

    The image is written to ``{dest}/{key_frame_no:07d}_{seconds}.jpg``. A
    message is printed when the frame cannot be read or the image cannot be
    written; nothing is returned.
    """
    # Convert timedelta to seconds
    time_in_seconds = timestamp.total_seconds()
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    try:
        # Set the video position to the given timestamp
        cap.set(cv2.CAP_PROP_POS_MSEC, time_in_seconds * 1000)
        # Read the frame at the given timestamp
        success, frame = cap.read()
    finally:
        # Release the capture even when seeking or reading raised
        cap.release()

    if not success:
        print(f"Failed to capture frame at the given timestamp for \
        \n{video_path}, \n{key_frame_no}, \n{timestamp}, \n{dest}")
        return

    # exist_ok replaces the separate exists() check followed by makedirs()
    os.makedirs(dest, exist_ok=True)
    frame_no_str = f'{key_frame_no:07d}'
    frame_file = f'{dest}/{frame_no_str}_{str(time_in_seconds)}.jpg'
    # imwrite returns False when the image could not be saved; report it
    # instead of silently dropping the keyframe
    if not cv2.imwrite(frame_file, frame):
        print(f"Failed to write frame to {frame_file}")

def time_str_to_timedelta(time_str):
    """Parse an ``H:M:S`` string (each field read as a float) into a ``timedelta``."""
    hours, minutes, seconds = (float(part) for part in time_str.split(':'))
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)

def single_video_KFDetector(video_path, root):
    """Detect the scenes of one video and save the first frame of each scene.

    Args:
        video_path: Path of the video file to analyse.
        root: Folder under which ``keyframe_<video file name without extension>``
            is created to hold the saved frames.

    Scenes come from ``detect(video_path, ContentDetector())``. For every scene
    the frame at the scene's start timecode is saved through
    ``get_frame_at_timestamp``. Progress and the resulting folder size are
    printed; nothing is returned.
    """
    print(f'Video path: {video_path}')
    scene_list = detect(video_path, ContentDetector())
    print(f'Scene detection: DONE with total {len(scene_list)} scenes')
    folder_name = os.path.splitext(os.path.basename(video_path))[0]
    dest = f'{root}/keyframe_{folder_name}'
    print(f'Keyframe dest: {dest}')
    # Create the output folder up front: if no frame ends up being saved
    # (empty scene list, or every read failing) the folder would otherwise not
    # exist and os.listdir(dest) below would raise FileNotFoundError.
    os.makedirs(dest, exist_ok=True)

    for scene in scene_list:
        # Only the start of each scene is used for the keyframe.
        start_time = time_str_to_timedelta(scene[0].get_timecode())
        start_frame_no = scene[0].get_frames()
        get_frame_at_timestamp(video_path, start_frame_no, start_time, dest)

    folder_size = get_folder_size(dest)
    print(f'DONE with total {len(os.listdir(dest))} scenes')
    print(f"Total size of the folder is: {folder_size} MB")
    
def multiple_video_KFDetector(multiple_video_folder, key_frame_folder):
    """Extract keyframes for every file in ``multiple_video_folder`` using a thread pool.

    Args:
        multiple_video_folder: Folder whose files are each passed to
            ``single_video_KFDetector``.
        key_frame_folder: Root output folder; created if missing.

    A failure on one video is printed together with the path of that video and
    does not stop the others. The total size of ``key_frame_folder`` is printed
    at the end; nothing is returned.
    """
    os.makedirs(key_frame_folder, exist_ok=True)
    # Sorted for a reproducible submission order; entries that are not files
    # (e.g. sub-directories) are skipped instead of being submitted as videos.
    candidates = (
        os.path.join(multiple_video_folder, video_name)
        for video_name in sorted(os.listdir(multiple_video_folder))
    )
    videos = [video for video in candidates if os.path.isfile(video)]
    with ThreadPoolExecutor() as executor:
        # Remember which video each future belongs to so errors can name it.
        future_to_video = {
            executor.submit(single_video_KFDetector, video, key_frame_folder): video
            for video in videos
        }
        for future in as_completed(future_to_video):
            try:
                future.result()
            except Exception as e:
                print(f"An error occurred while processing {future_to_video[future]}: {e}")
    folder_size = get_folder_size(key_frame_folder)
    print(f"Total size of the folder is: {folder_size} MB")

In [None]:
# Run keyframe extraction over the selected video folder and print the elapsed time
starttime = time.perf_counter()
multiple_video_KFDetector(current_child_path, key_frame_folder)
print('Elapsed time: ', timedelta(seconds=time.perf_counter()-starttime))

In [None]:
# Zip the contents of the keyframe folder into key_frame_folder_zip
!zip -r {key_frame_folder_zip} {key_frame_folder}/*

In [None]:
import os
import datetime
def get_image_paths(multiple_key_frame_folder):
    """Index every saved keyframe image below ``multiple_key_frame_folder``.

    Expects one sub-folder per video named ``keyframe_<video_ID>`` containing
    images named ``<frame_ID>_<seconds>.jpg``. Entries that are not directories,
    and files that do not end in ``.jpg``, are skipped.

    Args:
        multiple_key_frame_folder: Folder holding the per-video keyframe folders.

    Returns:
        dict mapping a running integer index (starting at 0, in sorted folder
        then sorted file-name order) to a dict with the keys ``'frame_ID'``
        (int), ``'frame_path'`` (str), ``'video_ID'`` (str) and ``'timestamp'``
        (``str(datetime.timedelta(seconds=...))``).

    Raises:
        IndexError: If a ``.jpg`` file name contains no ``_`` separator.
        ValueError: If the frame ID or seconds part is not numeric.
    """
    folder_prefix = 'keyframe_'
    image_suffix = '.jpg'
    image_video_dict = {}
    idx = 0
    for video in sorted(os.listdir(multiple_key_frame_folder)):
        single_key_frame_folder = os.path.join(multiple_key_frame_folder, video)
        # A stray file next to the video folders would make os.listdir fail.
        if not os.path.isdir(single_key_frame_folder):
            continue
        # Strip the prefix only where it is a prefix, not anywhere in the name.
        video_ID = video[len(folder_prefix):] if video.startswith(folder_prefix) else video
        for img_name in sorted(os.listdir(single_key_frame_folder)):
            if not img_name.endswith(image_suffix):
                continue
            name_parts = img_name[:-len(image_suffix)].split('_')
            frame_ID, frame_second = name_parts[0], name_parts[1]
            frame_timestamp = str(datetime.timedelta(seconds=float(frame_second)))
            image_video_dict[idx] = {
                'frame_ID': int(frame_ID),
                'frame_path': os.path.join(single_key_frame_folder, img_name),
                'video_ID': video_ID,
                'timestamp': frame_timestamp,
            }
            idx += 1
    return image_video_dict

In [None]:
import json
# Annotation: build the index of all saved keyframes
image_info_dict = get_image_paths(key_frame_folder)
# Save the dictionary as a JSON file named after the keyframe folder
with open(f"{key_frame_folder}.json", "w") as outfile:
  json.dump(image_info_dict, outfile)

In [None]:
def reduce_frame(root, new_root, reduce_ratio = 0.5):
    """Write a downscaled WebP copy of every image found in the sub-folders of ``root``.

    Args:
        root: Folder whose sub-folders contain the source images.
        new_root: Output folder; the sub-folder names of ``root`` are recreated
            below it.
        reduce_ratio: Factor applied to both width and height (results are
            floored, but never below 1 pixel).

    Each output keeps the source file name with ``.jpg`` replaced by ``.webp``
    and is saved with ``optimize=True, quality=30, format="webp"``.
    """
    for folder in os.listdir(root):
        source_dir = os.path.join(root, folder)
        # Entries that are not directories cannot be listed; skip them.
        if not os.path.isdir(source_dir):
            continue
        new_dir = os.path.join(new_root, folder)
        os.makedirs(new_dir, exist_ok=True)
        for frame_path in os.listdir(source_dir):
            frame_name = frame_path.replace('.jpg', '.webp')
            # The context manager closes the source file once it has been resized.
            with Image.open(os.path.join(source_dir, frame_path)) as image:
                x, y = image.size
                # Clamp to 1 so very small images do not produce a zero-sized target.
                x_new = max(1, math.floor(x * reduce_ratio))
                y_new = max(1, math.floor(y * reduce_ratio))
                resized = image.resize((x_new, y_new))
            resized.save(f'{new_dir}/{frame_name}', optimize=True, quality=30, format="webp")

In [None]:
# Locations for the downscaled keyframes and for their zip archive.
reduced_key_frame_folder = f'{key_frame_folder}_reduced'
reduced_key_frame_zip = f'{reduced_key_frame_folder}.zip'

print(f'Reduced keyframe folder: {reduced_key_frame_folder}')
print(f'Reduced keyframe folder (zip): {reduced_key_frame_zip}')

In [None]:
# Write downscaled WebP copies of all keyframes into the reduced folder
reduce_frame(key_frame_folder, reduced_key_frame_folder)

In [None]:
# Size comparison: the first line printed is the original keyframe folder,
# the second line is the reduced folder (both messages use the same wording).
folder_size = get_folder_size(key_frame_folder)
print(f"Total size of the folder is: {folder_size} MB")
folder_size = get_folder_size(reduced_key_frame_folder)
print(f"Total size of the folder is: {folder_size} MB")

In [None]:
# Zip the contents of the reduced keyframe folder into reduced_key_frame_zip
!zip -r {reduced_key_frame_zip} {reduced_key_frame_folder}/*

In [None]:
# Delete the keyframe folder
!rm -rf {key_frame_folder}

In [None]:
# Delete the reduced keyframe folder
!rm -rf {reduced_key_frame_folder}