Import Required Packages:

In [None]:
import os
import cv2
from tqdm import tqdm
import time
import math
import shutil

Load Downloaded Videos:

In [None]:
video_dir="C:/Users/Administrator/Documents/Data"                        # Give your Path

# Collect the full path of every entry found one level below video_dir,
# i.e. the layout walked here is video_dir/<folder>/<video file>.
path_to_videos=[]
for folder in tqdm(os.listdir(video_dir),desc='Loading all videos: '):
    folder_path=os.path.join(video_dir,folder)
    if not os.path.isdir(folder_path):
        # A stray file sitting next to the folders would make os.listdir() raise.
        continue
    for video in os.listdir(folder_path):
        path_to_videos.append(os.path.join(folder_path,video))

# Sort so that the following steps visit the videos in a deterministic order.
path_to_videos=sorted(path_to_videos)
print('[Info] Total number of videos: '+str(len(path_to_videos)))

Calculate Total Running Time of Videos (in Seconds):

In [None]:
# Sum the duration of every video, in whole seconds (each video rounded up).
seconds=0
for video_path in tqdm(path_to_videos,desc='Calculating Runtime: '):
    cap=None
    try:
        cap = cv2.VideoCapture(video_path)
        fps = math.ceil(cap.get(cv2.CAP_PROP_FPS))
        if not cap.isOpened() or fps <= 0:
            # An unreadable file reports 0 fps; say so explicitly instead of
            # relying on a ZeroDivisionError being swallowed further down.
            print("failed to load",video_path)
            continue
        frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        seconds+=math.ceil(frames/fps)
    except Exception:
        # Best effort: one broken video must not abort the whole scan.
        print("failed to load",video_path)
    finally:
        # Always free the decoder handle, including on the failure paths.
        if cap is not None:
            cap.release()

# Number of digits in the total; the frame-extraction step uses it as the
# zero-padding width of the running frame number.
digits=len(str(seconds))
print(f'[Info] Total Running time is {seconds} seconds')

Convert Video to Frames:

In [None]:
# Frames are written to a sibling of video_dir named "<video_dir name>_frames".
# normpath() drops a trailing separator, which would otherwise yield an empty
# base name and place the output inside video_dir itself.
video_root=os.path.normpath(video_dir)
frame_dir=os.path.join(os.path.dirname(video_root),os.path.basename(video_root)+'_frames')
os.makedirs(frame_dir,exist_ok=True)

dataset_name='Data_set'                     # Change dataset Name 
frame_rate=2                               # frames to save per second of video

frame_number=0                             # running counter across all videos
for video_path in tqdm(path_to_videos, desc='Extracting Frames: '):
    cap=None
    try:
        cap = cv2.VideoCapture(video_path)
        native_fps = cap.get(cv2.CAP_PROP_FPS)
        if not cap.isOpened() or native_fps <= 0:
            print("failed to load",video_path)
            continue
        # Keep one frame out of every `step` decoded frames. Dividing the
        # native fps by frame_rate gives frame_rate saved frames per second;
        # the previous fps*frame_rate kept one frame every frame_rate seconds.
        step = max(1, round(native_fps/frame_rate))
        count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break                      # end of stream or decode failure
            if count % step == 0:
                frame_number+=1
                uniq_number=f"{frame_number:0{digits}d}"
                # Timestamp prefix, presumably to keep file names unique.
                time_stamp=str(time.time()).replace(".","")
                output_file=os.path.join(frame_dir,f"{time_stamp}_{dataset_name}_{uniq_number}.jpg")
                # imwrite() reports failure through its return value only.
                if not cv2.imwrite(output_file, frame):
                    print("failed to write",output_file)
            count += 1
    except Exception:
        # Best effort: skip a video that cannot be processed and carry on.
        print("failed to load",video_path)
    finally:
        # Release the capture on every path, not only at end of stream.
        if cap is not None:
            cap.release()

print(f'[Info] Total number of frames extracted: {len(os.listdir(frame_dir))}')

Make Batches (2000 per batch is preferred):

In [None]:
def create_batch(folder_loc,batch_size=2000,prefix=None):
    """Move the files in ``folder_loc`` into numbered batch sub-folders.

    Files are taken in sorted name order and moved into sub-folders named
    ``<prefix>-1``, ``<prefix>-2``, ... created inside ``folder_loc``, each
    receiving at most ``batch_size`` files.

    Args:
        folder_loc: Directory whose files are to be batched.
        batch_size: Maximum number of files per batch folder.
        prefix: Batch folder name prefix. When ``None`` the module-level
            ``dataset_name`` is used.

    Raises:
        FileExistsError: If a batch folder to be created already exists.
    """
    if prefix is None:
        prefix=dataset_name
    # Only regular files are batched; sub-folders (for example batch folders
    # left by an earlier run) are not moved. Sorting makes the split
    # deterministic.
    all_file_path=sorted(
        os.path.join(folder_loc,filename)
        for filename in os.listdir(folder_loc)
        if os.path.isfile(os.path.join(folder_loc,filename))
    )
    batch_number=0
    new_folder=None
    for i,file_path in enumerate(tqdm(all_file_path, desc='Creating Batches: ')):
        if i%batch_size==0:
            # Start a new batch folder every batch_size files.
            batch_number=i//batch_size +1
            new_folder=os.path.join(folder_loc,f"{prefix}-{batch_number}")
            os.mkdir(new_folder)
        shutil.move(file_path,new_folder)
    # The f-prefix was missing, so the placeholder used to be printed literally.
    print(f"[Info] Total number of batches created: {batch_number}")

# Split the contents of frame_dir into batch sub-folders of 2000 entries each.
create_batch(frame_dir,2000)

Zipping Subfolders:

In [None]:
def zip_folders(img_batch_dir):
    """Zip every entry of ``img_batch_dir`` into a sibling ``zipped_batches`` folder.

    For each name listed in ``img_batch_dir`` an archive
    ``zipped_batches/<name>.zip`` is requested from ``shutil.make_archive``,
    with the entry itself as the archive root. ``zipped_batches`` is located
    next to ``img_batch_dir`` (same parent directory); this function does not
    create it explicitly.

    Returns:
        A status message containing the path of the ``zipped_batches`` folder.
    """
    parent_dir = os.path.dirname(img_batch_dir)
    archive_root = os.path.join(parent_dir, 'zipped_batches')
    batch_names = os.listdir(img_batch_dir)
    for batch_name in tqdm(batch_names, desc='Zipping Batches: '):
        shutil.make_archive(
            os.path.join(archive_root, batch_name),   # archive path without ".zip"
            'zip',
            os.path.join(img_batch_dir, batch_name),  # directory being archived
        )
    return "[Info] Batch Folders zipped and stored in: " + archive_root

# Zip every entry of frame_dir; the returned status message is not stored
# (in a notebook it would be shown as the cell's output).
zip_folders(frame_dir)