In [None]:
import os
import cv2
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
import zipfile
import io

# Every extracted frame is passed through this transform, which resizes it
# to a fixed 224x224 size.
resize_transform = transforms.Resize(size=(224, 224))

# Input / output locations
# input_folder:    directory that is scanned for video files
# output_zip_file: ZIP archive that receives the extracted frames
input_folder = 'input/folder'
output_zip_file = 'output/folder.zip'

# Function to extract up to `num_frames` (30 by default) frames from a video
def extract_15_frames(video_path, zip_writer, num_frames=30):
    """Sample frames from a video, resize them and store them as JPEGs in a ZIP.

    Despite the historical name, the number of frames is controlled by
    ``num_frames`` (default 30). Frames are taken at a fixed stride of
    ``total_frames // num_frames`` (never less than 1), starting at the first
    frame. Fewer than ``num_frames`` frames are written when the video cannot
    be opened, is shorter than requested, or stops delivering frames early.

    Args:
        video_path: Path of the video file, opened with ``cv2.VideoCapture``.
        zip_writer: Object with a ``writestr(name, data)`` method (the caller
            passes an open ``zipfile.ZipFile``). Each frame is stored as
            ``<video file stem>_frame_<NNN>.jpg``.
        num_frames: Maximum number of frames to extract. Values <= 0 extract
            nothing.

    Returns:
        The number of frames actually written to ``zip_writer``.
    """
    # Nothing requested: return before touching the video. This also protects
    # the stride computation below from a division by zero.
    if num_frames <= 0:
        return 0

    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Stride between kept frames; clamped to 1 so short videos (or videos
        # whose reported frame count is 0) still yield every frame.
        frame_interval = max(total_frames // num_frames, 1)

        # The video name (without extension) prefixes every frame filename.
        video_stem = os.path.splitext(os.path.basename(video_path))[0]

        extracted_frames = 0
        frame_count = 0

        while cap.isOpened() and extracted_frames < num_frames:
            # grab() only advances to the next frame; retrieve() is called
            # solely for frames we keep. Together they are what read() does.
            if not cap.grab():
                break

            if frame_count % frame_interval == 0:
                ret, frame = cap.retrieve()
                if not ret:
                    break

                # OpenCV delivers BGR; PIL expects RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_image = Image.fromarray(frame_rgb)

                # Resize the frame with the module-level transform (224x224).
                resized_frame = resize_transform(frame_image)

                # Encode to JPEG in memory and add the bytes to the ZIP.
                img_byte_arr = io.BytesIO()
                resized_frame.save(img_byte_arr, format='JPEG')

                frame_filename = f"{video_stem}_frame_{extracted_frames:03d}.jpg"
                zip_writer.writestr(frame_filename, img_byte_arr.getvalue())

                extracted_frames += 1

            frame_count += 1

        return extracted_frames
    finally:
        # Always free the capture handle, even if encoding or writing fails.
        cap.release()

# Create the ZIP file
# ZipFile does not create missing parent directories, so make sure the
# destination directory exists first (skipped when the path has no directory).
output_dir = os.path.dirname(output_zip_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

with zipfile.ZipFile(output_zip_file, 'w', zipfile.ZIP_DEFLATED) as zip_writer:
    total_frames_extracted = 0

    # Process each entry in the input folder. The listing is sorted because
    # os.listdir returns names in arbitrary order, which would make the order
    # of entries in the archive vary between runs.
    for video_file in tqdm(sorted(os.listdir(input_folder)), desc="Processing videos"):
        video_path = os.path.join(input_folder, video_file)

        # Skip sub-directories and anything else that is not a regular file.
        if not os.path.isfile(video_path):
            continue

        # Extract frames and update the running total.
        extracted_count = extract_15_frames(video_path, zip_writer)
        total_frames_extracted += extracted_count

    print(f"Total frames extracted: {total_frames_extracted}")