In [1]:
# This code provides information about each and every video in the training and validation splits in the dataset
# The information includes Filename, Width, Height, Duration, Audio Presence, Codec, Format and Size

import os
import subprocess
import csv
import tqdm

In [2]:
def get_video_properties(filename):
    """Get properties of a video using ffprobe."""
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,codec_name",
        "-show_entries", "format=format_name,duration",
        "-of", "default=noprint_wrappers=1",
        filename
    ]
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    properties = {}
    for line in result.stdout.split('\n'):
        if '=' in line:
            key, value = line.split('=')
            properties[key.strip()] = value.strip()

    # Check for audio stream
    has_audio = subprocess.run(
        ["ffprobe", "-v", "error", "-select_streams", "a", "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1", filename],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    properties['has_audio'] = 'Yes' if has_audio.stdout else 'No'

    # Retrieve and convert file size to megabytes
    file_size_bytes = os.path.getsize(filename)
    file_size_mb = file_size_bytes / 1048576  # Convert bytes to megabytes

    # Map to desired names and format
    video_details = {
        'Filename': os.path.basename(filename),
        'Width': properties.get('width', 'Unknown'),
        'Height': properties.get('height', 'Unknown'),
        'Duration': properties.get('duration', 'Unknown'),
        'Audio': properties['has_audio'],
        'Codec': properties.get('codec_name', 'Unknown'),
        'Format': properties.get('format_name', 'Unknown'),
        'Size (MB)': round(file_size_mb, 2)  # Round to two decimal places for clarity
    }
    return video_details

In [3]:
def save_to_csv(video_info, output_file):
    if video_info:
        keys = ['Filename', 'Width', 'Height', 'Duration', 'Audio', 'Codec', 'Format', 'Size (MB)']
        with open(output_file, 'w', newline='') as file:
            dict_writer = csv.DictWriter(file, fieldnames=keys)
            dict_writer.writeheader()
            dict_writer.writerows(video_info)
        print(f"Data written to {output_file}")

In [4]:
def process_videos_in_directory(directory, output_csv):
    video_files = []
    video_info = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
                video_files.append(os.path.join(root, file))
    
    with tqdm.tqdm(total=len(video_files), desc=f"Processing videos in {directory}") as pbar:
        print(video_files)
        for filename in video_files:
            # print(filename)
            filepath = os.path.join(directory, filename)
            # print(filepath)
            try:
                properties = get_video_properties(filepath)
                video_info.append(properties)
                pbar.update(1)  # Update the progress bar after each processed video
            except Exception as e:
                print(f"Error processing file {filename}: {e}")
                pbar.update(1)
    save_to_csv(video_info, output_csv)

In [5]:
data_dir = r"D:\Main Project\Hockey data\HockeyFightOriginal"
data_folders = ['train', 'val']

# Process each data folder and save outputs in separate CSV files
for folder in data_folders:
    folder_path = os.path.join(data_dir, folder)
    output_csv = f"{folder}_video_info.csv"
    process_videos_in_directory(folder_path, output_csv)

Processing videos in D:\Main Project\Hockey data\HockeyFightOriginal\train:   0%|          | 2/800 [00:00<01:00, 13.24it/s]

['D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi101_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi102_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi103_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi104_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi105_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi106_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi107_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi108_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi109_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi110_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi111_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\train\\Fight\\fi112_xv

Processing videos in D:\Main Project\Hockey data\HockeyFightOriginal\train: 100%|██████████| 800/800 [01:01<00:00, 13.01it/s]


Data written to train_video_info.csv


Processing videos in D:\Main Project\Hockey data\HockeyFightOriginal\val:   1%|          | 2/200 [00:00<00:16, 11.98it/s]

['D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi100_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi10_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi11_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi12_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi13_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi14_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi15_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi16_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi17_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi18_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi19_xvid.avi', 'D:\\Main Project\\Hockey data\\HockeyFightOriginal\\val\\Fight\\fi1_xvid.avi', 'D:\\Main Project\\Hockey d

Processing videos in D:\Main Project\Hockey data\HockeyFightOriginal\val: 100%|██████████| 200/200 [00:15<00:00, 12.82it/s]

Data written to val_video_info.csv



