In [4]:
import pandas as pd

# Load only the first 100 rows of the CSV (columns `dtm` and `f`, per the
# printed output below) and print the first ten of them as a quick sanity check.

df = pd.read_csv('resources/fnew-2023-2.csv', nrows=100)
print(df.head(n=10))

                   dtm       f
0  2023-02-01 00:00:00  50.083
1  2023-02-01 00:00:01  50.073
2  2023-02-01 00:00:02  50.061
3  2023-02-01 00:00:03  50.050
4  2023-02-01 00:00:04  50.044
5  2023-02-01 00:00:05  50.040
6  2023-02-01 00:00:06  50.038
7  2023-02-01 00:00:07  50.040
8  2023-02-01 00:00:08  50.039
9  2023-02-01 00:00:09  50.035


In [2]:
import cv2
import numpy as np
import random
import pandas as pd

def extract_random_segment(video_path, segment_length_min=10, output_path='output_segment.mp4'):
    """Copy a randomly positioned segment of a video to a new file.

    The source is read with ``cv2.VideoCapture`` and the selected frames are
    written through a ``cv2.VideoWriter`` created with the ``'mp4v'`` FourCC,
    at the source frame rate and frame size.

    Args:
        video_path: Path of the video to read.
        segment_length_min: Length of the segment to extract, in minutes.
        output_path: Destination file for the extracted segment.

    Returns:
        A one-row ``pandas.DataFrame`` describing the *source* video (FPS,
        frame count, duration, height, width, FourCC codec string) plus the
        randomly chosen start time in seconds.

    Raises:
        IOError: If the source cannot be opened, or the output writer cannot
            be opened.
        ValueError: If the source reports a non-positive frame rate, or the
            requested segment is longer than the video.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            raise IOError("Cannot open video file")

        fps = cap.get(cv2.CAP_PROP_FPS)  # Frame rate of the video
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames
        if fps <= 0:
            # Without a usable frame rate neither the duration nor the
            # frame count of the segment can be computed.
            raise ValueError("Video reports a non-positive frame rate")
        total_duration = total_frames / fps  # Total duration in seconds
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)

        # The FourCC is packed as four bytes, least-significant byte first.
        codec = int(cap.get(cv2.CAP_PROP_FOURCC))
        codec_string = "".join(chr((codec >> shift) & 0xFF) for shift in (0, 8, 16, 24))

        segment_length_sec = segment_length_min * 60  # Segment length in seconds
        if segment_length_sec > total_duration:
            raise ValueError("Segment length is longer than the video duration")

        # Pick a start frame such that the whole segment fits inside the video.
        segment_frames = int(segment_length_sec * fps)
        start_frame = random.randint(0, total_frames - segment_frames)
        start_time = start_frame / fps  # Start time in seconds

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (int(width), int(height)))
        if not out.isOpened():
            # Otherwise every write() below would be silently dropped.
            raise IOError("Cannot open output video file for writing")

        for _ in range(segment_frames):
            ret, frame = cap.read()
            if not ret:
                # Source ran out of readable frames; keep what was written.
                break
            out.write(frame)
    finally:
        # Release native handles on every path, including the error paths above.
        cap.release()
        if out is not None:
            out.release()

    metadata = pd.DataFrame({
        'FPS': [fps],
        'Total Frames': [total_frames],
        'Total Duration (s)': [total_duration],
        'Height (pixels)': [height],
        'Width (pixels)': [width],
        'Codec': [codec_string],
        'Random Start Time (s)': [start_time]
    })

    return metadata

# Demo run: cut a random segment of the default length from the sample
# recording and print the metadata frame that comes back.
video_path = "resources/Westminster_Hall_09_02_23_13_32_04.mp4"
metadata_df = extract_random_segment(video_path=video_path)
print(metadata_df)


    FPS  Total Frames  Total Duration (s)  Height (pixels)  Width (pixels)  \
0  25.0        235728             9429.12            576.0          1024.0   

  Codec  Random Start Time (s)  
0  avc1                7756.44  


In [3]:
from moviepy.editor import VideoFileClip
import random
import pandas as pd
from pydub import AudioSegment

def extract_random_segment(video_path, segment_length_min=10,
                           segment_video_path="output_segment.mp4",
                           audio_path='extracted_audio.wav'):
    """Extract a random segment of a video as a silent video file plus a WAV file.

    Args:
        video_path: Path of the video to read.
        segment_length_min: Length of the segment to extract, in minutes.
        segment_video_path: Destination of the audio-less video segment
            (written with the ``libx264`` codec).
        audio_path: Destination of the segment's audio track (written with
            the ``pcm_s16le`` codec).

    Returns:
        A one-row ``pandas.DataFrame`` with the clip's FPS, duration, height
        and width, the randomly chosen start time, and the sampling rate and
        bit depth read back from the written audio file (these describe the
        extracted file and may differ from the source audio stream).
        Returns ``None`` if the audio could not be extracted; the video
        segment has already been written in that case.

    Raises:
        ValueError: If the requested segment is longer than the video.
    """
    video = VideoFileClip(video_path)
    try:
        total_duration = video.duration  # seconds

        segment_length_sec = segment_length_min * 60
        if segment_length_sec > total_duration:
            raise ValueError("Segment length is longer than the video duration")

        # Choose a start so that the whole segment fits inside the video.
        start_time = random.uniform(0, total_duration - segment_length_sec)
        segment = video.subclip(start_time, start_time + segment_length_sec)

        # Save the video segment without audio
        segment.without_audio().write_videofile(segment_video_path, codec="libx264")

        # Save audio as WAV. A clip without an audio track is reported
        # explicitly instead of via an AttributeError message.
        if segment.audio is None:
            print("Error in audio extraction: video has no audio track")
            return None
        try:
            segment.audio.write_audiofile(audio_path, codec="pcm_s16le")
        except Exception as e:
            # Best effort by design: callers check for a None result.
            print(f"Error in audio extraction: {e}")
            return None

        # Read the written file back with pydub to get its audio parameters.
        audio = AudioSegment.from_file(audio_path)
        sampling_rate = audio.frame_rate
        resolution = audio.sample_width * 8  # bytes per sample -> bits

        metadata = pd.DataFrame({
            'FPS': [video.fps],
            'Total Duration (s)': [total_duration],
            'Height (pixels)': [video.size[1]],
            'Width (pixels)': [video.size[0]],
            'Random Start Time (s)': [start_time],
            'Audio Sampling Rate (Hz)': [sampling_rate],
            'Audio Resolution (bits)': [resolution]
        })
    finally:
        # Release the clip's underlying readers on every exit path.
        video.close()

    return metadata

# Demo run on the sample recording. The extractor returns None when the audio
# step fails, so only print when a metadata frame actually came back.
video_path = "resources/Westminster_Hall_09_02_23_13_32_04.mp4"
metadata_df = extract_random_segment(video_path=video_path)
if metadata_df is not None:
    print(metadata_df)


t:   0%|          | 11/15000 [02:38<59:58:00, 14.40s/it, now=None]

Moviepy - Building video output_segment.mp4.
Moviepy - Writing video output_segment.mp4



t:   0%|          | 11/15000 [07:49<177:32:22, 42.64s/it, now=None]

Moviepy - Done !
Moviepy - video ready output_segment.mp4
MoviePy - Writing audio in extracted_audio.wav


t:   0%|          | 11/15000 [07:58<180:56:43, 43.46s/it, now=None]

MoviePy - Done.
    FPS  Total Duration (s)  Height (pixels)  Width (pixels)  \
0  25.0             9429.12              576            1024   

   Random Start Time (s)  Audio Sampling Rate (Hz)  Audio Resolution (bits)  
0            4031.428606                     44100                       16  


In [4]:
import subprocess
import json

def extract_metadata_ffprobe(video_path):
    """Return container and stream metadata for a media file using ``ffprobe``.

    Runs ``ffprobe`` with ``-show_format -show_streams`` and JSON output, and
    parses what it prints on stdout.

    Args:
        video_path: Path of the media file to inspect.

    Returns:
        The parsed JSON document as Python objects.

    Raises:
        RuntimeError: If ``ffprobe`` exits with a non-zero status; the message
            carries whatever ``ffprobe`` wrote to stderr.
    """
    # '-v error' (rather than 'quiet') keeps stdout pure JSON while still
    # letting ffprobe explain failures on stderr.
    cmd = ['ffprobe', '-v', 'error', '-print_format', 'json', '-show_format', '-show_streams', video_path]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Do not try to parse stdout of a failed run: it would either raise an
        # opaque JSON error or look like a valid-but-empty result.
        details = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(
            f"ffprobe failed for {video_path!r} (exit code {result.returncode}): {details}"
        )
    return json.loads(result.stdout)

# Probe the sample recording and pretty-print the full ffprobe report.
video_path = 'resources/Westminster_Hall_09_02_23_13_32_04.mp4'
metadata = extract_metadata_ffprobe(video_path=video_path)
print(json.dumps(metadata, indent=4))

{
    "streams": [
        {
            "index": 0,
            "codec_name": "aac",
            "codec_long_name": "AAC (Advanced Audio Coding)",
            "profile": "LC",
            "codec_type": "audio",
            "codec_tag_string": "mp4a",
            "codec_tag": "0x6134706d",
            "sample_fmt": "fltp",
            "sample_rate": "48000",
            "channels": 2,
            "channel_layout": "stereo",
            "bits_per_sample": 0,
            "id": "0x1",
            "r_frame_rate": "0/0",
            "avg_frame_rate": "0/0",
            "time_base": "1/48000",
            "start_pts": 0,
            "start_time": "0.000000",
            "duration_ts": 452597760,
            "duration": "9429.120000",
            "bit_rate": "61495",
            "nb_frames": "441990",
            "extradata_size": 2,
            "disposition": {
                "default": 1,
                "dub": 0,
                "original": 0,
                "comment": 0,
               