# Setup

In [1]:
import math
import os
from pathlib import Path
import gc

from moviepy.editor import AudioFileClip, VideoFileClip
from pydub import AudioSegment
from tqdm import tqdm

In [5]:
# Get total duration
# Split into 3 minute segments

def split_audio(audio_filepath: str, output_dir: str, output_filename: str, segment_duration: float) -> None:
    """Cut an audio file into consecutive fixed-length WAV segments.

    The file is loaded with pydub's ``AudioSegment.from_file`` and sliced into
    back-to-back pieces of 180000 ms (3 minutes). Each piece is exported as
    ``<output_dir>/<output_filename>_partNN.wav`` where ``NN`` is the
    zero-padded (minimum two digits) segment index starting at 00.

    Args:
        audio_filepath: Path of the audio file to split.
        output_dir: Directory that receives the segments. It is created
            (including parents) if it does not exist yet.
        output_filename: Base name used for every exported segment.
        segment_duration: NOTE(review): currently NOT used. The segment length
            is fixed at 3 minutes regardless of this value; the parameter is
            kept so existing calls keep working. Its intended unit is not
            stated anywhere in the notebook - confirm before wiring it in.

    Returns:
        None. The segments are written to disk as a side effect.
    """
    # Fixed segment length; the slice bounds below are expressed in the same
    # unit as len(file), which pydub documents as milliseconds.
    segment_duration_ms = 180000

    file = AudioSegment.from_file(audio_filepath)
    tot_duration = len(file)
    n_segments = math.ceil(tot_duration / segment_duration_ms)

    # export() does not create missing directories, so make sure the target
    # exists before writing the first segment.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for i in tqdm(range(n_segments)):
        start = i * segment_duration_ms
        # The last slice may be requested past the end of the audio; it simply
        # covers whatever remains.
        segment = file[start:start + segment_duration_ms]
        # os.path.join avoids the doubled separator that plain concatenation
        # produces when output_dir already ends with a slash.
        segment_path = os.path.join(output_dir, f"{output_filename}_part{i:02d}.wav")
        segment.export(segment_path, format="wav")
        
def conv_video(video_filepath: str, output_dir: str, output_filename: str, video_present: bool = True) -> None:
    """Write the audio of a media file to ``<output_dir>/<output_filename>.wav``.

    Args:
        video_filepath: Path of the source media file.
        output_dir: Directory that receives the WAV file. It is created
            (including parents) if it does not exist yet.
        output_filename: Name of the output file without the ``.wav`` suffix.
        video_present: When True the file is opened with ``VideoFileClip`` and
            its ``audio`` attribute is written; when False the file is opened
            directly with ``AudioFileClip``.

    Returns:
        None. The WAV file is written to disk as a side effect.

    Raises:
        ValueError: If ``video_present`` is True but the opened clip has no
            audio (``clip.audio`` is None).
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # os.path.join avoids the doubled separator that plain concatenation
    # produces when output_dir already ends with a slash.
    wav_path = os.path.join(output_dir, f"{output_filename}.wav")

    clip = VideoFileClip(video_filepath) if video_present else AudioFileClip(video_filepath)
    try:
        audio = clip.audio if video_present else clip
        if audio is None:
            raise ValueError(f"No audio track found in {video_filepath!r}")
        audio.write_audiofile(wav_path)
    finally:
        # Always release the reader held by the clip, even if writing fails;
        # otherwise every call leaves an open handle behind in the kernel.
        clip.close()
    

In [7]:
# Stream 01: write the audio of the raw .mkv to a WAV file in the cache
# directory (video_present=False makes conv_video open it with AudioFileClip),
# then cut that WAV into parts under ../data/processed/.
# NOTE(review): the last argument (50) has no effect - split_audio, as defined
# in this notebook, uses a hard-coded 180000 ms segment length.
conv_video("../data/raw/lck_stream_01.mkv", "../data/cache/", "lck_stream_01", video_present=False)
split_audio("../data/cache/lck_stream_01.wav", "../data/processed/", "lck_stream_01", 50)

chunk:  70%|██████▉   | 317479/454282 [05:30<01:13, 1850.92it/s, now=None]

MoviePy - Writing audio in ../data/cache//lck_stream_01.wav




chunk:  70%|██████▉   | 317479/454282 [06:46<01:13, 1850.92it/s, now=None]

MoviePy - Done.


100%|██████████| 66/66 [00:03<00:00, 19.01it/s]


In [8]:
# Stream 02: same two steps as for stream 01 - write the audio to a WAV file in
# the cache directory, then cut it into parts under ../data/processed.
# NOTE(review): the last argument (50) has no effect - split_audio, as defined
# in this notebook, uses a hard-coded 180000 ms segment length.
conv_video("../data/raw/lck_stream_02.mkv", "../data/cache/", "lck_stream_02", video_present=False)
split_audio("../data/cache/lck_stream_02.wav", "../data/processed", "lck_stream_02", 50)

chunk:  70%|██████▉   | 317479/454282 [06:53<01:13, 1850.92it/s, now=None]

MoviePy - Writing audio in ../data/cache//lck_stream_02.wav




chunk:  70%|██████▉   | 317479/454282 [09:07<01:13, 1850.92it/s, now=None]

MoviePy - Done.


100%|██████████| 115/115 [00:07<00:00, 14.79it/s]


In [5]:
# Trigger a full garbage-collection pass; the number shown as the cell output
# is the value returned by gc.collect().
gc.collect()

10