In [2]:
import os
import shlex
import subprocess
from pathlib import Path

import numpy as np

In [3]:
def ffmpeg_filenames_cut(input_dir, output_dir):
    """Pair every regular file in ``input_dir`` with a ``cut_``-prefixed output path.

    Args:
        input_dir: Directory to scan. Only regular files are used;
            sub-directories are skipped.
        output_dir: Directory prefix for the generated output paths. It is only
            used to build strings; it is neither created nor checked here.

    Returns:
        A tuple ``(files, cut_files)`` of two equally long lists, ordered by
        file name, where ``files[i]`` is ``input_dir + '/' + name`` and
        ``cut_files[i]`` is ``output_dir + '/cut_' + name``.

        If ``input_dir`` does not exist or cannot be read, a human-readable
        error *string* is returned instead of the tuple, so callers that
        unpack the result directly will fail on the unpacking step.
    """
    try:
        # os.listdir() yields entries in arbitrary order. Sorting makes the
        # result reproducible and keeps it index-aligned with any other
        # listing of the same directory.
        names = sorted(
            name for name in os.listdir(input_dir)
            if os.path.isfile(os.path.join(input_dir, name))
        )
    except FileNotFoundError:
        return f"The directory {input_dir} does not exist."
    except PermissionError:
        return f"Permission denied to access {input_dir}."

    files = [input_dir + '/' + name for name in names]
    cut_files = [output_dir + '/cut_' + name for name in names]
    return files, cut_files

def ffmpeg_filenames_new_uncut(input_dir, output_dir):
    """Pair every regular file in ``input_dir`` with a same-named path in ``output_dir``.

    Args:
        input_dir: Directory to scan. Only regular files are used;
            sub-directories are skipped.
        output_dir: Directory prefix for the generated output paths. It is only
            used to build strings; it is neither created nor checked here.

    Returns:
        A tuple ``(files, out_files)`` of two equally long lists, ordered by
        file name, where ``files[i]`` is ``input_dir + '/' + name`` and
        ``out_files[i]`` is ``output_dir + '/' + name``.

        If ``input_dir`` does not exist or cannot be read, a human-readable
        error *string* is returned instead of the tuple, so callers that
        unpack the result directly will fail on the unpacking step.
    """
    try:
        # os.listdir() yields entries in arbitrary order. Sorting makes the
        # result reproducible and keeps it index-aligned with any other
        # listing of the same directory.
        names = sorted(
            name for name in os.listdir(input_dir)
            if os.path.isfile(os.path.join(input_dir, name))
        )
    except FileNotFoundError:
        return f"The directory {input_dir} does not exist."
    except PermissionError:
        return f"Permission denied to access {input_dir}."

    files = [input_dir + '/' + name for name in names]
    out_files = [output_dir + '/' + name for name in names]
    return files, out_files

def cut_audio(input_file, output_file, cut_begin, cut_end):
    """Write a copy of ``input_file`` with the span ``cut_begin``..``cut_end`` removed.

    ffmpeg keeps the audio up to ``cut_begin`` and the audio from ``cut_end``
    onwards (two ``atrim`` branches) and concatenates both parts into
    ``output_file``. The command runs through ``bash -c`` after activating
    the ``enfify`` conda environment.

    Args:
        input_file: Path (``str`` or ``Path``) of the source recording.
        output_file: Path (``str`` or ``Path``) of the file ffmpeg writes.
        cut_begin: Timestamp passed as ``atrim=end=...`` for the first part.
        cut_end: Timestamp passed as ``atrim=start=...`` for the second part.

    Returns:
        None. The ffmpeg exit status is not checked.
    """
    # shlex.quote() protects paths containing quotes, `$`, backticks or
    # backslashes, which would break (or be expanded inside) plain "..." quoting.
    src = shlex.quote(str(Path(input_file)))
    dst = shlex.quote(str(Path(output_file)))

    filter_graph = (
        f'[0]atrim=end={cut_begin},asetpts=PTS-STARTPTS[a1]; '
        f'[0]atrim=start={cut_end},asetpts=PTS-STARTPTS[a2]; '
        '[a1][a2]concat=n=2:v=0:a=1[out]'
    )

    subprocess.run([
        'bash', '-c',
        '. /home/$USER/miniforge3/etc/profile.d/conda.sh; conda activate enfify; '
        f'ffmpeg -i {src} -filter_complex "{filter_graph}" -map "[out]" {dst}'
    ])

def cut_end_audio(input_file, output_file, cut_point):
    """Write a copy of ``input_file`` truncated at ``cut_point``.

    ffmpeg keeps only the audio before ``cut_point`` (``atrim=end=...``) and
    writes it to ``output_file``. The command runs through ``bash -c`` after
    activating the ``enfify`` conda environment.

    Args:
        input_file: Path (``str`` or ``Path``) of the source recording.
        output_file: Path (``str`` or ``Path``) of the file ffmpeg writes.
        cut_point: Timestamp passed as ``atrim=end=...``.

    Returns:
        None. The ffmpeg exit status is not checked.
    """
    # shlex.quote() protects paths containing quotes, `$`, backticks or
    # backslashes, which would break (or be expanded inside) plain "..." quoting.
    src = shlex.quote(str(Path(input_file)))
    dst = shlex.quote(str(Path(output_file)))

    filter_graph = f'atrim=end={cut_point},asetpts=PTS-STARTPTS[out]'

    subprocess.run([
        'bash', '-c',
        '. /home/$USER/miniforge3/etc/profile.d/conda.sh; conda activate enfify; '
        f'ffmpeg -i {src} -filter_complex "{filter_graph}" -map "[out]" {dst}'
    ])

def mult_cut_audio(input_file, output_file, cut_begin_1, cut_end_1, cut_begin_2, cut_end_2, cut_begin_3, cut_end_3):
    """Write a copy of ``input_file`` with three spans removed.

    The spans ``cut_begin_k``..``cut_end_k`` (k = 1, 2, 3) are dropped: ffmpeg
    keeps the four remaining pieces via ``atrim`` and concatenates them into
    ``output_file``. The filter graph assumes the cut points are given in
    increasing order (each kept piece runs from one ``cut_end`` to the next
    ``cut_begin``).

    Args:
        input_file: Path of the source recording.
        output_file: Path of the file ffmpeg writes.
        cut_begin_1, cut_end_1: Start/end timestamps of the first removed span.
        cut_begin_2, cut_end_2: Start/end timestamps of the second removed span.
        cut_begin_3, cut_end_3: Start/end timestamps of the third removed span.

    Returns:
        None. The ffmpeg exit status is not checked.
    """
    # Paths used to be interpolated unquoted, so any path containing a space
    # was split into several shell words. Quote them explicitly.
    src = shlex.quote(str(input_file))
    dst = shlex.quote(str(output_file))

    filter_graph = (
        f'[0]atrim=end={cut_begin_1},asetpts=PTS-STARTPTS[a1]; '
        f'[0]atrim=start={cut_end_1}:end={cut_begin_2},asetpts=PTS-STARTPTS[a2]; '
        f'[0]atrim=start={cut_end_2}:end={cut_begin_3},asetpts=PTS-STARTPTS[a3]; '
        f'[0]atrim=start={cut_end_3},asetpts=PTS-STARTPTS[a4]; '
        '[a1][a2][a3][a4]concat=n=4:v=0:a=1[out]'
    )

    # Run through bash like the other ffmpeg helpers in this notebook.
    subprocess.run([
        'bash', '-c',
        '. /home/$USER/miniforge3/etc/profile.d/conda.sh; conda activate enfify; '
        f'ffmpeg -i {src} -filter_complex "{filter_graph}" -map "[out]" {dst}'
    ])

In [None]:
# Talkbank recordings: remove one fixed span from every file.
# cut_begin / cut_end are forwarded unchanged to cut_audio().
input_dir = '/home/leo_dacasi/Dokumente/summerofcode/ENFify/notebooks/Talkbank_data'
output_dir = '/home/leo_dacasi/Dokumente/summerofcode/ENFify/notebooks/Talkbank_cut_data'
cut_begin = 60
cut_end = 120

files, cut_files = ffmpeg_filenames_cut(input_dir, output_dir)

# files[i] is the source and cut_files[i] the matching 'cut_' output path.
for i, source_path in enumerate(files):
    cut_audio(source_path, cut_files[i], cut_begin, cut_end)

In [None]:
# Talkbank recordings: remove three fixed spans from every file.
# NOTE(review): output_dir and the 'cut_' prefix are the same as in the
# single-cut Talkbank cell, so both cells target the same output file names.
input_dir = '/home/leo_dacasi/Dokumente/summerofcode/ENFify/notebooks/Talkbank_data'
output_dir = '/home/leo_dacasi/Dokumente/summerofcode/ENFify/notebooks/Talkbank_cut_data'

# Each (cut_begin_k, cut_end_k) pair delimits one removed span.
cut_begin_1 = 60
cut_end_1 = 120
cut_begin_2 = 240
cut_end_2 = 300
cut_begin_3 = 420
cut_end_3 = 480

files, cut_files = ffmpeg_filenames_cut(input_dir, output_dir)

for i, source_path in enumerate(files):
    mult_cut_audio(
        source_path, cut_files[i],
        cut_begin_1, cut_end_1,
        cut_begin_2, cut_end_2,
        cut_begin_3, cut_end_3,
    )

In [None]:
# ENF-WHU reference recordings: remove one randomly placed span per file.
input_dir = '/home/leo_dacasi/Dokumente/summerofcode/Enfify_Data_Synced/raw/ENF-WHU-Dataset/reference_enf'
output_dir = '/home/leo_dacasi/Dokumente/summerofcode/Enfify_Data_Synced/interim/ENF-WHU-Dataset/one_cut_ref_enf'

max_cut = 120    # exclusive upper bound for the length of the removed span
max_coord = 360  # exclusive upper bound for the start of the removed span

files, cut_files = ffmpeg_filenames_cut(input_dir, output_dir)

# Draw one (start, length) pair per file. The lists are sized by len(files)
# rather than a fixed 130, so a directory with more files no longer raises
# IndexError in the loop below.
# NOTE(review): np.random.randint(0, max_cut) can return 0, in which case
# nothing is removed from that file. No random seed is set, so the cuts
# differ between runs.
cut_len = [np.random.randint(0, max_cut) for _ in range(len(files))]
i_cut = [np.random.randint(150, max_coord) for _ in range(len(files))]

for i in range(len(files)):
    cut_audio(files[i], cut_files[i], i_cut[i], i_cut[i] + cut_len[i])

In [None]:
def cut_out(input_file, output_file, start, end):
    """Copy an excerpt of ``input_file`` to ``output_file`` without re-encoding.

    Runs ``ffmpeg -ss <start> -i <input> -t <end> -c copy <output>`` through
    ``bash -c`` after activating the ``enfify`` conda environment.

    Args:
        input_file: Path of the source recording.
        output_file: Path of the file ffmpeg writes.
        start: Seek position passed to ffmpeg's ``-ss`` input option.
        end: Value passed to ffmpeg's ``-t`` option. Despite the parameter
            name, ``-t`` is a *duration* measured from ``start``, not an
            absolute end position.

    Returns:
        None. The ffmpeg exit status is not checked.
    """
    # Paths used to be interpolated unquoted, so any path containing a space
    # was split into several shell words. Quote them explicitly.
    src = shlex.quote(str(input_file))
    dst = shlex.quote(str(output_file))

    # Run through bash like the other ffmpeg helpers in this notebook.
    subprocess.run([
        'bash', '-c',
        '. /home/$USER/miniforge3/etc/profile.d/conda.sh; conda activate enfify; '
        f'ffmpeg -ss {start} -i {src} -t {end} -c copy {dst}'
    ])

def minute_list_files_in_directory(input_dir, output_dir):
    """Pair every regular file in ``input_dir`` with a ``min_``-prefixed output path.

    Args:
        input_dir: Directory to scan. Only regular files are used;
            sub-directories are skipped.
        output_dir: Directory prefix for the generated output paths. It is only
            used to build strings; it is neither created nor checked here.

    Returns:
        A tuple ``(files, down_files)`` of two equally long lists, ordered by
        file name, where ``files[i]`` is ``input_dir + '/' + name`` and
        ``down_files[i]`` is ``output_dir + '/min_' + name``.

        If ``input_dir`` does not exist or cannot be read, a human-readable
        error *string* is returned instead of the tuple, so callers that
        unpack the result directly will fail on the unpacking step.
    """
    try:
        # os.listdir() yields entries in arbitrary order. Sorting makes the
        # result reproducible across runs.
        names = sorted(
            name for name in os.listdir(input_dir)
            if os.path.isfile(os.path.join(input_dir, name))
        )
    except FileNotFoundError:
        return f"The directory {input_dir} does not exist."
    except PermissionError:
        return f"Permission denied to access {input_dir}."

    files = [input_dir + '/' + name for name in names]
    down_files = [output_dir + '/min_' + name for name in names]
    return files, down_files

In [None]:
# ENF-WHU real-world recordings: write one excerpt per file via cut_out().
input_dir = '/home/leo_dacasi/Dokumente/summerofcode/Enfify_Data_Synced/raw/ENF-WHU-Dataset/enf_real_world_recordings'
output_dir = '/home/leo_dacasi/Dokumente/summerofcode/Enfify_Data_Synced/interim/ENF-WHU-Dataset/1min_noise/1min_noisy'

files, min_files = minute_list_files_in_directory(input_dir, output_dir)

# 10 and 60 are handed to cut_out() as its `start` and `end` arguments.
for i, source_path in enumerate(files):
    cut_out(source_path, min_files[i], 10, 60)

In [None]:
# 10-second ENF-WHU reference clips: remove one randomly placed span per file.
# The directory names contain plain spaces. Shell-style `\ ` escapes must not
# be used in Python string literals: Python keeps the backslash, so
# os.listdir() would look for a directory literally named 'ENF\ WHU\ Dataset'.
# NOTE(review): confirm 'ENF WHU Dataset' is the actual on-disk name; other
# cells in this notebook use 'ENF-WHU-Dataset'.
input_dir = '/home/leo_dacasi/Dokumente/summerofcode/Enfify_Data_Synced/interim/ENF WHU Dataset/10s_ref/10s_enf_ref'
output_dir = '/home/leo_dacasi/Dokumente/summerofcode/Enfify_Data_Synced/interim/ENF WHU Dataset/10s_ref/cut_10s_enf_ref'

files, cut_files = ffmpeg_filenames_cut(input_dir, output_dir)

# One (start, length) pair per file, sized by len(files) rather than a fixed
# 130 so that larger directories do not raise IndexError in the loop below.
cut_len = [np.random.randint(1, 3) for _ in range(len(files))]
i_cut = [np.random.randint(3, 8) for _ in range(len(files))]

for i in range(len(files)):
    cut_audio(files[i], cut_files[i], i_cut[i], i_cut[i] + cut_len[i])

In [4]:
# Synthetic training audio: for every raw file, write
#   * a version with a randomly placed span removed (cut_audio), and
#   * a version truncated at (60 - removed length) (cut_end_audio).
raw_dir = "/home/leo_dacasi/Dokumente/summerofcode/Enfify Data Synced/raw/synthetic/uncut/train_audio"
cut_dir = "/home/leo_dacasi/Dokumente/summerofcode/Enfify Data Synced/raw/synthetic/cut/new_train_cut_audio"
new_raw_dir = "/home/leo_dacasi/Dokumente/summerofcode/Enfify Data Synced/raw/synthetic/new_uncut/train"

# NOTE(review): `files` is rebound by the second call, so cut_files[i] (from
# the first listing) is paired by index with files[i] from the second listing.
# This relies on both calls returning the entries in the same order.
files, cut_files = ffmpeg_filenames_cut(raw_dir, cut_dir)
files, uncut_files = ffmpeg_filenames_new_uncut(raw_dir, new_raw_dir)

n_files = len(files)
cut_len = [np.random.randint(1, 19) for _ in range(n_files)]
i_cut = [np.random.randint(20, 41) for _ in range(n_files)]

for i, source_path in enumerate(files):
    # presumably the raw clips are 60 s long, so both outputs end up the same
    # length — TODO confirm
    i_uncut = 60 - cut_len[i]
    cut_audio(source_path, cut_files[i], i_cut[i], i_cut[i] + cut_len[i])
    cut_end_audio(source_path, uncut_files[i], i_uncut)

ffmpeg version 7.0.2 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 13.3.0 (conda-forge gcc 13.3.0-0)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1724645118421/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1724645118421/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1724645118421/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1724645118421/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1724645118421/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libo