In [29]:
import os
from glob import glob
import ntpath
import shutil

import pandas as pd
import librosa
import soundfile as sf
from tqdm import tqdm
import numpy as np

In [30]:
def preprocess_audio(wave_paths, sample_rate, sample_buffer, out_path):
    """Cut every audio file in ``wave_paths`` into clips of ``sample_buffer`` samples.

    Files shorter than ``sample_buffer`` are written twice: once zero-padded
    (``pad_short``) and once looped (``loop_short``). Every other file is split
    into overlapping chunks by ``chunk_long``.

    Args:
        wave_paths: iterable of audio file paths.
        sample_rate: rate in Hz used both for loading and for writing.
        sample_buffer: clip length in samples.
        out_path: output prefix that the helpers concatenate with the clip
            name, so a directory must end with a path separator.
    """
    import warnings

    # Create the target directory up front so the writers do not fail on a
    # fresh output tree. out_path is a string prefix, hence dirname().
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for filename in tqdm(wave_paths):
        audio, _ = librosa.load(filename, sr=sample_rate)
        # Strip the real extension instead of assuming it is four characters long.
        name = os.path.splitext(ntpath.basename(filename))[0]

        # Count samples directly; duration * sample_rate is a float round trip
        # that can land just above or below the true integer count.
        samples_total = len(audio)
        if samples_total == 0:
            # Nothing to pad, loop or chunk (looping would divide by zero).
            warnings.warn(f"skipping {filename}: no audio samples were decoded")
            continue

        if samples_total < sample_buffer:
            pad_short(audio, sample_rate, sample_buffer, samples_total, out_path, name)
            loop_short(audio, sample_rate, sample_buffer, samples_total, out_path, name)
        else:
            # Decide on sample_buffer alone: comparing against the notebook-level
            # chunk_length silently dropped files whenever that global did not
            # match the buffer passed in.
            chunk_long(audio, sample_rate, sample_buffer, samples_total, out_path, name)

            
def loop_short(audio, sample_rate, sample_buffer, samples_total, out_path, name):
    """Repeat a short clip until it fills ``sample_buffer`` samples and write it.

    The result is saved as ``<out_path><name>_loop.wav``.

    Args:
        audio: array of samples; repetition happens along the first axis.
        sample_rate: rate in Hz passed to the writer.
        sample_buffer: target clip length in samples.
        samples_total: kept for signature compatibility; the length is taken
            from ``audio`` itself so float rounding cannot affect the result.
        out_path: output prefix concatenated with ``name``.
        name: clip name without extension.

    Raises:
        ValueError: if ``audio`` is empty (there is nothing to repeat).
    """
    target = int(sample_buffer)
    n_samples = len(audio)
    if n_samples == 0:
        raise ValueError(f"cannot loop empty audio for '{name}'")

    # Ceiling division: the fewest whole copies that cover the target length.
    repeats = max(1, -(-target // n_samples))
    # One concatenation instead of growing the array copy by copy.
    loop = np.concatenate([audio] * repeats)[:target]
    sf.write(f'{out_path+name}_loop.wav', loop, sample_rate)

        
def pad_short(audio, sample_rate, sample_buffer, samples_total, out_path, name):
    """Zero-pad a short clip at the end to ``sample_buffer`` samples and write it.

    The result is saved as ``<out_path><name>_padded.wav``.

    Args:
        audio: array of samples.
        sample_rate: rate in Hz passed to the writer.
        sample_buffer: target clip length in samples.
        samples_total: kept for signature compatibility; the pad width is
            computed from ``len(audio)`` so a float sample count that drifted
            slightly above the true length cannot make the clip one sample short.
        out_path: output prefix concatenated with ``name``.
        name: clip name without extension.

    Raises:
        ValueError: if ``audio`` is already longer than ``sample_buffer``.
    """
    pad = int(sample_buffer) - len(audio)
    if pad < 0:
        raise ValueError(
            f"'{name}' has {len(audio)} samples, more than the {int(sample_buffer)}-sample buffer"
        )
    wave = np.pad(audio, (0, pad))

    out_filename = out_path + name + "_padded" + ".wav"
    sf.write(out_filename, wave, sample_rate)
        
        
def chunk_long(audio, sample_rate, sample_buffer, samples_total, out_path, name,
               overlap=None, min_tail=None):
    """Split a long clip into overlapping windows of ``sample_buffer`` samples.

    Full windows are written as ``<out_path><name>_chunk<k>.wav``. A trailing
    remainder longer than ``min_tail`` samples is completed with samples from
    the start of the clip and written as ``<out_path><name>_wrap<k>.wav``;
    shorter remainders are dropped. ``k`` counts written files from 1.

    Args:
        audio: array of samples.
        sample_rate: rate in Hz passed to the writer.
        sample_buffer: window length in samples.
        samples_total: kept for signature compatibility; positions are computed
            from ``len(audio)`` so float rounding cannot shorten a wrapped clip.
        out_path: output prefix concatenated with ``name``.
        name: clip name without extension.
        overlap: samples shared by consecutive windows. Defaults to the
            notebook-level ``overlap_samples``.
        min_tail: a remainder must be longer than this many samples to be
            wrapped. Defaults to the notebook-level ``min_samples``.

    Raises:
        ValueError: if the overlap is not smaller than the window, which would
            keep the start position from ever advancing.
    """
    # Fall back to the notebook globals so existing six-argument calls keep working.
    if overlap is None:
        overlap = overlap_samples
    if min_tail is None:
        min_tail = min_samples

    window = int(sample_buffer)
    hop = int(sample_buffer - overlap)
    if hop <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than the window ({sample_buffer})"
        )

    total = len(audio)
    counter = 1
    for start in range(0, total, hop):
        remaining = total - start
        if remaining >= window:
            clip = audio[start:start + window]
            tag = "_chunk"
        elif remaining > min_tail:
            # Complete the tail with the beginning of the clip to reach a full window.
            clip = np.concatenate([audio[start:], audio[:window - remaining]])
            tag = "_wrap"
        else:
            continue  # remainder too short to be worth keeping

        sf.write(out_path + name + tag + str(counter) + ".wav", clip, sample_rate)
        counter += 1

In [None]:
# Windowing configurations as (chunk length, chunk overlap), both in seconds.
windows = [(5, 2.5), (10, 5), (15, 7.5)]

sample_rate = 44100
min_length = 1.25                       # seconds; shorter end pieces are not wrapped
dsets = ['train', 'val', 'test']
chunk_suffixes = ('padded', 'loop', 'chunk', 'wrap')   # tags appended by the writers

# The source metadata does not depend on the window, so read it once.
df = pd.read_csv('../../data/metadata.csv')
df = df[['file_name', 'unique_file', 'path', 'label', 'subset']]

for chunk_length, chunk_overlap in windows:

    # These stay notebook-level names on purpose: preprocess_audio and
    # chunk_long read chunk_length, overlap_samples and min_samples as globals.
    sample_buffer = chunk_length * sample_rate         # number of samples per chunk
    overlap_samples = chunk_overlap * sample_rate      # overlap of chunks in samples
    min_samples = min_length * sample_rate             # minimum end samples

    outdir = f'../../data/production_data/{chunk_length}s_crop/'

    for ds in dsets:
        paths = sorted(glob(f'../../data/{ds}/*.wav'))
        out_path = f'{outdir}/{ds}/'
        os.makedirs(out_path, exist_ok=True)   # the writers need the directory to exist
        preprocess_audio(paths, sample_rate, sample_buffer, out_path)

    # Index the written clips by (subset, source name) with one listing per
    # subset. Splitting on the last underscore recovers the exact source name,
    # whereas a `{name}*.wav` prefix glob also matched clips of other files
    # whose names merely start with `name` (e.g. `cat1` picking up `cat10_chunk1`).
    chunks_by_source = {}
    for ds in dsets:
        for chunk_path in sorted(glob(f'{outdir}/{ds}/*.wav')):
            stem = os.path.splitext(ntpath.basename(chunk_path))[0]
            source_name, _, suffix = stem.rpartition('_')
            if suffix.startswith(chunk_suffixes):
                chunks_by_source.setdefault((ds, source_name), []).append(chunk_path)

    # One output row per written clip: the source row with `path` replaced.
    expanded_rows = []
    for record in tqdm(df.to_dict('records')):
        name = os.path.splitext(ntpath.basename(record['path']))[0]
        subset = 'val' if record['subset'] == 'validation' else record['subset']
        for chunk_path in chunks_by_source.get((subset, name), []):
            expanded_rows.append({**record, 'path': chunk_path})

    # Passing the columns explicitly keeps the header even when nothing matched.
    pp_df = pd.DataFrame(expanded_rows, columns=df.columns)
    pp_df.to_csv(f'{outdir}/metadata.csv', index=False)

In [13]:
# Constants
SR = 44100                          # sample rate
chunk_length = 7.5                    # length of chunks in seconds
overlap = 3.75                      # length of overlap between consecutive chunks
min_length = 1.25                   # minimum length for end chunks to be created

# calculate global variables
buffer = chunk_length * SR          # number of samples per chunk
overlap_samples = overlap * SR      # overlap of chunks in samples
min_samples = min_length * SR       # minimum end samples

paths = glob(f'../../data/train/*.wav')
out_path = f'../../data/production_data/5s_crop/train/'

# process all files in input folder
for filename in tqdm(paths):                     # wav files only
    audio, sr = librosa.load(filename, sr=SR)                               # load file with correct SR
    file_length = librosa.get_duration(y=audio, sr=SR)                        # get file length in seconds
    name = ntpath.basename(filename[:-4])                                   # filename without path or extension
    # print("Processing: " + name)

    # Calculate and initialize local variables
    samples_total = file_length * SR                                        # overall n samples per file
    samples_wrote = 0                                                       # initialize start time

    # loop files that are shorter than chunk length
    if samples_total < buffer:
        count = int(buffer / samples_total) + (buffer % samples_total > 0)  # rounded how often the file fits in buffer
        i = 1                                                               # init counter
        loop = audio                                                        # init loop

        while i < count:
            loop = np.concatenate([loop, audio])                            # add file to itself until buffer is filled
            i += 1                                                          # update counter

        loop = loop[0: int(buffer)]                                              # truncate loop to specified chunk length
        out_filename = out_path + name + "_loop" + ".wav"                     # create output file name
        sf.write(out_filename, loop, SR)                                    # save file

    # split longer files into chunks
    if file_length >= chunk_length:
        counter = 1                                                                 # initialize counter for file name
        while samples_wrote < samples_total:
            if (samples_total - samples_wrote) >= buffer:                           # if buffer fits in remaining time
                chunk = audio[samples_wrote: int(samples_wrote + buffer)]              # create the audio chunk
                out_filename = out_path + name + "_chunk" + str(counter) + ".wav"     # create file name with counter
                sf.write(out_filename, chunk, SR)                                   # export file
                counter += 1                                                        # update counter
                samples_wrote = int(samples_wrote + buffer - overlap_samples)       # update start position

            # wrap audio for end chunks
            if (samples_total - samples_wrote) < buffer:                            # if remaining time is too short...
                if (samples_total - samples_wrote) > min_samples:                   # ... and longer than minimum time
                    wrap_length = int(buffer - (samples_total - samples_wrote))     # wrap length
                    wrap = audio[0: wrap_length]                                    # create wrap chunk
                    chunk = audio[samples_wrote: int(samples_wrote + buffer)]          # create the audio chunk
                    wrapped_file = np.concatenate([chunk, wrap])                    # combine chunk with wrap chunk
                    out_filename = out_path + name + "_wrap" + str(counter) + ".wav"  # file name
                    sf.write(out_filename, wrapped_file, SR)                        # export file
                    counter += 1                                                    # update counter
                samples_wrote = int(samples_wrote + buffer - overlap_samples)       # update start position

In [5]:
# Scratch check: duration in seconds of whatever `audio` currently holds.
# NOTE(review): depends on `audio` and `SR` left in the kernel by an earlier
# cell, so it fails when run first on a fresh kernel — consider deleting.
librosa.get_duration(y=audio, sr=SR)   

In [11]:
# Scratch check: show the chunk size in samples (chunk_length * SR).
# NOTE(review): relies on `buffer` from the constants cell — consider deleting.
buffer

In [None]:
# NOTE(review): bare integer literal with no effect beyond being displayed;
# its meaning cannot be determined from this notebook — looks like a leftover.
15145