This notebook performs acoustic signal augmentation: it generates new audio clips from existing ones.

In [2]:
from pathlib import Path, PurePath

try:
  import csv
except:
  !pip3 install csv

import numpy

try:
  from pydub import AudioSegment
  from pydub.utils import get_array_type
except:
  !pip3 install pydub


The next code cell defines variables holding the relevant directory and file paths.

In [5]:
# Common prefix shared by every dataset location defined below.
_TAPIR_DATASET_ROOT = (
    '/content/drive/MyDrive/Data-science_learnt_applied/Problems/'
    'Juliana_bio-acoustic_FiebergLab_UoM/Datasets/Tapir'
)

# Directory holding the raw audio clips.
raw_audio_clips_path = _TAPIR_DATASET_ROOT + '/Raw-audio-files'

# Directory holding the spectrograms generated from the raw audio clips.
raw_audio_spectrograms_path = (
    _TAPIR_DATASET_ROOT + '/Spectrograms-for-raw-clips'
)

# Directory receiving the synthetic clips whose background is silence.
augmented_audio_with_silence_path = (
    _TAPIR_DATASET_ROOT + '/Synthetic-clips_with-silence'
)

# Directory receiving the synthetic clips whose background is taken from the
# raw recordings.
augmented_audio_with_background_path = (
    _TAPIR_DATASET_ROOT + '/Synthetic-clips_with-background'
)

# Metadata CSV listing the file names of the raw audio clips.
raw_clips_metadata_path = _TAPIR_DATASET_ROOT + '/metadata_raw-audio-files.csv'


The next code cell generates new audio clips with silence at all times other than the duration of the tapir call.

In [6]:
def augment_wi_silence(path, stem, *time):
  """Write five 5 s clips per tapir call, padding the call with silence.

  For every (start, end) pair in ``time`` the call is cut out of the source
  recording and placed at offsets 0 s, 1 s, 2 s, 3 s and 4 s of an otherwise
  silent clip. Each clip is exported as WAV into
  ``augmented_audio_with_silence_path`` under the name
  ``<stem>_<call number>_<position number>.wav`` (both numbers start at 1).

  Args:
    path: Location of the source audio file, handed to
      ``AudioSegment.from_file``.
    stem: Name of the source file without its extension; used as the prefix
      of every exported file name.
    *time: Flat sequence ``start_1, end_1, start_2, end_2, ...`` of call
      boundaries in milliseconds. A trailing unpaired value is ignored.

  Returns:
    None. The clips are written to disk.
  """
  clip_ms = 5000  # total length of every generated clip
  step_ms = 1000  # spacing between successive call positions

  # Pair up the flat argument list; zip drops a dangling start time.
  call_bounds = list(zip(time[0::2], time[1::2]))
  if not call_bounds:
    return  # nothing to generate, so do not touch the audio file at all

  # Decode the recording once instead of once per generated clip.
  recording = AudioSegment.from_file(path)

  for call_number, (start, end) in enumerate(call_bounds, start=1):
    call = recording[start:end]
    call_ms = end - start
    for position, lead_ms in enumerate(range(0, clip_ms, step_ms), start=1):
      # A call too long to fit after ``lead_ms`` gets no trailing silence
      # (the clip then runs past 5 s) instead of a negative duration.
      tail_ms = max(0, clip_ms - lead_ms - call_ms)
      clip = (AudioSegment.silent(duration=lead_ms) + call
              + AudioSegment.silent(duration=tail_ms))
      clip.export(
          Path(augmented_audio_with_silence_path,
               stem + '_' + str(call_number) + '_' + str(position) + '.wav'),
          format='wav')

# Read the metadata CSV and generate the silence-padded clips for every raw
# recording it lists. Each row is: file name, then start/end times of the
# tapir calls; empty cells are ignored.
# newline='' is what the csv module recommends for files handed to csv.reader.
with open(Path(raw_clips_metadata_path), 'r', newline='') as file_object:
  file_reader = csv.reader(file_object, delimiter=',')
  for raw_audio_meta in file_reader:
    if not raw_audio_meta or not raw_audio_meta[0]:
      # Blank line (csv yields []) or row without a file name: nothing to do,
      # and indexing it would otherwise raise IndexError.
      continue
    clip_path = Path(raw_audio_clips_path, raw_audio_meta[0])
    interval = [int(x) for x in raw_audio_meta[1:] if x]
    augment_wi_silence(clip_path, clip_path.stem, *interval)

The next code cell generates new audio clips with background taken from the original clips.

In [7]:
def augment_wi_background(path, stem, *time):
  """Write five 5 s clips per tapir call, keeping the recording's background.

  For every (start, end) pair in ``time`` the 5 s window of the recording that
  contains the call's start (window boundaries are multiples of 5000 ms) is
  re-cut so that the call begins 0 s, 1 s, 2 s, 3 s and 4 s into the window,
  with the window's remaining audio rearranged around it. Each clip is
  exported as WAV into ``augmented_audio_with_background_path`` under the name
  ``<stem>_<call number>_<position number>.wav`` (both numbers start at 1).

  Args:
    path: Location of the source audio file, handed to
      ``AudioSegment.from_file``.
    stem: Name of the source file without its extension; used as the prefix
      of every exported file name.
    *time: Flat sequence ``start_1, end_1, start_2, end_2, ...`` of call
      boundaries in milliseconds. A trailing unpaired value is ignored.

  Returns:
    None. The clips are written to disk.
  """
  window_ms = 5000  # length of the window each clip is cut from
  step_ms = 1000    # spacing between successive call positions

  # Pair up the flat argument list; zip drops a dangling start time.
  call_bounds = list(zip(time[0::2], time[1::2]))
  if not call_bounds:
    return  # nothing to generate, so do not touch the audio file at all

  # Decode the recording once instead of once per slice.
  recording = AudioSegment.from_file(path)

  for call_number, (start, end) in enumerate(call_bounds, start=1):
    # Anchor the window on THIS call's start. Using the first call's start
    # for every call would mismatch the slices below as soon as a later call
    # falls into a different 5 s window.
    window_start = (start // window_ms) * window_ms
    window_end = window_start + window_ms
    call = recording[start:end]

    targets = range(window_start, window_end, step_ms)
    for position, target in enumerate(targets, start=1):
      if target <= start:
        # Call moves earlier (or stays put when target == start): the
        # background between target and the old call start plays after it.
        clip = (recording[window_start:target] + call
                + recording[target:start] + recording[end:window_end])
      else:
        # Call moves later: background that followed the call is pulled in
        # front of it until the call begins at ``target``.
        # NOTE(review): a call that no longer fits before window_end is not
        # handled specially; confirm the resulting clip length is acceptable.
        resume = target + (end - start)
        clip = (recording[window_start:start] + recording[end:resume] + call
                + recording[resume:window_end])
      clip.export(
          Path(augmented_audio_with_background_path,
               stem + '_' + str(call_number) + '_' + str(position) + '.wav'),
          format='wav')
    
# Read the metadata CSV and generate the real-background clips for every raw
# recording it lists. Each row is: file name, then start/end times of the
# tapir calls; empty cells are ignored.
# newline='' is what the csv module recommends for files handed to csv.reader.
with open(Path(raw_clips_metadata_path), 'r', newline='') as file_object:
  file_reader = csv.reader(file_object, delimiter=',')
  for raw_audio_meta in file_reader:
    if not raw_audio_meta or not raw_audio_meta[0]:
      # Blank line (csv yields []) or row without a file name: nothing to do,
      # and indexing it would otherwise raise IndexError.
      continue
    clip_path = Path(raw_audio_clips_path, raw_audio_meta[0])
    y = [int(x) for x in raw_audio_meta[1:] if x]
    augment_wi_background(clip_path, clip_path.stem, *y)