##Audio Noiser

Takes wav files of noises and combines them with mp3 files of voices to produce noisy audio clips.  

This will also produce a csv file tracking the constituent files of the noisy audio clips

## How to use
1) Set the environment parameters to appropriate folders in your drive 

2) Run All blocks

3) Allow google drive to be mounted

4) Wait

###Environment Parameters

In [1]:
# Audio noiser will use ALL mp3 files in voice source directory
VOICE_SOURCE_DIR = 'drive/My Drive/github_prep/voices/'

# Audio noiser will use ALL wav files in noises source directory
NOISES_SOURCE_DIR = 'drive/My Drive/github_prep/noises/'

# Audio noiser will overwrite files with filenames of format noisy_#.wav
#   as well as a single file called noisy_audio.csv
DESTINATION_DIR = 'drive/My Drive/github_prep/noisy_voices'

## Set Up

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


### Imports

In [3]:
import glob
import librosa
import os
import csv
import numpy as np

###Parameters

In [4]:
study_voice_filenames = glob.glob(os.path.join(VOICE_SOURCE_DIR, '*.mp3'))
study_noise_filenames = glob.glob(os.path.join(NOISES_SOURCE_DIR,'*.wav'))

In [5]:
print(study_voice_filenames)
print(study_noise_filenames)

['drive/My Drive/github_prep/voices/common_voice_en_13513.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_9379.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_7832.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_110087.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_2737.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_26827.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_659909.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_88839.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_21579.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_151952.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_509026.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_138655.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_13148.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_922.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_126571.mp3', 'drive/My Drive/github_prep/voices/common_voice_en_16

In [6]:
OUTPUT_CHANNELS = 3

windowLength = 256
overlap      = round(0.25 * windowLength) # overlap of 75%
ffTLength    = windowLength
inputFs      = 48e3
fs           = 16e3
numFeatures  = ffTLength//2 + 1
numSegments  = 8
print("windowLength:",windowLength)
print("overlap:",overlap)
print("ffTLength:",ffTLength)
print("inputFs:",inputFs)
print("fs:",fs)
print("numFeatures:",numFeatures)
print("numSegments:",numSegments)

windowLength: 256
overlap: 64
ffTLength: 256
inputFs: 48000.0
fs: 16000.0
numFeatures: 129
numSegments: 8


##Method and Class Definitions

In [7]:
def read_audio(filepath, sample_rate, normalize=True):
    # print(f"Reading: {filepath}").
    audio, sr = librosa.load(filepath, sr=sample_rate)
    if normalize:
      div_fac = 1 / np.max(np.abs(audio)) / 3.0
      audio = audio * div_fac
    return audio, sr
        
def add_noise_to_clean_audio(clean_audio, noise_signal):
    if len(clean_audio) >= len(noise_signal):
        # print("The noisy signal is smaller than the clean audio input. Duplicating the noise.")
        while len(clean_audio) >= len(noise_signal):
            noise_signal = np.append(noise_signal, noise_signal)

    ## Extract a noise segment from a random location in the noise file
    ind = np.random.randint(0, noise_signal.size - clean_audio.size)

    noiseSegment = noise_signal[ind: ind + clean_audio.size]

    speech_power = np.sum(clean_audio ** 2)
    noise_power = np.sum(noiseSegment ** 2)
    noisyAudio = clean_audio + np.sqrt(speech_power / noise_power) * noiseSegment
    return noisyAudio

In [10]:
def produce_noisy_audio(voice_filepaths, noise_filepaths):
  noise_idx = 0
  noise_size = len(noise_filepaths)

  count = 0

  f = open(os.path.join(DESTINATION_DIR, "noisy_audio .csv"), 'w')
  csv_writer = csv.writer(f)
  csv_writer.writerow(["noisy", "voice used","noise used"])

  num_voices = len(voice_filepaths) 
  ten_percent = num_voices // 10 + 1
  print (f"Adding noise to {num_voices} voices")
  print("  [.", end="")
  for voice_file in voice_filepaths:
    # load the clean voice
    cleanAudio, clean_sr = read_audio(voice_file, sample_rate=fs)
    # load the noise
    noiseAudio, noise_sr = read_audio(noise_filepaths[noise_idx], sample_rate=fs)

    # add noise to the clean audio
    noisyAudio = add_noise_to_clean_audio(cleanAudio, noiseAudio)
    
    # save the noisy audio
    noisy_filename = 'noisy_' + str(count)
    noisy_write_path = os.path.join(DESTINATION_DIR,noisy_filename+'.wav')
    librosa.output.write_wav(noisy_write_path, noisyAudio, int(clean_sr))

    # record the various filenames in a csv file
    csv_writer.writerow([noisy_filename,os.path.basename(voice_file), os.path.basename(noise_filepaths[noise_idx])])

    # increment and wrap around index through noises array
    noise_idx = (noise_idx + 1) % noise_size

    count += 1
    if count % ten_percent == 0:
      print('.', end='')
  print(']')
  f.close()
  

##Produce Noisy Audio

In [11]:
produce_noisy_audio(study_voice_filenames, study_noise_filenames)

Adding noise to 46 voices
	[...........]
