In [25]:
import numpy as np
import librosa as lr
import scaper
import soundfile as sf
import os
import pandas as pd
import random
from numpy.random import normal
from IPython.display import Audio

In [2]:
# Dataset locations on the cluster scratch space.

main_dir = "/scratch/ci411/spokenweb/"

# ICSI meetings: each entry of the Signals folder is expected to contain
# a file named "<entry>.interaction.wav".
isci = main_dir + "ICSI Meetings/Signals/"
isci_files = []
for meeting in os.listdir(isci):
    isci_files.append(isci + meeting + '/' + meeting + '.interaction.wav')

# URBAN-SED test audio, ignoring dot-files.
urbansed = main_dir + "URBAN-SED_v2.0.0/audio/test/"
urbansed_files = [urbansed + name for name in os.listdir(urbansed) if name[0] != '.']

# Root folder holding the scaper foreground/background material.
scaper_files = main_dir + 'scaper_files/'

# Common Voice clips: keep the paths of (at most) the first 100 directory entries.
# The clips are not decoded here: the loaded audio was never used, and slicing
# (instead of indexing until 100 are collected) avoids an IndexError when the
# folder holds fewer than 100 files.
commonvoice = main_dir + 'commonvoice/clips/'
commonvoice_list = os.listdir(commonvoice)
commonvoice_files = [commonvoice + file for file in commonvoice_list[:100]]


In [13]:
# Load the Common Voice training metadata (tab-separated) and list its columns.
train_tsv_path = main_dir + 'commonvoice/train.tsv'
train_cv = pd.read_csv(train_tsv_path, sep='\t')
print(train_cv.columns)

Index(['client_id', 'path', 'sentence', 'up_votes', 'down_votes', 'age',
       'gender', 'accent'],
      dtype='object')


In [14]:
# Walk the per-client groups and keep the first 10 clients that have at least
# 3 rows; for each of them remember the client id and its first 3 row labels.
client_groups = train_cv.groupby(['client_id']).groups
client_ids = []
client_files = []
for client, indexes in client_groups.items():
    if len(indexes) >= 3:
        client_ids.append(client)
        client_files.append(indexes[:3])
        if len(client_files) == 10:
            break

In [5]:
# Prototype: chain the three clips of one client, each followed by up to
# 3 seconds of silence, then add white noise and normalize.
ci = 1
pieces = [np.array([])]
for index in client_files[ci][:3]:
    loc = commonvoice + train_cv.loc[index].path+'.mp3'
    audio, sr = lr.core.load(loc)
    space = np.zeros(int(sr*random.random()*3))
    pieces += [audio, space]
output = np.concatenate(pieces)

# Maximum reasonable noise is ~2e-1, lower bound at 1e-3.
noise_scale = 1e-3
peak_to_peak = max(output) - min(output)
noise_width = noise_scale * peak_to_peak/2
noise = normal(scale = noise_width, size = len(output))  # white noise
noisy_output = lr.util.normalize(noise + output, axis=0)

In [21]:
#function for generating data from indexes in the dataframe

def _peak_normalize(signal):
    """Scale ``signal`` so its largest absolute sample is 1.

    An all-zero signal is returned unchanged to avoid dividing by zero.
    """
    peak = np.max(np.abs(signal))
    if peak == 0:
        return signal
    return signal / peak


def generate_audio(indexes, space_max = 5, snr = 1):
    """Concatenate clips separated by random silence and mix in brown noise.

    Parameters
    ----------
    indexes : iterable
        Row labels of ``train_cv``. For each one, the row's ``path`` plus
        ``'.mp3'`` is loaded from the ``commonvoice`` folder.
    space_max : float
        Upper bound, in seconds, of the random silent gap appended after
        each clip.
    snr : float
        Power-of-ten amplitude ratio: the peak-normalized speech is multiplied
        by ``10**snr`` before being added to the peak-normalized noise.
        This is not a value in decibels.

    Returns
    -------
    tuple
        ``(noisy_output, sr)``: the normalized mixture and the sample rate
        reported by the last load.

    Raises
    ------
    ValueError
        If ``indexes`` is empty.
    """
    segments = []
    sr = None
    for index in indexes:
        path = commonvoice + train_cv.loc[index].path+'.mp3'
        audio, sr = lr.core.load(path)
        segments.append(audio)
        # Random gap; the "-1 ... +1" keeps the gap at one sample or more
        # whenever the drawn length is non-zero.
        segments.append(np.zeros(int(sr*random.random()*space_max-1)+1))
    if not segments:
        raise ValueError("generate_audio needs at least one index")
    # Single concatenation instead of re-copying the growing array per clip.
    output = np.concatenate(segments)

    noise_width = (output.max() - output.min())/2
    noise = np.cumsum(normal(scale = noise_width, size = len(output))) #brown noise
    # Normalize by the absolute peak: a random walk can be entirely negative
    # (or have a tiny positive maximum), so dividing by the signed maximum
    # could flip its sign or blow it up.
    noise = _peak_normalize(noise)
    output = _peak_normalize(output)
    noisy_output = noise + (output * (10**snr))
    noisy_output = lr.util.normalize(noisy_output, axis=0)
    return noisy_output, sr

In [24]:
# Try the generator on the second selected client with speech scaled by 10**-3.
test_indexes = client_files[1]
output, sr = generate_audio(test_indexes, snr=-3)
Audio(data=output, rate=sr)

In [8]:
#building 10 samples from previously defined clients with increasing noise levels
audio_array = []
sr_array = []
# Relative noise amplitude per sample, log-spaced from 1e-3 up to 1e-1.
noise_range = np.logspace(-3,-1,len(client_files))
for i, files in enumerate(client_files):
    # generate_audio(indexes, space_max, snr) has no `noise_scale` argument, so
    # passing one raises TypeError. It scales speech by 10**snr relative to
    # the noise, so a relative noise level n corresponds to snr = -log10(n).
    audio, sr = generate_audio(files, snr = -np.log10(noise_range[i]))
    audio_array.append(audio)
    sr_array.append(sr)

In [9]:
# Play back one of the generated examples (index 9).
i = 9
Audio(audio_array[i], rate=sr_array[i])

In [16]:
#saving results to file
silence_dir = '/home/ci411/SpokenWeb/silence_examples/'
# Writing is opt-in so re-running the notebook does not overwrite existing
# examples; set to True to (re)write "<i>.wav" for every generated sample.
save_examples = False
for i in range(len(audio_array)):
    path = silence_dir + str(i) + '.wav'
    if save_examples:
        # librosa.output.write_wav was removed in librosa 0.8; soundfile
        # (already imported as sf) is the replacement.
        sf.write(path, audio_array[i], sr_array[i])

In [60]:
# Generate ten 30-second soundscapes (brownian background + one Common Voice
# event) with the event SNR swept from 3 down to -3.
sil_dir = '/home/ci411/SpokenWeb/silence_examples/'

# Defined here so this cell runs on a fresh kernel; they were previously only
# assigned in a cell located further down the notebook.
foreground_folder = scaper_files + "foreground/"
background_folder = scaper_files + "background/"

snr_range = np.linspace(-3,3,10)[::-1]

soundscape_duration = 30
for i, snr in enumerate(snr_range):
    sc = scaper.Scaper(soundscape_duration, foreground_folder, background_folder)
    sc.ref_db = -20

    # long_brownian.wav must already exist under the background folder.
    sc.add_background(label=('const', 'brownian'),
                      source_time=('const', 0),
                      source_file = ('const', background_folder+'brownian/long_brownian.wav'))

    # NOTE(review): scaper documents pitch_shift in semitones, so ('const', 1)
    # would shift the voice up one semitone; pass None if no shift is
    # intended - confirm.
    sc.add_event(label=('const', 'commonvoice_wav'),
                 source_file=('choose', []),
                 source_time=('const', 0),
                 event_time=('uniform', 10, 20),
                 event_duration=('const', 10),
                 snr=('const', snr),
                 pitch_shift=('const', 1),
                 time_stretch=('const', 1))

    # One audio file and one JAMS annotation per SNR value.
    fname = "silence_detection_{}".format(i)
    file_loc = sil_dir + fname + '.wav'
    jams_loc = sil_dir + fname + '.jams'
    sc.generate(file_loc, jams_loc)



In [62]:
# Show the SNR sweep, then play one of the generated soundscapes.
print(snr_range)
example_wav = '/home/ci411/SpokenWeb/silence_examples/silence_detection_1.wav'
Audio(filename=example_wav)

[ 3.          2.33333333  1.66666667  1.          0.33333333 -0.33333333
 -1.         -1.66666667 -2.33333333 -3.        ]


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [57]:
# Build a long background file by repeating the brownian clip 2**6 = 64 times
# (equivalent to doubling it six times).
brownian, sr = lr.load('/scratch/ci411/spokenweb/scaper_files/background/brownian/brownian.wav')
brownian = np.tile(brownian, 2**6)
# librosa.output.write_wav was removed in librosa 0.8; soundfile (imported as
# sf) replaces it. subtype='FLOAT' stores floating-point samples instead of
# soundfile's default 16-bit PCM for WAV files.
sf.write('/scratch/ci411/spokenweb/scaper_files/background/brownian/long_brownian.wav', brownian, sr, subtype='FLOAT')

In [3]:
# Specification of a single 10-second soundscape: a randomly chosen background
# plus one Common Voice event at a fixed SNR of 10.
soundscape_duration = 10.0
foreground_folder = scaper_files + "foreground/"
background_folder = scaper_files + "background/"

sc = scaper.Scaper(soundscape_duration, foreground_folder, background_folder)
sc.ref_db = -20

# Background: label drawn from the three listed folders, any file, random offset.
background_spec = {
    'label': ('choose', ['park', 'restraunt', 'street']),
    'source_time': ('uniform', 0,60),
    'source_file': ('choose', []),
}
sc.add_background(**background_spec)

# Foreground: one voice clip read from its start.
event_spec = {
    'label': ('const', 'commonvoice_wav'),
    'source_file': ('choose', []),
    'source_time': ('const', 0),
    'event_time': ('uniform', 0, 10),
    'event_duration': ('const', 10),
    'snr': ('const', 10),
    'pitch_shift': ('const', 1),
    'time_stretch': ('const', 1),
}
sc.add_event(**event_spec)

In [12]:
# Render the soundscape held in `sc` to a test audio file and JAMS annotation.
det_dir = '/home/ci411/SpokenWeb/detection_examples/'
test_loc, test_jams = det_dir + 'test.wav', det_dir + 'test.jams'
sc.generate(test_loc, test_jams)



In [13]:
# Play the rendered test soundscape.
test_wav = '/home/ci411/SpokenWeb/detection_examples/test.wav'
Audio(filename=test_wav)

In [8]:
# Generate ten "voice detection" soundscapes: random background plus one
# Common Voice event, with the event SNR swept from 3 down to -3.
det_dir = '/home/ci411/SpokenWeb/detection_examples/'

snr_range = np.linspace(-3,3,10)[::-1]

for i, snr in enumerate(snr_range):
    sc = scaper.Scaper(soundscape_duration, foreground_folder, background_folder)
    sc.ref_db = -20

    # Background: any label, any file, random start offset.
    background_spec = {
        'label': ('choose', []),
        'source_time': ('uniform', 0,60),
        'source_file': ('choose', []),
    }
    sc.add_background(**background_spec)

    # Foreground: one voice clip at this iteration's SNR.
    event_spec = {
        'label': ('const', 'commonvoice_wav'),
        'source_file': ('choose', []),
        'source_time': ('const', 0),
        'event_time': ('uniform', 0, 10),
        'event_duration': ('const', 10),
        'snr': ('const', snr),
        'pitch_shift': ('const', 1),
        'time_stretch': ('const', 1),
    }
    sc.add_event(**event_spec)

    # One audio file and one JAMS annotation per SNR value.
    fname = "voice_detection_{}".format(i)
    file_loc, jams_loc = det_dir + fname + '.wav', det_dir + fname + '.jams'
    sc.generate(file_loc, jams_loc)



In [9]:
# Play the first generated voice-detection soundscape.
voice_wav = '/home/ci411/SpokenWeb/detection_examples/voice_detection_0.wav'
Audio(filename=voice_wav)