## Audio Mosaic Generator
## Justin Chen, Nicolas Finkelstein, Kyle LaBrosse
EECS352 -- Professor Pardo

In [16]:
# Import packages
import IPython, numpy as np, scipy as sp, matplotlib.pyplot as plt, matplotlib, sklearn, librosa, cmath,math, csv
from IPython.display import Audio
from sklearn.datasets import load_iris
import random
# This line makes sure your plots happen IN the webpage you're building, instead of in separate windows.
%matplotlib inline

In [17]:
def smooth(x, window_len=11, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with reflected copies of itself
    (window_len - 1 samples per side) so that transients at the start and
    end of the output are reduced, then convolved with the window scaled
    to unit sum.

    input:
        x: the input signal (1-D numpy array with at least window_len samples)
        window_len: the dimension of the smoothing window; should be an odd integer
        window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            flat window will produce a moving average smoothing.

    output:
        the smoothed signal; x itself is returned unchanged when window_len < 3

    raises:
        ValueError: if x is not 1-D, x is shorter than window_len, or window
            is not one of the supported names

    example:

    t = np.linspace(-2, 2, 41)
    x = np.sin(t) + np.random.randn(len(t)) * 0.1
    y = smooth(x)

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter

    TODO: the window parameter could be the window itself if an array instead of a string
    NOTE: len(output) == len(input) + window_len - 1. For an odd window_len,
          y[window_len // 2:-(window_len // 2)] has the same length as the input.
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    # A window shorter than 3 samples cannot smooth anything; hand the input back.
    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad with mirrored copies of the first and last window_len - 1 samples.
    s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]

    if window == 'flat':  # moving average
        w = np.ones(window_len, 'd')
    else:
        # Resolve the numpy window function by name; the name was validated
        # above, so no string needs to be evaluated as code.
        w = getattr(np, window)(window_len)

    return np.convolve(w / w.sum(), s, mode='valid')


In [18]:
def collect_filenames(path_to_esc50_csv, categories=('laughing',)):

    '''
    Collect the audio file names of selected sound classes from the ESC-50
    metadata file (e.g. `./esc50/meta/esc50.csv`).

    Input Parameters
    ----------------
    path_to_esc50_csv: a string indicating a path to esc50.csv in ESC50 dataset.
                       The file must have 'category' and 'filename' columns.
    categories: the class names to collect, as an iterable of strings or a
                single string. Defaults to ('laughing',).


    Returns
    ----------------
    filenames: a dictionary mapping each requested class name to the list of
               its file names, in the order the rows appear in the csv.
               A class with no matching row maps to an empty list.
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(categories, str):
        categories = (categories,)

    filenames = {category: [] for category in categories}

    # newline='' lets the csv module do its own newline handling.
    with open(path_to_esc50_csv, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            category = row['category']
            if category in filenames:
                filenames[category].append(row['filename'])

    return filenames

In [19]:
def feature_extraction(signal, sr):
    """Summarise an audio signal as a single MFCC-based feature vector.

    For every row of the MFCC matrix returned by librosa this computes the
    mean of the row and the mean of the differences between consecutive
    values in the row, then joins the two sets of means.

    Input Parameters
    ----------------
    signal: the audio samples, passed to librosa.feature.mfcc as `y`.
    sr: the sampling rate of `signal`.

    Returns
    ----------------
    feature_vector: a 1-D array holding the per-row means followed by the
                    per-row mean differences (twice the number of MFCC rows).
    """
    # Pass the audio by keyword: recent librosa releases no longer accept it
    # positionally, and the keyword form also works on older releases.
    mfcc = librosa.feature.mfcc(y=signal, sr=sr)

    # Reduce along axis 1 so each MFCC row yields one mean and one mean delta.
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_delta_mean = np.mean(np.diff(mfcc, axis=1), axis=1)

    return np.concatenate((mfcc_mean, mfcc_delta_mean))

In [22]:
# Load every source clip and join them into one long signal (`concat`).
train_filenames = collect_filenames("./esc50/meta/esc50.csv")

# Seed with an empty float64 array so the result is still a valid (empty)
# array when no files are found. The clips are gathered in a list and joined
# once, instead of re-copying the growing array for every file.
pieces = [np.array([])]

for key in train_filenames:
    for file in train_filenames[key]:
        file = './esc50/audio/' + file
        # sr=None: load at the file's own sample rate (see librosa.load docs).
        signal, sr = librosa.load(file, sr=None)
        pieces.append(signal)

concat = np.concatenate(pieces)

In [21]:
# Pick one 'laughing' clip as the target, load it, and show a player for it.
file = "./esc50/audio/" + train_filenames["laughing"][14]
target, sr = librosa.load(file, sr=None)
# Last expression of the cell, so the notebook renders the audio widget.
Audio(target, rate=sr)

In [40]:
def slice_audio(signal, window_size):
    """Split a signal into consecutive, non-overlapping windows.

    Input Parameters
    ----------------
    signal: a 1-D sequence of samples.
    window_size: the number of samples per window (a positive integer).

    Returns
    ----------------
    X: a new float64 array of shape (len(signal) // window_size, window_size);
       X[i] is the audio signal of window i. Trailing samples that do not
       fill a whole window are dropped.

    Raises
    ----------------
    ZeroDivisionError: if window_size is 0.
    """
    # Integer floor division gives the number of complete windows exactly.
    window_num = len(signal) // window_size

    # Copy the samples that fill whole windows into a float64 array, then
    # reshape so that each row is one window.
    usable = signal[:window_num * window_size]
    return np.array(usable, dtype=np.float64).reshape(window_num, window_size)

In [69]:
# Split every clip in train_filenames into windows of window_size samples
# and stack all windows row-wise into `output`.
window_size = 100

# A zero-row seed keeps the result at shape (0, window_size) when there are
# no files. The per-file windows are gathered in a list and joined once,
# instead of re-copying the growing array for every file.
chunks = [np.zeros((0, window_size))]
for key in train_filenames:
    for file in train_filenames[key]:
        file = './esc50/audio/' + file
        signal, sr = librosa.load(file, sr=None)
        X = slice_audio(signal, window_size)
        chunks.append(X)

output = np.concatenate(chunks, 0) #output[i] is window i

print(output.shape)



(88200, 100)
