In [2]:
import numpy as np
from pylab import *
from scipy.io import wavfile
import matplotlib.pyplot as plt
from scipy import interpolate
from os import listdir

In [3]:
'''
Call hierarchy of the helpers defined in this cell:

compileAnimals calls compileData and addResponse
compileData calls changeSampleRate and dataSlice
'''


def changeSampleRate(array, rate, new_rate = 166600):
    '''
    Returns signal converted to new sample rate

    Resampling is a nearest-neighbour lookup: output sample k takes the
    value of the input sample closest to position k * rate / new_rate.

    array:    sequence of samples; interp1d is applied along its default
              (last) axis, so a 1-D signal is expected
    rate:     sample rate of `array`
    new_rate: sample rate of the returned signal

    Returns the array produced by interp1d for positions in
    [0, len(array) - 1). A signal with fewer than two samples gives an
    empty array, so callers can treat it like any other too-short signal.
    '''

    length = len(array)

    # Nothing to interpolate between: interp1d rejects empty input, and a
    # single sample leaves no query positions below. Return an empty signal
    # rather than letting an empty recording abort the whole run.
    if length < 2:
        return np.empty(0)

    x = np.arange(length)
    f = interpolate.interp1d(x, array, kind = 'nearest')

    # distance between consecutive output samples, in input-sample units
    step = rate/float(new_rate)

    # the stop value is exclusive, so every query position stays inside the
    # range [0, length - 1] that interp1d was built on
    xnew = np.arange(0, length - 1, step)

    return f(xnew)

def dataSlice(array, slicelength = 166600):
    '''
    Cut `array` into consecutive, non-overlapping chunks of `slicelength`
    items each.

    Only complete chunks are kept; a trailing remainder shorter than
    `slicelength` is discarded.

    Returns a numpy array with one chunk per row. When `array` is shorter
    than a single chunk the result is an empty array of shape (0,).
    '''
    n_chunks = len(array)//slicelength

    chunks = [
        array[k*slicelength:(k + 1)*slicelength]
        for k in np.arange(n_chunks)
    ]

    return np.array(chunks)

def compileData(path):
    '''
    Build a data matrix from every WAV file in a directory.

    Each file in `path` is read with wavfile.read, converted with
    changeSampleRate, and cut into rows with dataSlice. Files that yield no
    complete slice are skipped. The rows of all remaining files are stacked
    in file-name order.

    path: directory containing the recordings; a trailing separator is
          optional

    Returns a 2-D array with one slice per row.
    Raises ValueError when no file in `path` produced a slice.
    '''
    # local import: only `listdir` is imported from os at notebook level
    from os.path import join

    data = []
    # listdir returns names in arbitrary order; sorting makes the row order
    # of the result reproducible between runs and machines
    for f in sorted(listdir(path)):
        sampFreq, signal = wavfile.read(join(path, f))
        new_signal = changeSampleRate(signal, sampFreq)
        sliced_signals = dataSlice(new_signal)

        # some signals are too short to give a single slice; leave them out
        # so np.vstack below only sees 2-D blocks
        if sliced_signals.shape[0] == 0:
            continue

        data.append(sliced_signals)

    if not data:
        # np.vstack would raise an uninformative ValueError on an empty list
        raise ValueError('no usable recordings found in %r' % (path,))

    return np.vstack(data)

def addResponse(array, label):
    '''
    Append a constant label column to a 2-D data matrix.

    array: matrix with one observation per row
    label: value written into the new last column of every row

    Returns a new array with one more column than `array`; the input is
    left untouched.
    '''
    n_rows = array.shape[0]

    # float column of shape (n_rows, 1) holding the label for every row
    labels = np.zeros((n_rows, 1))
    labels.fill(label)

    return np.hstack((array, labels))

def compileAnimals(feed_dict):
    '''
    Build one labelled data matrix from several recording directories.

    feed_dict maps a directory path to the label for its recordings. Every
    directory is compiled with compileData, tagged with its label by
    addResponse, and the resulting blocks are stacked row-wise in the
    dict's iteration order.

    Returns the stacked matrix, whose last column holds the labels.
    '''
    labelled_blocks = tuple(
        addResponse(compileData(directory), label)
        for directory, label in feed_dict.items()
    )

    return np.vstack(labelled_blocks)

In [37]:
# Map each recording directory to its integer class label.
# FIXME: './BD15C/' used to be listed twice here (labels 0 and 3). A dict
# literal keeps only the last value for a repeated key, so the label-0 entry
# never took effect and no rows were labelled 0. The mapping below is the one
# that was actually in force; if a fourth directory was intended, add it with
# its own label.
feed_dict = {'./BD15C/':3,
             './BD6A/':1,
             './BD6B/':2,
            }

database = compileAnimals(feed_dict)



In [42]:
# Shape of the sub-matrix of rows whose last column (the label appended by
# addResponse) equals 5.
# NOTE(review): the feed_dict cell in this notebook only assigns labels 0-3,
# so a non-empty result here presumably comes from an earlier kernel state —
# confirm which label is meant.
database[database[:,-1] == 5].shape

(21, 166601)

In [43]:
# Write the compiled matrix to 'database2.npy'.
# NOTE(review): the load cell in this notebook reads 'database.npy', a
# different file name — confirm which file is the current one.
np.save('database2.npy', database)

In [36]:
# Load the matrix stored in 'database.npy'; the result is only displayed,
# it is not bound to a name.
# NOTE(review): the save cell in this notebook writes 'database2.npy', so
# this presumably reads an older file — confirm.
np.load('database.npy')

array([[  2.60800000e+03,   2.60800000e+03,   2.62400000e+03, ...,
          2.56000000e+02,   7.84000000e+02,   2.00000000e+00],
       [  2.24000000e+02,   2.24000000e+02,   1.28000000e+02, ...,
          6.40000000e+01,   2.72000000e+02,   2.00000000e+00],
       [ -5.28000000e+02,  -5.28000000e+02,  -1.44000000e+02, ...,
          1.07200000e+03,  -3.44000000e+03,   2.00000000e+00],
       ..., 
       [ -4.32000000e+02,  -4.32000000e+02,  -5.60000000e+02, ...,
          7.20000000e+02,  -1.44000000e+02,   4.00000000e+00],
       [ -4.80000000e+02,  -4.80000000e+02,  -4.16000000e+02, ...,
          2.56000000e+03,   7.45600000e+03,   4.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   4.80000000e+01, ...,
          3.74400000e+03,   4.60800000e+03,   4.00000000e+00]])

In [1]:
# Display the compiled matrix.
# NOTE(review): `database` is only bound by the cell that calls
# compileAnimals; on a fresh kernel this cell raises NameError unless that
# cell (or an assignment from np.load) has been run first.
database

NameError: name 'database' is not defined