In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from time import time
from utils import *
from os.path import join
from multiprocessing import Pool
import numpy as np
import itertools
import time as timeMod
from time import mktime
from datetime import datetime
import scipy.spatial
import scipy.spatial.distance as dist 
import python_speech_features as psf
import pickle
from skimage.measure import block_reduce
from sklearn.decomposition import PCA

In [2]:
data_root = 'drumData/'
sr = 48000 # this is the samplerate initially used to load the samples
drumNames = pickle.load(open(data_root+'drumNames.pickle'))
drumLengths = pickle.load(open(data_root+'drumLengths.pickle'))
drumMFCCs = {}
drumSamples = {}
n_fft = 1024
hop_length = n_fft/4
use_logamp = False # boost the brightness of quiet sounds
reduce_rows = 10 # how many frequency bands to average into one
reduce_cols = 1 # how many time steps to average into one
crop_rows = 32 # limit how many frequency bands to use
crop_cols = 32 # limit how many time steps to use
limit = None # set this to 100 to only process 100 samples
colors = ['#000000', '#ff0000', '#00ff00', '#0000ff', '#ffff00', '#ff00ff', '#00ffff']
initial_dims = [30]
perplexities = [30]

In [6]:
for d in drumNames:
    %time drumSamples[d] = np.load(join(data_root, d+'_samples.npy'))

window = np.hanning(n_fft)
def job(y):
    mfcc = psf.mfcc(y, samplerate=sr, winlen=len(y)/sr, winstep=1.0/sr*hop_length, numcep=22, nfft=n_fft, winfunc=np.hanning)  
    if reduce_rows > 1 or reduce_cols > 1:
        amp = block_reduce(amp, (reduce_rows, reduce_cols), func=np.mean)
    if amp.shape[1] < crop_cols:    
        amp = np.pad(amp, ((0, 0), (0, crop_cols-amp.shape[1])), 'constant')
    amp = amp[:crop_rows, :crop_cols]
   
    #are you supposed to normalize mfcc?
    amp -= amp.min()
    if amp.max() > 0:
        amp /= amp.max()
    return amp

for d in drumNames:
    pool = Pool()
    %time mfccs = pool.map(job, drumSamples[d][:limit])
    mfccs = np.asarray(mfccs).astype(np.float32)
    mfccs = mfccs.reshape(len(mfccs), -1)
    drumMFCCs[d] = mfccs
    print "generated mfccs for", d, mfccs.shape

data = np.concatenate([drumMFCCs[d] for d in drumNames])

CPU times: user 946 µs, sys: 203 ms, total: 204 ms
Wall time: 363 ms
CPU times: user 478 µs, sys: 16.2 ms, total: 16.7 ms
Wall time: 35.5 ms
CPU times: user 525 µs, sys: 96.7 ms, total: 97.2 ms
Wall time: 156 ms
CPU times: user 1.45 ms, sys: 59 ms, total: 60.4 ms
Wall time: 124 ms
CPU times: user 472 µs, sys: 6.52 ms, total: 6.99 ms
Wall time: 10.8 ms
CPU times: user 407 µs, sys: 9.34 ms, total: 9.75 ms
Wall time: 16.3 ms
CPU times: user 400 µs, sys: 25 ms, total: 25.4 ms
Wall time: 43.2 ms
CPU times: user 267 ms, sys: 214 ms, total: 481 ms
Wall time: 3.12 s
generated mfccs for kick (5158, 160)
CPU times: user 24.6 ms, sys: 20.6 ms, total: 45.3 ms
Wall time: 246 ms
generated mfccs for tom (422, 160)
CPU times: user 131 ms, sys: 102 ms, total: 233 ms
Wall time: 1.51 s
generated mfccs for snare (2546, 160)
CPU times: user 81.3 ms, sys: 63.1 ms, total: 144 ms
Wall time: 856 ms
generated mfccs for clap (1324, 160)
CPU times: user 15.5 ms, sys: 11.5 ms, total: 27 ms
Wall time: 161 ms
genera

In [None]:
for d in drumNames:
    np.save(join(data_root, d+'_mfccs.npy'), drumMFCCs[d])
    print "saved", d+'_mfccs.npy'

# define TSNE/PCA calculations

In [7]:
X_2d_inspect = None

def save_data(data, fn):
    np.savetxt(fn, data, fmt='%.5f', delimiter='\t')

def savePlotsAndData(newData, data_root, prefix, colorMap, dataDir, plotDir, initial_dims=30, perplexity=30):    
    figsize = (16,16)
    pointsize = 30
    
    struct = timeMod.localtime(time())
    dt = datetime.fromtimestamp(mktime(struct))
    
    save_data(newData, join(data_root, dataDir+'/{}.{}.{}.2d - {}.tsv'.format(prefix, initial_dims, perplexity, dt)))
    
    plt.figure(figsize=figsize)
    plt.scatter(newData[:,0], newData[:,1], c=colorMap, s=pointsize)
    plt.tight_layout()
    plt.savefig(join(data_root, plotDir+'/{}.{}.{}_2D - {}.png'.format(prefix, initial_dims, perplexity, dt)))
    plt.close()
    
def tsne(data, data_root, prefix, colorMap, initial_dims=30, perplexity=30):
    mkdir_p(data_root + 'tsne')
    mkdir_p(data_root + 'plot')
    
    print initial_dims, perplexity, type(data), data.shape, data.dtype
    X_2d = list(bh_tsne(data, initial_dims=initial_dims, perplexity=perplexity, no_dims=2, verbose=True))
    X_2d = normalize(np.array(X_2d))
    
    savePlotsAndData(X_2d, data_root, prefix, colorMap, 'tsne', 'plot')
    
    return X_2d

def pca(data, data_root, prefix, colorMap):
    mkdir_p(data_root + 'pca')
    mkdir_p(data_root + 'pcaPlot')
    
    pcaInstance = PCA(n_components=2)  
    X_2d = pcaInstance.fit_transform(data)
    
    savePlotsAndData(X_2d, data_root, prefix, colorMap, 'pca', 'pcaPlot')
    
    return X_2d

    
def concatColors(segmentList, colorList):
    multiples = []
    #print segmentList, colorList
    for i in range(len(segmentList)):
        multiples.append([colorList[i]]*segmentList[i])
    return list(itertools.chain(*multiples))

In [None]:
drumMFCCdata = []
for drum in drumNames:
    drumMFCCs[drum] = np.load(join(data_root, drum+'_mfccs.npy'))
    drumMFCCdata.append(drumMFCCs[drum])
    print drum, drumMFCCdata[-1].shape
drumLengths = [drummfcc.shape[0] for drummfcc in drumMFCCdata]
colorMap = concatColors(drumLengths, colors)
mfccData = np.vstack(drumMFCCdata)
mfccData = mfccData.reshape(len(mfccData), -1)
data = mfccData

### Do dimensionality reduction

In [8]:
mfccData = data
drumPrints = []
for drum in drumNames:
    drumPrints.append(np.load(join(data_root, drum+'_fingerprints.npy')))
    #print drum, drumPrints[-1].shape
drumLengths = [drumPrint.shape[0] for drumPrint in drumPrints]
colorMap = concatColors(drumLengths, colors)
fingerprints = np.vstack(drumPrints)
fingerprints = fingerprints.reshape(len(fingerprints), -1)

In [11]:
initial_dims = [30]
perplexities = [30]

useFingerprints = False
useTsne = True
if useFingerprints:
    data = fingerprints[:100]
    prefix = 'fingerprints'
else:
    data = mfccData[:100]
    prefix = 'mfcc'

data = data.astype(np.float64)
colorMap = concatColors(drumLengths, colors)
def job(params):
    start = time()
    if useTsne:
        data2d = tsne(data, data_root, prefix, colorMap, initial_dims=params[0], perplexity=params[1])
    else:
        data2d = pca(data, data_root, prefix, colorMap)
    print 'initial_dims={}, perplexity={}, {} seconds'.format(params[0], params[1], time() - start)
    return data2d
params = list(itertools.product(initial_dims, perplexities))
dimReducedArrays = job(params[0])
newData = dimReducedArrays[0]

30 30 <type 'numpy.ndarray'> (100, 160) float64


AssertionError: ERROR: Call to bh_tsne exited with a non-zero return code exit status, please refer to the bh_tsne output for further details

In [21]:
mfccData[0]


array([  1.00000000e+00,   0.00000000e+00,   8.89929660e-16,
         0.00000000e+00,   6.63828360e-15,   0.00000000e+00,
         5.71335454e-15,   0.00000000e+00,   8.52519518e-15,
         0.00000000e+00,   9.09368812e-15,   0.00000000e+00,
         9.97359527e-15,   0.00000000e+00,   8.18022154e-15,
         0.00000000e+00,   4.91742267e-15,   0.00000000e+00,
         2.82425814e-15,   0.00000000e+00,   9.53673654e-16,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.00000000e+00,
         0.00000000e+00,   8.89929660e-16,   0.00000000e+00,
         6.63828360e-15,   0.00000000e+00,   5.71335454e-15,
         0.00000000e+00,   8.52519518e-15,   0.00000000e+00,
         9.09368812e-15,   0.00000000e+00,   9.97359527e-15,
         0.00000000e+00,   8.18022154e-15,   0.00000000e+00,
         4.91742267e-15,

### Common setup 

In [None]:
# create point -> class hashmap
def getClassesPerSample(data):
    drumClasses = {}
    classIndex = 0
    for i in range(len(data)):
        if sum(drumLengths[0:classIndex+1]) <= i:
            classIndex += 1
        drumClasses[tuple(data[i])] = drumNames[classIndex]
    return drumClasses

drumClasses = getClassesPerSample(data)

numNeighbors = 10

## Arguments
## data -    an array of length n where array[i] is the fraction of neighbors of point i 
#            had the same class as point i.
# numPerClass - Assumes that points from the same class are contiguous and in
#            the same order as drumNames. Should use drumLengths for this parameter
# calcFunc - The summary statistic you want to calcuate per class (mean, medain, etc)
def calculateFuncPerClass(data, numPerClass, calcFunc):
    segments = [0]+numPerClass
    for i in range(1, len(segments)):
        segments[i] = segments[i] + segments[i-1]
    valuesPerClass = []
    for i in range(len(segments)-1):
        valuesPerClass.append(calcFunc(data[segments[i]:segments[i+1]]))
    return valuesPerClass

### pairwise distance implementation

In [None]:
#using https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.spatial.distance.cdist.html
#and argpartition from - https://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array

## Arguments - pointData is m x n np array, with n points of m dimensions
# pointD numNeighbors is the number of nearest neighbors for which to compare classes
#
## returns - an array of length n where array[i] is the fraction of neighbors of point i 
#            had the same class as point i
def calculateKNNClassAccuracy_pairwise(pointData, numNeighbors, printTimes=False):
    startTime = timeMod.time()
    pairwiseDist = dist.cdist(pointData, pointData)
    if printTimes:
        print "pairwise distances calculated", timeMod.time() - startTime
    kPartition = np.argpartition(pairwiseDist, -numNeighbors)
    if printTimes:
        print "partitions calculated", timeMod.time() - startTime
    fracSameNeighborClasses = np.zeros((len(data)))
    
    for i in range(len(pairwiseDist)):
        neighborIndexes = kPartition[i][-numNeighbors:]
        neighbors = [data[ind] for ind in neighborIndexes]
        
        sampleClass = drumClasses[tuple(data[i])]
        neighborClasses = [drumClasses[tuple(neighbor)] for neighbor in neighbors]
        numSameNeighborClasses = len(filter(lambda c : c == sampleClass, neighborClasses))
        fracSameNeighborClasses[i] = numSameNeighborClasses * 1.0 / numNeighbors
    
    if printTimes:
        print "knn classes calculated", timeMod.time() - startTime
        
    return fracSameNeighborClasses

#classAccuracies_LD = calculateKNNClassAccuracy(newData, numNeighbors, True)
classAccuracies_HD = calculateKNNClassAccuracy_pairwise(data, numNeighbors, True)