In [1]:
# Scientific libraries
import scipy
import pylab
import numpy as np
import numpy.ma as ma
import theano
import theano.tensor as T
from sklearn.decomposition import NMF

# Audio libraries
from scipy.io import wavfile
from IPython.display import display, Audio

# Operational libraries
import os
import matplotlib.pylab as plt
%matplotlib inline

Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5110)


## Read in the data

In [2]:
# Load the two isolated source recordings and build the mixture to be separated.
audio_dir = "/data/fs4/datasets/magnolia/sisec/dev/"
fs, snda = wavfile.read(os.path.join(audio_dir, "dev_Ca1_Ce_A_src.wav"))
fsb, sndb = wavfile.read(os.path.join(audio_dir, "dev_Ca1_Ce_B_src.wav"))

# Everything downstream uses the single rate `fs`, so summing signals that
# were sampled at different rates would be meaningless.
if fsb != fs:
    raise ValueError(
        "sample rates differ between the two sources: %r vs %r" % (fs, fsb))

# wavfile.read returns integer arrays for integer PCM files; adding two of
# those wraps around silently wherever the sum leaves the sample type's range.
# Mix in float64 so the sum is exact.
sndab = snda.astype(np.float64) + sndb

## Utility Functions and Non-negative Matrix Factorization

In [57]:
def stft(x, fs, framesz, hop):
    '''
    Short Time Fourier Transform (STFT) - Spectral decomposition

    Each frame is multiplied by a Hann window and passed through a full
    (two-sided) FFT. Frames start every `hop` seconds, at offsets strictly
    below len(x) - framesamp, so trailing samples that do not fill a frame
    are dropped.

    Input:
        x = signal (1-d array, which is amp/sample)
        fs = sampling frequency (in Hz)
        framesz = frame size (in seconds)
        hop = skip length (in seconds)

    Output:
        X = 2d complex array, shape (number of windows, samples per window)

    Raises:
        ValueError if the frame size or the hop is shorter than one sample
    '''
    framesamp = int(framesz*fs)
    hopsamp = int(hop*fs)
    if framesamp < 1 or hopsamp < 1:
        raise ValueError(
            "framesz and hop must each span at least one sample at rate fs")

    # NumPy equivalents of the legacy scipy.hanning / scipy.fft / scipy.array
    # aliases, which current SciPy no longer provides (scipy.fft is a module).
    w = np.hanning(framesamp)
    frames = [np.fft.fft(w*x[i:i+framesamp])
              for i in range(0, len(x)-framesamp, hopsamp)]
    if not frames:
        # Signal too short for a single frame: keep the result 2-d so callers
        # can still read X.shape[1].
        return np.zeros((0, framesamp), dtype=complex)
    return np.array(frames)

def istft(X, fs, T, hop):
    ''' Inverse Short Time Fourier Transform (iSTFT) - Spectral reconstruction

    Each row of X is inverse-transformed and its real part is overlap-added
    into the output at successive multiples of the hop length. No window or
    overlap normalisation is applied.

    Input:
        X = 2d array of frame spectra, one frame per row
        fs = sampling frequency (in Hz)
        T = total length of the reconstruction (in seconds)
        hop = skip rate (in seconds)

    Output:
        x = 1-d real array of int(T*fs) samples
    '''
    # Inside this function the parameter `T` hides the module-level alias
    # `T` (theano.tensor); only the duration is meant here.
    # int(): T*fs is a float for fractional durations, and an array length
    # must be an integer.
    x = np.zeros(int(T*fs))
    framesamp = X.shape[1]
    hopsamp = int(hop*fs)
    # zip() stops at the shorter sequence, so an X holding fewer frames than
    # fit into T seconds leaves the tail at zero instead of raising IndexError.
    for frame, i in zip(X, range(0, len(x)-framesamp, hopsamp)):
        x[i:i+framesamp] += np.real(np.fft.ifft(frame))
    return x

def nmf(X, k, maxiter=1000, seed=0):
    '''
    Non-negative matrix factorization X ~ W.dot(H) by multiplicative updates

    Python port of the MATLAB reference loop this cell used to contain. The
    update rules are the multiplicative rules for the generalized
    Kullback-Leibler divergence between X and W.dot(H).

    Input:
        X = non-negative 2d array, shape (F, N)
        k = number of components
        maxiter = number of update iterations
        seed = seed of the random initialisation (the reference used seed 0)

    Output:
        W = dictionary, shape (F, k)
        H = activations, shape (k, N)
    '''
    X = np.asarray(X, dtype=float)
    n_rows, n_cols = X.shape
    eps = np.finfo(float).eps  # same role as MATLAB's `eps`: avoids 0/0

    # Private generator so that a call is reproducible without touching the
    # global NumPy random state.
    rng = np.random.RandomState(seed)
    W = 1 + rng.rand(n_rows, k)
    H = 1 + rng.rand(k, n_cols)

    for _ in range(maxiter):
        # update activations
        # W' * ONES has the column sums of W repeated along every column, so
        # a broadcast column vector replaces the explicit all-ones matrix.
        H = H * W.T.dot(X / (W.dot(H) + eps)) / W.sum(axis=0)[:, np.newaxis]

        # update dictionaries
        # ONES * H' likewise reduces to the row sums of H.
        W = W * (X / (W.dot(H) + eps)).dot(H.T) / H.sum(axis=1)[np.newaxis, :]

    return W, H


# Smoke test on a random non-negative matrix (the scaffolding this cell
# already set up while the function was being written).
k = 2
maxiter = 1000
X = np.random.rand(100, 1000)
W, H = nmf(X, k, maxiter)

## Spectral transform and component decomposition

The decomposition below fits the factors $W$ and $H$ by minimizing the objective:

$$ \| X - WH \|_F^2 + \alpha \lambda_1 \| W \|_1 + \alpha \lambda_1 \| H \|_1  $$

Unfortunately, the fitted model only stores the dictionary $W$ (read below as `model.components_`), and leaves the user to calculate the activations $H$.

%d 0
%d 1
%d 2
%d 3


KeyboardInterrupt: 

## Reconstruction (first Fourier and time-domain)

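Based on the spectral decomposition in the above cell, where each fitted component is one spectral basis vector, we build a binary mask per component in the Fourier domain, apply it to the mixture's spectrogram, and invert the result back to the time domain.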



In [None]:
# `model` and `sndabft` come from a cell that is not visible here; sndabft is
# presumably the STFT of the mixture with one frame per row -- TODO confirm.
components = model.components_
ws = np.zeros(components.shape)
masks = np.zeros((len(components),) + sndabft.shape)

for i, wi in enumerate(components):
    # Scale the component to unit Euclidean norm.
    ws[i] = wi / np.linalg.norm(wi)
    # Project every row of the magnitude spectrogram onto the component ...
    energyi = ws[i].dot(abs(sndabft).T)
    # ... and expand that back into a rank-1 magnitude estimate.
    masks[i] = np.outer(energyi, ws[i])

# A bin goes to whichever of the first two estimates is strictly larger;
# bins where they tie are selected by neither mask.
maska = np.greater(masks[0], masks[1])
maskb = np.less(masks[0], masks[1])

# Per-frame projection onto each of the first two components, stacked.
plt.figure()
for panel, w in zip((211, 212), ws):
    plt.subplot(panel)
    plt.plot(w.dot(abs(sndabft).T) / 1e5)

# The two binary masks, transposed for display.
plt.figure()
plt.imshow(maska.T, aspect=.2, cmap='bone')
plt.figure()
plt.imshow(maskb.T, aspect=.2, cmap='bone')

In [None]:
# Split the mixture spectrogram into magnitude and unit-modulus phase once;
# both reconstructions reuse the mixture's phase.
mixture_magnitude = abs(sndabft)
mixture_phase = np.exp(1j * np.angle(sndabft))

# Source A: keep only the bins selected by maska, then invert.
# 5 is the output length in seconds and 0.025 the hop in seconds; the hop
# presumably has to match the forward STFT, whose call is not visible here.
sndareconft = maska * mixture_magnitude * mixture_phase
sndarecon = istft(sndareconft, fs, 5, 0.025)

# Source B: same procedure with the bins selected by maskb.
sndbreconft = maskb * mixture_magnitude * mixture_phase
sndbrecon = istft(sndbreconft, fs, 5, 0.025)

## Play audio

In [None]:
def _announce_and_play(title, samples):
    """Print a caption, then show an audio player for `samples` at rate `fs`."""
    print(title)
    display(Audio(samples, rate=fs))


# The mixture first, then the two separated estimates.
_announce_and_play('Original signal', sndab)
_announce_and_play('Reconstruction of signal A', sndarecon)
_announce_and_play('Reconstruction of signal B', sndbrecon)