Cocktail Party Problem

In [38]:
import numpy as np
import scipy.io.wavfile

In [39]:
# Sample rate in samples per second (Hz); save_sound passes it to scipy.io.wavfile.write.
Fs = 11025


In [40]:
def update_W(W, x, learning_rate):
    """
    Take one gradient-ascent step on the ICA matrix W for a single sample.

    The sources are modelled with a Laplace distribution; the step direction
    computed here is ``pinv(W.T) - sign(W x) x^T``.

    Args:
        W: The current W matrix for ICA.
        x: A single data element (one value per mixed channel).
        learning_rate: Step size applied to the update direction.

    Returns:
        The updated W. A new array is returned; the W passed in is not
        modified in place.
    """
    # Treat the sample as a column vector so that W @ sample is a column too.
    sample = np.reshape(x, (x.shape[0], 1))

    # Sign of each estimated source for this sample.
    source_signs = np.sign(W @ sample)

    # The pseudo-inverse is used in place of a plain inverse so that a
    # singular W does not raise.
    step_direction = np.linalg.pinv(W.T) - source_signs @ sample.T

    return W + learning_rate * step_direction

In [48]:
def unmix(X, W):
    """
    Unmix an X matrix according to W using ICA.

    Each row of X is treated as one mixed sample x; the matching row of the
    result is W @ x laid out as a row, which for all rows at once is
    ``X @ W.T``.

    Args:
        X: The data matrix, one sample per row.
        W: The W for ICA.

    Returns:
        A numpy array S containing the split data, one row per sample.
    """
    # No pre-allocation is needed: the matrix product creates the result array.
    return X @ W.T

In [49]:
def normalize(dat):
    """
    Scale data so that its largest absolute value becomes 0.99.

    Args:
        dat: Array-like of numeric samples.

    Returns:
        ``0.99 * dat / max(|dat|)`` as a numpy array. When the input is empty
        or consists only of zeros there is no peak to scale by, so the data
        is returned scaled by 0.99 alone (an empty or all-zero array) instead
        of raising or producing NaNs from a 0/0 division.
    """
    data = np.asarray(dat)

    # np.max raises on an empty array, so treat "no samples" as "no peak".
    peak = np.max(np.abs(data)) if data.size else 0
    if peak == 0:
        return 0.99 * data

    return 0.99 * data / peak


In [50]:
def load_data():
    """
    Read the mixed recordings from 'mix.dat'.

    The path is relative, so the file is looked up in the current working
    directory.

    Returns:
        The numpy array parsed from the file by np.loadtxt.
    """
    return np.loadtxt(r'mix.dat')


In [51]:
def save_sound(audio, name):
    """
    Write one audio track to '<name>.wav' at the module-level sample rate Fs.

    scipy.io.wavfile.write chooses the WAV sample format from the array's
    dtype, so float64 input would be stored as 64-bit float samples, a format
    many audio players cannot open. float64 data is therefore written as
    32-bit float; any other dtype is passed through unchanged.

    Args:
        audio: Array of samples for the track (main passes one column of the
            data matrix at a time).
        name: Output path without the '.wav' extension.
    """
    samples = np.asarray(audio)
    if samples.dtype == np.float64:
        # 32-bit float keeps ample precision for audio and is widely playable.
        samples = samples.astype(np.float32)

    scipy.io.wavfile.write(f'{name}.wav', Fs, samples)


In [52]:
def unmixer(X, anneal=None, rng=None):
    """
    Estimate the ICA matrix W by stochastic gradient ascent over the rows of X.

    W starts as the identity. For every learning rate in the annealing
    schedule, all rows of X are visited once in a fresh random order and W is
    updated with update_W after each row. Progress (the current learning
    rate) is printed once per pass.

    Args:
        X: 2-D data matrix with M rows (samples) and N columns (channels).
        anneal: Optional sequence of learning rates, one full pass over the
            data per entry. Defaults to the built-in 16-pass schedule that
            decays from 0.1 to 0.001.
        rng: Optional random source exposing ``permutation`` (for example
            ``np.random.default_rng(seed)``), for reproducible runs. Defaults
            to NumPy's global random state.

    Returns:
        The learned N x N matrix W.
    """
    M, N = X.shape
    W = np.eye(N)

    if anneal is None:
        anneal = [0.1, 0.1, 0.1, 0.05, 0.05, 0.05, 0.02, 0.02, 0.01, 0.01,
                  0.005, 0.005, 0.002, 0.002, 0.001, 0.001]
    if rng is None:
        # The numpy.random module itself offers permutation() backed by the
        # global state, so an unseeded call behaves as it always did.
        rng = np.random

    print('Separating tracks ...')
    for lr in anneal:
        print(lr)
        # Visit every sample exactly once per pass, in random order.
        for i in rng.permutation(M):
            W = update_W(W, X[i], lr)

    return W

In [53]:
def main():
    """
    Run the whole separation pipeline.

    Loads the mixed data, normalizes it, writes every mixed column to
    'mixed_<col>.wav', learns W with unmixer, applies it with unmix and writes
    every recovered column to 'split_<col>.wav'.
    """
    # Normalized mixtures; each column is saved as its own track.
    mixed = normalize(load_data())
    n_mixed = mixed.shape[1]
    for idx in range(n_mixed):
        save_sound(mixed[:, idx], f'mixed_{idx}')

    unmixing_matrix = unmixer(mixed)

    # Sources matrix: one recovered track per column.
    sources = unmix(mixed, unmixing_matrix)
    n_sources = sources.shape[1]
    for idx in range(n_sources):
        save_sound(sources[:, idx], f'split_{idx}')

Suggestion: Try the VLC media player if you have trouble playing the audio (.wav) files.

In [54]:
# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()

Separating tracks ...
0.1
0.1
0.1
0.05
0.05
0.05
0.02
0.02
0.01
0.01
0.005
0.005
0.002
0.002
0.001
0.001
