In [1]:
import math

import librosa
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import wavfile
from scipy.spatial.distance import hamming as h

### Reading .wav files 

In [2]:
# Load the three recordings at their native sampling rates
# (sr=None tells librosa not to resample).
(src, sr1), (noise, sr2), (x_nmf, sr3) = (
    librosa.load(wav_path, sr=None)
    for wav_path in ('trs.wav', 'trn.wav', 'x_nmf.wav')
)

# Later cells add `src` and `noise` sample-by-sample and compare spectra of
# `x_nmf` against them bin-by-bin, which is only meaningful when all three
# files share one sampling rate. Fail early instead of producing a silently
# wrong result.
if not (sr1 == sr2 == sr3):
    raise ValueError(
        "Sampling rates differ: trs.wav=%s, trn.wav=%s, x_nmf.wav=%s"
        % (sr1, sr2, sr3)
    )

In [3]:
def STFT(x, N, hop):
    """Short-time Fourier transform of a 1-D signal using an explicit DFT matrix.

    The signal is cut into Hann-windowed frames of ``N`` samples taken every
    ``hop`` samples; each frame becomes one column of the result.

    Parameters
    ----------
    x : 1-D array-like
        Real-valued samples.
    N : int
        Frame length, which is also the DFT size.
    hop : int
        Number of samples between the starts of consecutive frames.

    Returns
    -------
    numpy.ndarray
        Complex array of shape ``(N, frames)`` holding the full (two-sided)
        spectrum of every frame, where ``frames = (len(x) - N) // hop + 1``
        (0 when ``x`` is shorter than ``N``). Trailing samples that do not
        fill a whole frame are dropped.

    Raises
    ------
    ValueError
        If ``N`` or ``hop`` is smaller than 1.
    """
    if N < 1 or hop < 1:
        raise ValueError("N and hop must be positive integers")

    x = np.asarray(x)

    # Number of whole N-sample frames that fit in x. This must be an integer:
    # the previous `(len(x) - hop) / hop` is a float under Python 3, which
    # np.zeros() and range() reject. For N == 2 * hop both formulas give the
    # same count.
    frames = max(0, (len(x) - N) // hop + 1)

    # N x N DFT matrix, F[k, n] = exp(-2*pi*j*k*n / N), built in one shot.
    k = np.arange(N)
    F = np.exp(-2j * math.pi * np.outer(k, k) / N)

    # The window is identical for every frame, so compute it once.
    window = np.hanning(N)

    X = np.zeros((N, frames))
    for i in range(frames):
        X[:, i] = x[i * hop:i * hop + N] * window

    return np.matmul(F, X)

### ISTFT Function

In [4]:
def ISTFT(X, N, hop, len_signal):
    """Inverse STFT by explicit inverse DFT followed by overlap-add.

    Parameters
    ----------
    X : 2-D array-like, complex
        One-sided spectrogram: at least ``N // 2 + 1`` rows (frequency bins
        0 .. N//2), one column per frame. Extra rows are ignored.
    N : int
        DFT size / frame length used for the forward transform.
    hop : int
        Number of samples between the starts of consecutive frames.
    len_signal : int
        Length of the returned time-domain signal.

    Returns
    -------
    numpy.ndarray
        Real 1-D array of length ``len_signal``. Frames are summed as-is
        (no synthesis window or gain normalisation is applied). Samples of a
        frame that would fall beyond ``len_signal`` are discarded.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or has fewer than ``N // 2 + 1`` rows.
    """
    X = np.asarray(X)
    bins = N // 2 + 1
    if X.ndim != 2 or X.shape[0] < bins:
        raise ValueError(
            "X must be 2-D with at least N//2 + 1 = %d rows" % bins
        )
    frames = X.shape[1]

    # Rebuild the two-sided spectrum from conjugate symmetry:
    # full[N - k] = conj(X[k]) for k = 1 .. N - bins.
    S_hat_full = np.concatenate(
        (X[:bins, :], np.conj(X[N - bins:0:-1, :])), axis=0
    )

    # N x N inverse DFT matrix, including the 1/N normalisation.
    n = np.arange(N)
    inverse_F = np.exp(2j * math.pi * np.outer(n, n) / N) / N
    X_inv = np.real(np.matmul(inverse_F, S_hat_full))

    # Overlap-add straight into the output buffer instead of allocating a
    # (frames, len_signal) matrix and summing its rows.
    x_inv = np.zeros(len_signal)
    for i in range(frames):
        start = hop * i
        if start >= len_signal:
            break
        stop = min(start + N, len_signal)
        x_inv[start:stop] += X_inv[:stop - start, i]
    return x_inv

In [5]:
# Magnitude spectrograms (frame length 1024, hop 512), keeping only rows
# 0..512 of the 1024-row STFT output.
# Note: `N` below is the noise magnitude array, not a DFT size.
S = np.abs(STFT(src, 1024, 512)[:513])
N = np.abs(STFT(noise, 1024, 512)[:513])
G = np.abs(STFT(src + noise, 1024, 512)[:513])

# For x_nmf the complex spectrogram is kept as well (X), since the mask is
# later applied to it; Y is its magnitude.
X = STFT(x_nmf, 1024, 512)[:513]
Y = np.abs(X)

In [6]:
# Binary mask: 1.0 wherever the source magnitude S exceeds the noise
# magnitude N, 0.0 elsewhere. Computed element-wise over whatever shape S and
# N share, instead of looping over a hard-coded 513 x 987 grid.
B = (S > N).astype(np.float64)

In [7]:
def KNN(K, G, Y, masks=None):
    """Estimate a mask for each column of ``Y`` from its K nearest columns of ``G``.

    For every column of ``Y`` the Euclidean distance to every column of ``G``
    is computed; the mask columns belonging to the ``K`` closest columns of
    ``G`` are combined with an element-wise median.

    Parameters
    ----------
    K : int
        Number of nearest neighbours (must be >= 1). If ``K`` exceeds the
        number of columns in ``G``, all columns are used.
    G : 2-D array-like, shape ``(n_features, n_train)``
        Reference columns.
    Y : 2-D array-like, shape ``(n_features, n_query)``
        Query columns.
    masks : 2-D array-like, shape ``(n_rows, n_train)``, optional
        Mask column associated with each column of ``G``. When omitted, the
        notebook-level array ``B`` is used, as before.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, n_query)``. With even ``K`` the
        median of 0/1 masks can be 0.5.

    Raises
    ------
    ValueError
        If ``K < 1`` or ``G`` and ``masks`` have different column counts.
    """
    if masks is None:
        # Backward-compatible default: read the mask from the notebook
        # namespace, as the original implementation did.
        masks = B
    G = np.asarray(G)
    Y = np.asarray(Y)
    masks = np.asarray(masks)

    if K < 1:
        raise ValueError("K must be at least 1")
    if G.shape[1] != masks.shape[1]:
        raise ValueError(
            "G has %d columns but masks has %d" % (G.shape[1], masks.shape[1])
        )

    n_query = Y.shape[1]
    D = np.zeros((masks.shape[0], n_query))
    for i in range(n_query):
        # Distance from query column i to every reference column at once.
        distance = np.linalg.norm(G - Y[:, i:i + 1], axis=0)
        # Stable sort keeps the lower column index first on ties, matching
        # the behaviour of sorted() on (distance, index) pairs keyed by distance.
        indices = np.argsort(distance, kind='stable')[:K]
        D[:, i] = np.median(masks[:, indices], axis=1)
    return D

In [8]:
# Mask estimate for every column of Y, from its 5 nearest columns of G.
D = KNN(K=5, G=G, Y=Y)

In [9]:
# Display the dimensions of the estimated mask.
np.shape(D)

(513, 129)

In [10]:
# Apply the estimated mask element-wise to the complex spectrogram of x_nmf.
S_hat = D * X

In [11]:
def ISTFT(X, N, hop, len_signal):
    """Inverse STFT by explicit inverse DFT followed by overlap-add.

    Parameters
    ----------
    X : 2-D array-like, complex
        One-sided spectrogram: at least ``N // 2 + 1`` rows (frequency bins
        0 .. N//2), one column per frame. Extra rows are ignored.
    N : int
        DFT size / frame length used for the forward transform.
    hop : int
        Number of samples between the starts of consecutive frames.
    len_signal : int
        Length of the returned time-domain signal.

    Returns
    -------
    numpy.ndarray
        Real 1-D array of length ``len_signal``. Frames are summed as-is
        (no synthesis window or gain normalisation is applied). Samples of a
        frame that would fall beyond ``len_signal`` are discarded.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or has fewer than ``N // 2 + 1`` rows.

    Notes
    -----
    NOTE(review): ``ISTFT`` is also defined in an earlier cell of this
    notebook; this later definition shadows it. Consider keeping one copy.
    """
    X = np.asarray(X)
    bins = N // 2 + 1
    if X.ndim != 2 or X.shape[0] < bins:
        raise ValueError(
            "X must be 2-D with at least N//2 + 1 = %d rows" % bins
        )
    frames = X.shape[1]

    # Rebuild the two-sided spectrum from conjugate symmetry:
    # full[N - k] = conj(X[k]) for k = 1 .. N - bins.
    S_hat_full = np.concatenate(
        (X[:bins, :], np.conj(X[N - bins:0:-1, :])), axis=0
    )

    # N x N inverse DFT matrix, including the 1/N normalisation.
    n = np.arange(N)
    inverse_F = np.exp(2j * math.pi * np.outer(n, n) / N) / N
    X_inv = np.real(np.matmul(inverse_F, S_hat_full))

    # Overlap-add straight into the output buffer instead of allocating a
    # (frames, len_signal) matrix and summing its rows.
    x_inv = np.zeros(len_signal)
    for i in range(frames):
        start = hop * i
        if start >= len_signal:
            break
        stop = min(start + N, len_signal)
        x_inv[start:stop] += X_inv[:stop - start, i]
    return x_inv

In [12]:
# Back to the time domain, with the same frame length (1024) and hop (512)
# used for the forward transform and the same length as x_nmf.
x_clean = ISTFT(S_hat, N=1024, hop=512, len_signal=len(x_nmf))

In [13]:
# librosa.output.write_wav was removed in librosa 0.8.0, so write the file
# with SciPy instead. Samples are cast to 32-bit float because 64-bit float
# WAV files are not widely supported by audio players.
wavfile.write('x_prob4.wav', sr3, x_clean.astype(np.float32))