In [1]:
#!pip install pandas scipy pandas
#!pip install --upgrade https://github.com/EmilWine/gpuRIR/zipball/master
#!pip3 install scikit-image
#!pip3 install mpldatacursor

In [2]:
import numpy as np
import numpy.matlib
import matplotlib.pyplot as plt

from math import ceil
import librosa
import librosa.display
import gpuRIR
import scipy.signal as signal
import pandas as ps
from mpl_toolkits.mplot3d import Axes3D
import mpl_toolkits.mplot3d as mp3d
from ipywidgets import *
from scipy.optimize import linear_sum_assignment
from multiprocessing import Manager, Pool
from skimage.feature.peak import peak_local_max

import warnings
warnings.filterwarnings('ignore')

%matplotlib nbagg

In [3]:
%matplotlib nbagg

### Acoustical simulation types
1. Image method
2. Ray tracing
3. Wave physics

### Room acoustics

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

* Mixing time is the transition from specular-dominated reflections to diffuse-dominated reflections

### Localization approaches

1. Model based
    * Find a room model to fit the measurements
    * Find array's location in the room 
    * Find work zones in time domain
    * Track peaks in work zones

2. Model-less method
    * Find static peaks. 
    * Track static peaks by majority.
    * Define work zones in time domain
    * Track peaks in work zones


- Both methods can be combined
- The analysis focuses more on the model-based method, because the 2nd (model-less) method requires recordings.
    * I misinterpreted the goal initially :(
- Common groundwork for both approaches.

- To analyze the model-less method, the following tools are required:
    * Mixed ray tracing and sound field tool is required.
    * Correlated multiple mics
    * Generated room DB with human positions and obstacles
    


## Image method simulation

In [4]:
## Source and receiver array positions
room_sz = [3,2.2,2.5]  # Size of the room [m], ordered (x, y, z)
pos_src = np.array([[0.6,0.7,1.0]]) # Positions of the sources [m], one row per source
# Two receivers, each 0.1 m from the source: the first offset along +y,
# the second along +x.
pos_rcv = np.vstack((pos_src+np.array([[0,0.1,0]]),
                   pos_src+np.array([[0.1,0.0,0]])))

In [5]:
# One orientation vector (0, 1, 0) per receiver, i.e. every receiver points along +y.
orV_rcv = np.matlib.repmat(np.array([0,1,0]), len(pos_rcv), 1) 
# Vectors pointing in the same direction as the receivers
mic_pattern = "omni" # Receiver polar pattern
abs_weights = [1.0]*5+[1.0] # Absorption coefficient ratios of the walls (six equal weights)
T60 = 1.0 # Time for the RIR to reach 60dB of attenuation [s] 
att_diff = 5.0 # Attenuation at which the diffuse reverberation model takes over [dB]
att_max = 60.0 # Attenuation at the end of the simulation [dB]
Tw = 3 # Window length in samples - this is the smearing factor
fs=48000*2 # Sampling frequency [Hz]
c = 343 # Speed of sound [m/s]; passed to the simulator as `c`

In [6]:
## Helper functions
def envelope(sig_in):
    """Return the magnitude of the analytic (Hilbert) signal of `sig_in`."""
    analytic = signal.hilbert(sig_in)
    return np.abs(analytic)

def energy_decay(rir):
    """Return the normalised remaining-energy curve of an impulse response.

    Element k is the energy of the samples after index k, divided by the
    energy of the samples after index 0 (so the first value is 1).
    """
    cumulative = np.cumsum(np.square(np.abs(rir)))
    remaining = cumulative[-1] - cumulative
    # In-place division on purpose: keeps the dtype of the cumulative sum.
    remaining /= remaining[0]
    return remaining

def tvec(vec,fs,centered=False):
    """Return a time axis in milliseconds with one entry per sample of `vec`.

    With `centered=True` the axis is shifted so that its mean is zero.
    """
    sample_idx = np.arange(len(vec))
    time_ms = 1000 * sample_idx / fs
    return time_ms - np.mean(time_ms) if centered else time_ms

def window_framing(arr,win_size,win_step):
    """Cut `arr` into Hann-weighted frames.

    Frames start every `win_step` samples and only complete frames are kept.
    The result has one frame per column, i.e. shape (win_size, n_frames).
    """
    taper = signal.windows.hann(win_size)
    last_start = len(arr) - win_size
    frames = [
        taper * arr[start:start + win_size].tolist()
        for start in range(0, last_start + 1, win_step)
    ]
    return np.array(frames).T


def to_db(vec):
    """Convert an amplitude to decibels (20*log10), with a 1e-6 floor added to the magnitude."""
    magnitude = np.abs(vec) + 1e-6
    return 20 * np.log10(magnitude)

def to_lin(vec):
    """Convert decibels to a linear amplitude ratio (inverse of 20*log10)."""
    exponent = vec / 20
    return 10 ** exponent


# Display array configuration

In [7]:
import matplotlib.pyplot as plt

# 3-D sketch of the room with the source and receiver positions.
wx,wy,wz = room_sz

# Corner lists of the four faces that get drawn: floor, ceiling and the two
# walls at x = 0 and x = wx. The walls at y = 0 and y = wy are not drawn.
bot = [(0, 0, 0),(wx, 0, 0),(wx, wy, 0),(0, wy, 0)]
top =  [(0, 0, wz),(wx, 0, wz),(wx, wy, wz),(0, wy, wz)]
p1 =  [(0, 0, wz),(0, wy, wz),(0, wy, 0),(0, 0, 0)]
p2 =  [(wx, 0, wz),(wx, wy, wz),(wx, wy, 0),(wx, 0, 0)]


fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111, projection='3d')
face1 = mp3d.art3d.Poly3DCollection([bot], alpha=0.1, linewidth=1)
face2 = mp3d.art3d.Poly3DCollection([top], alpha=0.1, linewidth=1)
face3 = mp3d.art3d.Poly3DCollection([p1], alpha=0.5, linewidth=1)
face4 = mp3d.art3d.Poly3DCollection([p2], alpha=0.5, linewidth=1)
# Only floor and ceiling get an explicit face colour (blue).
face1.set_facecolor((0, 0, 1))
face2.set_facecolor((0, 0, 1))

ax.scatter(pos_src[:,0],pos_src[:,1],pos_src[:,2],label="Sources",marker='D')
ax.scatter(pos_rcv[:,0],pos_rcv[:,1],pos_rcv[:,2],label="Receivers",marker='X')

ax.add_collection3d(face1)
ax.add_collection3d(face2)
ax.add_collection3d(face3)
ax.add_collection3d(face4)
ax.set_xlabel('x [m]')
ax.set_ylabel('y [m]')
ax.set_zlabel('z [m]')
# Same fixed 0..3 m limits on all three axes (not derived from room_sz).
ax.set_xlim([0,3])
ax.set_ylim([0,3])
ax.set_zlim([0,3])

plt.show()


<IPython.core.display.Javascript object>

In [8]:
# Derive the simulator inputs from the room size and the target T60.
beta = gpuRIR.beta_SabineEstimation(room_sz, T60, abs_weights=abs_weights) # Reflection coefficients
Tdiff= gpuRIR.att2t_SabineEstimator(att_diff, T60) # Time to start the diffuse reverberation model [s]
Tmax = gpuRIR.att2t_SabineEstimator(att_max, T60)	 # Time to stop the simulation [s]
nb_img = gpuRIR.t2n( Tdiff, room_sz )	# Number of image sources in each dimension


In [9]:
# Taus: per (source, receiver) arrival times of the simulated reflections.
# Later cells convert them to milliseconds with 1000*Taus/fs, so they are
# presumably expressed in samples - TODO confirm against the gpuRIR fork.
# RIRs: impulse responses indexed as RIRs[source, receiver, sample].
Taus = gpuRIR.simulateTaus(room_sz, beta, pos_src, pos_rcv, nb_img, fs, \
                           orV_rcv=orV_rcv, mic_pattern=mic_pattern,c=c)
RIRs = gpuRIR.simulateRIR(room_sz, beta, pos_src, pos_rcv, nb_img, Tmax,\
                          Tw, fs, Tdiff=Tdiff, orV_rcv=orV_rcv, mic_pattern=mic_pattern,c=c)

In [10]:
# Time axis [ms] of the simulated impulse response of channel 0.
t = tvec(RIRs[0,0,:],fs)
# Line-of-sight delay [ms] from the source to receiver 0. Uses the same speed
# of sound `c` that was handed to the simulator, so the marker stays aligned
# with the simulated RIR if `c` is ever changed.
direct_tau = 1000*np.linalg.norm(pos_src - pos_rcv[0,:])/c

plt.figure()
plt.title("Channel 0")
plt.plot(t,RIRs[0,0,:],label = 'Physical RIR')
# Simulated reflection arrival times, drawn as markers on the zero line.
plt.plot(1000*Taus[0,0]/fs,[0]*len(Taus[0,0]),'x',label="Specular TAUs from simulation")
plt.plot(direct_tau,0,'o',label="Line of sight")
plt.xlabel('time [ms]')
plt.legend()

# Zoom on the first 30 ms, where individual reflections are still visible.
plt.xlim([0,30])
plt.ylim([-0.1,0.1])
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

In [11]:
## Calculate Optimal FFT window length

# Largest distance between any two receivers [m]; it bounds the time
# difference of arrival between channels.
mic_offsets = pos_rcv[:, None, :] - pos_rcv[None, :, :]
dmax = np.linalg.norm(mic_offsets, axis=-1).max()

# Maximum inter-microphone delay [ms], using the configured speed of sound.
tau_max_ms = 1000*dmax/c

window_duration = 2*tau_max_ms + tau_max_ms #the extra is to avoid wrapping in cross correlation

# Hop is a quarter of the FFT length; rounding the hop up keeps the window
# at least `window_duration` long.
Nhop = int(np.ceil(window_duration*fs/1000/4))
Nfft = Nhop * 4

print("FFT={0}, Hop={1}, Window-Length={2:.2f}ms".format(Nfft,Nhop,1000*Nfft/fs))

# dmax = np.linalg.norm(np.diff(pos_rcv,axis=0),axis=-1)

FFT=120, Hop=30, Window-Length=1.25ms


In [12]:
## Probe signal: Hann-windowed chirp
## (the sweep is linear in frequency, see method="linear" below)
t_probe = 0.2  # probe duration [s]
t = np.arange(int(ceil(t_probe * fs))) / fs

fmin = 12000  # sweep start frequency [Hz]
fmax = 44000  # sweep end frequency [Hz]

probe_sig = signal.chirp(t,fmin,t_probe,fmax,method="linear")
window = signal.windows.hann(len(probe_sig))
probe_windowed = window*probe_sig
# Autocorrelation of the windowed probe and its envelope.
probe_corr = signal.correlate(probe_windowed,probe_windowed,'same')
probe_env = envelope(probe_corr)

# First moment (m1) and standard deviation (m2) of the envelope over time,
# using the envelope as the weight.
m1 = np.sum(t*np.abs(probe_env))/np.sum(np.abs(probe_env))
m2 = np.sqrt(np.sum( ((t-m1)**2)*np.abs(probe_env))/np.sum(np.abs(probe_env)))
m1 -= np.mean(t)  # express the centroid on the centred lag axis used in the plot

# Width of the correlation peak, taken as six standard deviations [ms].
peak_width_ms = 6*m2*1000

plt.figure()
plt.plot(1000*(t-np.mean(t)),probe_corr)
plt.plot(1000*(t-np.mean(t)),np.abs(probe_env))
plt.text(1000*m1,np.max(np.abs(probe_env)),"Correlation time {0:.2f}[ms]".format(peak_width_ms),\
         horizontalalignment='center',verticalalignment='bottom')
# Show one analysis window (Nfft samples) around zero lag.
plt.xlim([-1000*Nfft/2/fs,1000*Nfft/2/fs])
plt.xlabel('Tau [ms]')

# Raw chirp, windowed chirp and the window itself, against sample index.
plt.figure()
plt.plot(probe_sig)
plt.plot(probe_windowed)
plt.plot(window)

# Spectrogram of the un-windowed probe, with the analysis parameters chosen above.
freqs, times,probe_stft = signal.stft(probe_sig,fs=fs,nperseg=Nfft,noverlap=Nfft-Nhop,return_onesided=True)

plt.figure()
plt.pcolormesh(times,freqs,np.abs(probe_stft))
plt.show()





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
# Build one record per (source, receiver) pair and create the DataFrame once
# at the end. `DataFrame.append` was removed in pandas 2.0, and growing a
# frame row by row copies it on every iteration.

# Unit-energy probe, so probing and matched filtering do not rescale the RIR.
normed_probe_windowed = probe_windowed / np.linalg.norm(probe_windowed)


snr_db = 10
# Noise-to-signal amplitude ratio; does not depend on the channel.
snr_ratio = to_lin(-snr_db)

records = []
for src in range(len(pos_src)):
    for rcv in range(len(pos_rcv)):
        rir = RIRs[src,rcv,:]
        # Microphone signal: the probe played through the room, without noise...
        clean_prir = signal.convolve(rir,normed_probe_windowed,'full',method="direct")
        # ...and with white Gaussian noise at the requested SNR.
        # NOTE(review): the noise is not seeded, so results vary between runs.
        noise_rms = np.std(clean_prir)*snr_ratio
        noised_prir = clean_prir + noise_rms * np.random.randn(*clean_prir.shape)

        # Matched filter (correlation with the probe), written as flip ->
        # convolve -> flip, then dropping the first len(probe)-1 samples.
        ds_rir = np.flip(signal.convolve(np.flip(noised_prir),normed_probe_windowed))[len(normed_probe_windowed)-1:]
        framed_rir = window_framing(ds_rir, Nfft,Nhop)
        # Two-sided STFTs of the true RIR and of the estimated (de-spread) RIR.
        _,_,stft_rir = signal.stft(rir,fs=fs,nperseg=Nfft,noverlap=Nfft-Nhop,return_onesided=False)
        _,_,stft_drir = signal.stft(ds_rir,fs=fs,nperseg=Nfft,noverlap=Nfft-Nhop,return_onesided=False)

        e_rir = energy_decay(rir)
        e_ds_rir = energy_decay(ds_rir)

        ds_rir_tvec = tvec(ds_rir,fs)
        rir_tvec = tvec(rir,fs)
        # Detections: prominent peaks of the estimated RIR envelope.
        rir_peaks,_ = signal.find_peaks(envelope(ds_rir),prominence=0.01)#,width=3,rel_height=0.5)

        records.append({"src":int(src),"rcv":int(rcv),"RIR":rir,
                        "clean_pRIR":clean_prir,"noised_pRIR":noised_prir,
                        "stft_RIR":stft_rir,"stft_dRIR":stft_drir, "dRIR":ds_rir,
                        "framed_RIR": framed_rir, "PEAKS": ds_rir_tvec[rir_peaks],
                        "dTVEC":ds_rir_tvec,"TVEC":rir_tvec,
                        "TAU":1000*Taus[src,rcv]/fs,"eRIR":e_rir,"edRIR":e_ds_rir})

df = ps.DataFrame(records)

# Four stacked panels for source 0: probed signal, estimated vs. true RIR,
# envelopes of both channels, and peak detections against the true RIR.
plt.figure(figsize=(9,8))

# Panel 1: noisy and clean microphone signal of channel 0.
plt.subplot(411)
plt.title("Probed signal")
plt.plot(df.iloc[0].dTVEC,df.iloc[0].noised_pRIR,label="Noisy signal @ SNR={0}dB".format(snr_db))
plt.plot(df.iloc[0].dTVEC,df.iloc[0].clean_pRIR,label="Clean signal",alpha=0.5)

plt.xlim([0,500])
plt.legend()
plt.grid()

# Panel 2: estimated RIR of channel 0 (envelope and raw) against |true RIR|.
plt.subplot(412)
plt.plot(df.iloc[0].dTVEC,envelope(df.iloc[0].dRIR),label="envelope")
plt.plot(df.iloc[0].dTVEC,df.iloc[0].dRIR,label="real")
plt.plot(df.iloc[0].TVEC,np.abs(df.iloc[0].RIR),label="True RIR")
plt.xlim([0,20])
plt.ylim([-0.05,0.1])
plt.legend()
plt.grid()

# Panel 3: estimated-RIR envelopes of rows 0 and 1 (the two receivers).
plt.subplot(413)
plt.plot(df.iloc[0].dTVEC,envelope(df.iloc[0].dRIR),label="envelope ch 0")
plt.plot(df.iloc[1].dTVEC,envelope(df.iloc[1].dRIR),label="envelope ch 1")
plt.xlim([0,20])
plt.ylim([-0.05,0.1])
plt.xlabel('Time [ms]')
plt.legend()

# Panel 4: detected peak times of channel 0 on the zero line, over |true RIR|.
plt.subplot(414)
plt.plot(df.iloc[0].PEAKS,[0]*len(df.iloc[0].PEAKS),
         marker='x',linestyle="None",color='g',label="CH0 Detections")
plt.plot(df.iloc[0].TVEC,np.abs(df.iloc[0].RIR))
# plt.plot(df.iloc[1].PEAKS,[0]*len(df.iloc[1].PEAKS),marker='x',linestyle="None")

plt.xlim([0,20])
plt.ylim([-0.05,0.05])
plt.xlabel('Time [ms]')
plt.legend()
plt.grid()

# Magnitude-normalised cross-spectrum of the two true-RIR STFTs. It is not
# plotted here, and `cc` is reassigned from the estimated RIRs in the
# GCC-PHAT cell further down.
cc = (df.iloc[0].stft_RIR * df.iloc[1].stft_RIR.conj()) / \
    (1e-6 + np.abs(df.iloc[0].stft_RIR) * np.abs(df.iloc[1].stft_RIR))
plt.show()

<IPython.core.display.Javascript object>

## Mixing boundary time

### Energy decay

![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [14]:
# Mixing time from the energy decay of the true RIR of channel 0.
te = df.iloc[0].TVEC

## Check energy only after direct sound
# np.argmin on the boolean array returns the first index where
# te >= direct_tau + peak_width_ms; the decay curve is re-normalised to its
# value at that sample.
energy_decay_no_ls = df.iloc[0].eRIR/df.iloc[0].eRIR[np.argmin(te<direct_tau + peak_width_ms)]

# First sample at which the re-normalised remaining energy is below 1/sqrt(2).
imix = np.argmax(energy_decay_no_ls<1/np.sqrt(2))
te_mix = te[imix]
stft_nmix =  int(te_mix*fs/1000/Nhop) # number of STFT hops up to the mixing time

plt.figure()
plt.title("Correlation energy decay")
plt.plot(te,df.iloc[0].eRIR,label = "Correlation energy decay")
plt.plot(te[imix],df.iloc[0].eRIR[imix],'d',label="Mixing boundary")
plt.xlabel('Time [ms]')
plt.legend()
plt.grid()
plt.show()
print("Estimated diffuse time {0:.2f} [ms]".format(te_mix))
print("FFT frames to take {0}".format(stft_nmix))

<IPython.core.display.Javascript object>

Estimated diffuse time 34.52 [ms]
FFT frames to take 110


### Alternative method - Mixing time by peak density
1. Test the number of peaks per window.
2. Define critical density as the number of correlation widths that can reside in the analysis window

In my mind this method is better suited for our purposes

In [15]:
# Mixing time from the density of simulated reflections (channel 0).
density_probe_window_ms = 3
time_range = np.arange(0,140,density_probe_window_ms)
# Count reflections per 3 ms bin, then rescale the counts to
# "reflections per analysis window" (window_duration is in ms).
density,edges = np.histogram(1000*Taus[0,0]/fs,bins = time_range)
density = window_duration*density/density_probe_window_ms
centers = (edges[:-1] + edges[1:]) / 2

critical_density = 2*window_duration/peak_width_ms #max number of peaks in the window frame

# The mixing boundary is the bin just before the first one that reaches the
# critical density. Both edge cases are handled explicitly: the previous
# `argmin(...) - 1` gave index -1 (the LAST bin) when the very first bin was
# already too dense.
dense_bins = np.flatnonzero(density >= critical_density)
if dense_bins.size:
    imix = max(int(dense_bins[0]) - 1, 0)
else:
    # Density never reaches the critical value inside the histogram range.
    imix = len(centers) - 1
te_mix = centers[imix]
stft_nmix =  int(te_mix*fs/1000/Nhop) # number of STFT hops up to the mixing time

# Bar chart of reflections per analysis window, with the selected mixing
# boundary marked at the critical-density level.
plt.figure()
plt.bar(centers,density,width = 0.7*density_probe_window_ms)
plt.plot(te_mix,critical_density,marker='+',markersize = 10,linestyle='None',
         color = 'r',label="Critical density")
plt.title('Reflection density')
plt.xlabel('Reflection time [ms]')
plt.ylabel('Density per STFT window')
plt.legend()
plt.yscale('log')
plt.grid()
plt.show()
print("Estimated diffuse time {0:.2f} [ms]".format(te_mix))
print("Number of STFT frames {0}".format(stft_nmix))


<IPython.core.display.Javascript object>

Estimated diffuse time 16.50 [ms]
Number of STFT frames 52


## Window for object location

comment - The more microphones you have the bigger the work zone will be

In [16]:
# Top: estimated-RIR envelopes of both channels up to the mixing time.
plt.figure()
plt.subplot(211)
plt.plot(df.iloc[0].dTVEC,envelope(df.iloc[0].dRIR),label="envelope ch 0")
plt.plot(df.iloc[1].dTVEC,envelope(df.iloc[1].dRIR),label="envelope ch 1")
plt.xlim([0,te_mix])
plt.ylim([-0.05,0.1])
plt.grid()
# Bottom: geometric mean of the two envelopes, which is large only where
# both channels have energy at the same time.
plt.subplot(212)
plt.plot(df.iloc[1].dTVEC,np.sqrt(envelope(df.iloc[1].dRIR)*envelope(df.iloc[0].dRIR))
         ,label="Work zones")
plt.xlim([0,te_mix])
plt.ylim([-0.05,0.1])
plt.xlabel('time [ms]')
plt.grid()
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

## Peak detection

### GCC - PHAT

![image.png](attachment:image.png)


![image.png](attachment:image.png)

![image.png](attachment:image.png)

### TOA & TDOA Analysis

Explain $\phi_0$ , $\phi_1$, $\phi_2$

#### When TOA and TDOA?
1. Long distances: TDOA estimates the angle much better.
2. Channel distortions: TDOA is better.
3. TOA is good for measuring distance. For short distances it is sufficient. Most simulations in the article refer to large halls (10m x 7m)
4. If you find two TOA peaks... Isn't it like TDOA? NO!

In [17]:
def gcc_phat(vec1,vec2):
    """Return the magnitude-normalised cross-spectrum vec1 * conj(vec2).

    Each input is divided by its own magnitude plus a small regularisation
    term (1e-3 times the square root of the largest magnitude product), so
    bins with very little energy are attenuated instead of amplified.
    """
    mag1 = np.abs(vec1)
    mag2 = np.abs(vec2)
    eps = 1e-3 * np.sqrt(np.max(mag1 * mag2))
    whitened1 = vec1 / (mag1 + eps)
    whitened2 = vec2 / (mag2 + eps)
    return whitened1 * np.conj(whitened2)

In [18]:
# GCC-PHAT cross-spectrum of the two estimated RIRs, one column per STFT frame.
cc = gcc_phat(df.iloc[0].stft_dRIR,df.iloc[1].stft_dRIR)
# Inverse STFT with no overlap, then reshape into columns of Nfft samples.
# NOTE(review): this `icc` does not appear to be read by later cells (the
# slider callback computes its own per-frame inverse FFT) - confirm before relying on it.
_,icc = signal.istft(cc,nperseg=Nfft,noverlap=0,input_onesided = False)
icc = icc.reshape(-1,Nfft).T 

In [19]:
# Magnitude of the GCC-PHAT cross-spectrum in dB, and a cut through the bin
# at the centre of the probe band.
D = to_db(cc)
# Centre frequency of the chirp is (fmin+fmax)/2; bin k of the two-sided STFT
# corresponds to k*fs/Nfft. (The previous expression used fmax-fmin, i.e. the
# bandwidth, which selects a different bin than the plot title promises.)
center_bin = int(round(Nfft*(fmin+fmax)/2/fs))
fig, (ax0,ax1) = plt.subplots(2,1,sharex=True)
ax0.pcolormesh(D-np.max(D))
ax0.set_title('Linear-frequency power spectrogram')

ax1.set_title('Center bin power')
ax1.plot(np.abs(cc[center_bin,:]))
ax1.grid()
# The x axis is shared, so this limits both panels to three times the
# pre-mixing frame count.
ax1.set_xlim([0,stft_nmix*3])
plt.show()

<IPython.core.display.Javascript object>

In [20]:
# Static part of the interactive figure. The slider callback defined below
# this block updates l3, l5 and the axis limits of ax0/ax1.
tcc = tvec(np.arange(Nfft),fs,True)  # centred lag axis [ms] of one analysis window
t_off = 1000*Nfft/fs/2  # half a window [ms]

fig, (ax0,ax1) = plt.subplots(2,1,figsize=(8,5))

# l1, = ax0.plot(tcc+t_off,[0]*len(tcc))
# l2, = ax0.plot(tcc+t_off,[0]*len(tcc))


# Top panel: estimated-RIR envelopes of receiver 0 and receiver 1 (source 0).
l1, = ax0.plot(df[(df.rcv==0) & (df.src==0)].dTVEC.iloc[0],
               envelope(df[(df.rcv==0) & (df.src==0)].dRIR.iloc[0]))

l2, = ax0.plot(df[(df.rcv==1) & (df.src==0)].dTVEC.iloc[0],
               envelope(df[(df.rcv==1) & (df.src==0)].dRIR.iloc[0]))


# Ground-truth arrival times ('.') and detected peaks ('x') on the zero
# line, drawn in the colour of the matching channel envelope.
ax0.plot(df[(df.rcv==0) & (df.src==0)].TAU.iloc[0],
         [0]*len(df[(df.rcv==0) & (df.src==0)].TAU.iloc[0]),'.',
         color = l1.get_color(),label='CH0 GT')
ax0.plot(df[(df.rcv==0) & (df.src==0)].PEAKS.iloc[0],
         [0]*len(df[(df.rcv==0) & (df.src==0)].PEAKS.iloc[0]),'x',
         color = l1.get_color(),label="CH0 Det")

ax0.plot(df[(df.rcv==1) & (df.src==0)].TAU.iloc[0],
         [0]*len(df[(df.rcv==1) & (df.src==0)].TAU.iloc[0]),'.',
         color = l2.get_color(),label='CH1 GT')

ax0.plot(df[(df.rcv==1) & (df.src==0)].PEAKS.iloc[0],
         [0]*len(df[(df.rcv==1) & (df.src==0)].PEAKS.iloc[0]),'x',
         color = l2.get_color(),label="CH1 Det")

ax0.set_ylim([-0.2,0.1])

ax0.set_xlabel('Time [ms]')
ax0.legend()
ax0.grid()

# Bottom panel: placeholders that the callback fills in. l3 is the GCC-PHAT
# envelope of the selected frame, l5 the true delay differences.
l3, = ax1.plot(tcc,[0]*len(tcc),label='GCC-PHAT envelope')
l5, = ax1.plot([],[],'d',color=l3.get_color())



ax1.legend()
ax1.set_xlabel('Tau [ms]')
ax1.grid()
plt.tight_layout()
plt.show()

def update(idx = 0):
    """Slider callback: show the GCC-PHAT of one analysis window.

    Reads the notebook globals `cc`, `df`, `tcc`, `Nhop`, `Nfft`, `fs` and
    updates the artists `l3`, `l5` and the limits of `ax0` / `ax1` in place.

    Parameters
    ----------
    idx : int
        Index of the analysis window; window `idx` starts at idx*Nhop samples.
    """
    # Column idx+2 of the STFT is used for the window that starts at
    # idx*Nhop. With Nfft = 4*Nhop the window centre lies two hops after its
    # start; this presumably matches scipy's default zero-padded STFT, whose
    # frame k is centred on sample k*Nhop - TODO confirm.
    icc = np.fft.fftshift(np.fft.ifft(cc[:,idx+2]))
    t_off = 1000*idx*Nhop/fs + 1000*Nfft/fs/2

    # Time span [ms] covered by the selected window.
    win_lo = t_off + tcc.min()
    win_hi = t_off + tcc.max()

    tau0 = df[(df.rcv==0) & (df.src==0)].TAU.iloc[0]
    tau1 = df[(df.rcv==1) & (df.src==0)].TAU.iloc[0]

    # True delay differences of the reflections whose channel-1 arrival
    # falls inside the window (element-wise pairing of the two TAU arrays).
    in_window = (tau1 > win_lo) & (tau1 < win_hi)
    true_tau_diff = (tau0 - tau1)[in_window]

    gcc_env = envelope(np.real(icc))
    l3.set_ydata(gcc_env)
    l5.set_data(true_tau_diff,[0]*len(true_tau_diff))

    ax0.set_xlim([win_lo,win_hi])
    ax1.set_ylim([-0.0,min(0.5,1.1*np.max(gcc_env))])
    
# Slider over the analysis-window index, from 0 to stft_nmix in steps of 1.
interact(update,idx=(0,stft_nmix,1))


<IPython.core.display.Javascript object>

interactive(children=(IntSlider(value=0, description='idx', max=52), Output()), _dom_classes=('widget-interact…

<function __main__.update(idx=0)>

In [21]:
# print(tpeak0)

## Localization

- Several methods are mentioned in the article. 
- We selected the MLE method and peak detection. 
- It was stated that in real life situation other methods may outperform MLE as they use the "whole" signal.
- My claim is that in the article they didn't use linear assignment as a way to mix the peaks. It substantially improves robustness against errors

### Global MLE solution

![image.png](attachment:image.png)

Where $M$ is the number of all possible mic combinations

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

Emil's notes: This case is trivial and not interesting but it presents the "problem"
1. There's only one reflector
2. There's no noise. 
3. Examining the TDOA the peak is unrealistic. It's only possible if there's substantial $\phi_2$ or no noise at all. From my experience when the distance is 2x the aperture $\phi_2$ drops like crazy 

#### The array in the article
![image.png](attachment:image.png)

## Image method

![image.png](attachment:image.png)

![image.png](attachment:image.png)

Emil's note: This assumes that the walls meet at (0,0,0)

### optimization -  My suggestion

1. Find all channel peaks in the pre mixing range
2. Find all TDOA peaks 
3. Per RoomSize = {Lx,Ly,Lz}, Array orientation = {$\theta_x$,$\theta_y$,$\theta_z$} and Array center = {x,y,z} find the minimum cost of:
    * Find possible reflection TAUs by geometry
    * Do linear assignment - Hungarian
    * Find L1 cost


In [22]:
def get_tau_set(src,tar,reflection_orders,rm_dims,c=343):
    """Propagation delays of the image-method paths between two points.

    For every sign triplet s in {-1, 1}^3 and every order triplet (n, l, m)
    drawn from `reflection_orders`, the path length is
    ``||src + s*tar + 2*(n, l, m)*rm_dims||``.

    Parameters
    ----------
    src, tar : array-like with 3 coordinates
        The two end points [m].
    reflection_orders : sequence of int
        Orders to combine per axis, e.g. ``np.arange(-2, 3)``.
    rm_dims : array-like with 3 entries
        Room dimensions [m].
    c : float, optional
        Speed of sound [m/s]. Defaults to 343, the value that was previously
        hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D array of delays [s] with ``8 * len(reflection_orders)**3``
        entries, ordered by sign triplet first and order triplet second.
    """
    def triplet_combinations(base_vec):
        # All ordered triplets of base_vec entries, one triplet per row.
        vec = np.stack(np.meshgrid(base_vec,base_vec,base_vec))
        return vec.reshape(3,len(base_vec)**3).T

    # Neither grid depends on the loop variables, so build each one once.
    sign_triplets = triplet_combinations([-1,1])
    order_triplets = triplet_combinations(reflection_orders)

    tau_test = []
    for sgn_line in sign_triplets:
        rp = src + sgn_line*tar
        for nlm in order_triplets:
            rr = 2*nlm*rm_dims
            tau_test.append(np.linalg.norm(rp+rr)/c)
    return np.array(tau_test)

# tau_test    =  get_tau_set(pos_src,pos_rcv[0],
#                            np.arange(-2,3),room_sz)



In [23]:
def calc_diff_by_windows(tau0,tau1,tmax,winsize,winshift):
    """Group element-wise delay differences tau0 - tau1 into sliding windows.

    Both inputs are first limited to entries below `tmax` and truncated to a
    common length; entries are then paired by position. Window k spans
    [k*winshift, k*winshift + winsize), and a difference belongs to a window
    when its `tau0` value lies inside it. Windows are produced until one ends
    at or beyond `tmax`.

    Returns a list with one array of differences per window.
    """
    early0 = tau0[tau0 < tmax]
    early1 = tau1[tau1 < tmax]

    n_pairs = min(len(early0),len(early1))
    early0, early1 = early0[:n_pairs], early1[:n_pairs]
    pair_diff = early0 - early1

    windows = []
    win_idx = 0
    win_stop = 0
    while win_stop < tmax:
        win_start = win_idx*winshift
        win_stop = win_start + winsize
        inside = (early0 >= win_start) & (early0 < win_stop)
        windows.append(pair_diff[inside])
        win_idx += 1
    return windows
        
#         t_off = 1000*idx*Nhop/fs + 1000*Nfft/fs/2

In [41]:
## Create data set from GT (ground-truth arrival times)
## NOTE: the "Create data set ... from PEAKS" cell that follows reassigns
## tau0_cutoff / tau1_cutoff. Under "Run All" this data set is therefore
## replaced before the cost function reads it; run only one of the two cells.

# Keep only the arrivals before the mixing time.
filter0 = (df[(df.rcv==0) & (df.src ==0)].TAU.iloc[0] < te_mix)# & \
#           (df[(df.rcv==0) & (df.src ==0)].TAU.iloc[0] > direct_tau + peak_width_ms)
filter1 = (df[(df.rcv==1) & (df.src ==0)].TAU.iloc[0] < te_mix)# & \
          #(df[(df.rcv==1) & (df.src ==0)].TAU.iloc[0] > direct_tau + peak_width_ms)

tau0_cutoff =  df[(df.rcv==0) & (df.src ==0)].TAU.iloc[0][filter0]
tau1_cutoff =  df[(df.rcv==1) & (df.src ==0)].TAU.iloc[0][filter1]

# Windowed delay differences of the unperturbed arrival times. The TOA cost
# function does not read this; only the commented-out TDOA cell refers to the name.
tau01_diff  =  calc_diff_by_windows(df[(df.rcv==0) & (df.src ==0)].TAU.iloc[0],
                         df[(df.rcv==1) & (df.src ==0)].TAU.iloc[0],te_mix,
                         1000*Nfft/fs/2,1000*Nhop/fs)

# ## Clean version
# These three values are overwritten by the "Noisy version" right below;
# comment that block out to get the clean data set.
jitter_tau_ms = 0
drop_rate = 0
addition_rate = 0

## Noisy version
jitter_tau_ms = peak_width_ms/12  # standard deviation of the timing jitter [ms]
drop_rate = 0.00                  # fraction of arrivals removed
addition_rate = 0.2               # spurious arrivals added, as a fraction of the kept ones

### Add jitter 
# NOTE(review): no random seed is set, so every run gives a different data set.
tau0_cutoff += jitter_tau_ms * np.random.randn(*tau0_cutoff.shape)
tau1_cutoff += jitter_tau_ms *np.random.randn(*tau1_cutoff.shape)

## Drop Taus
# Sampling without replacement also shuffles the order of the kept arrivals.
tau0_cutoff = np.random.choice(tau0_cutoff,int(len(tau0_cutoff)*(1-drop_rate)),replace=False)
tau1_cutoff = np.random.choice(tau1_cutoff,int(len(tau1_cutoff)*(1-drop_rate)),replace=False)

## Add Taus
# False detections drawn uniformly between 0 and the mixing time.
tau0_add = np.random.uniform(0,te_mix,int(len(tau0_cutoff)*addition_rate))
tau1_add = np.random.uniform(0,te_mix,int(len(tau1_cutoff)*addition_rate))
tau0_cutoff = np.hstack((tau0_cutoff,tau0_add))
tau1_cutoff = np.hstack((tau1_cutoff,tau1_add))


In [25]:
## Create data set from PEAKS (detected envelope peaks instead of ground truth)
## NOTE: this reassigns tau0_cutoff / tau1_cutoff, the same names the
## ground-truth data-set cell assigns.

# Keep only the detections before the mixing time.
filter0 = (df[(df.rcv==0) & (df.src ==0)].PEAKS.iloc[0] < te_mix)# & \
filter1 = (df[(df.rcv==1) & (df.src ==0)].PEAKS.iloc[0] < te_mix)# & \

tau0_cutoff =  df[(df.rcv==0) & (df.src ==0)].PEAKS.iloc[0][filter0]
tau1_cutoff =  df[(df.rcv==1) & (df.src ==0)].PEAKS.iloc[0][filter1]


In [42]:
%%time

def toa_cost_func(s_pos):
    """TOA matching cost of a candidate source position.

    The microphone array is moved rigidly with the candidate: each receiver
    keeps its offset from the true source (`pos_rcv[i] - pos_src`). For each
    of the two channels the image-method delays predicted for that geometry
    are matched one-to-one to the measured delays by linear assignment, and
    the absolute time differences (L1) of the matched pairs are summed.

    Reads the notebook globals `pos_src`, `pos_rcv`, `room_sz`, `te_mix`,
    `tau0_cutoff` and `tau1_cutoff`.

    Parameters
    ----------
    s_pos : array-like with 3 coordinates
        Candidate source position [m].

    Returns
    -------
    float
        Sum of the matched L1 costs of both channels [ms].
    """
    def _channel_loss(mic_pos, measured_taus_ms):
        # L1 assignment cost between predicted and measured delays of one channel.
        t_pos = s_pos + mic_pos - pos_src
        model_taus_ms = 1000*get_tau_set(s_pos,t_pos,np.arange(-2,3),room_sz)
        model_taus_ms = model_taus_ms[model_taus_ms < te_mix]

        # Rows are predicted delays, columns measured delays. Broadcasting
        # always gives a 2-D matrix; the earlier meshgrid/diff/squeeze version
        # collapsed to 1-D when either side had a single entry, which
        # linear_sum_assignment rejects.
        cost = np.abs(model_taus_ms[:, None] - np.ravel(measured_taus_ms)[None, :])

        ## L2 criteria (alternative): cost = cost**2

        ## Linear assignment solution
        row_ind, col_ind = linear_sum_assignment(cost)
        return cost[row_ind, col_ind].sum()

    return _channel_loss(pos_rcv[0], tau0_cutoff) + _channel_loss(pos_rcv[1], tau1_cutoff)


## Optimal 2D positioning
res = 0.1
x_range = np.arange(0,4,res)
y_range = np.arange(0,4,res)

# Candidate positions on a horizontal plane at z = 1 m (the height of the
# true source). With meshgrid's default 'xy' indexing the flattened points
# run over y in the outer loop and x in the inner loop.
xyz = np.stack(np.meshgrid(x_range,y_range,[1]))
xyz = xyz.reshape(3,-1).T


## Optimal 2D positioning -  single process alternative
# loss_result = [toa_cost_func(s_pos) for s_pos in xyz]

## Optimal 2D positioning -  multiprocessing
# One candidate position (one row of xyz) per task; results keep the input order.
with Pool(processes=6) as pool:
    loss_result = pool.map(toa_cost_func, xyz)

# Rows index y and columns index x, matching the point order above. The
# earlier (len(x_range), len(y_range)) shape only worked because both ranges
# have the same length.
loss_result = np.array(loss_result).reshape(len(y_range),len(x_range))

CPU times: user 31.1 ms, sys: 77.4 ms, total: 108 ms
Wall time: 17.7 s


In [43]:
# Log-loss map over the candidate grid, with the true geometry and the
# strongest local minima overlaid.
plt.figure(figsize=(8,6))
plt.title("2D example")
# pcolor takes the first array along the horizontal axis and the second
# along the vertical axis, so rows of loss_result are treated as y.
cm = plt.pcolor(x_range-res/2,y_range-res/2,np.log(loss_result))
plt.colorbar(format="%2.0f")
plt.plot(pos_src[:,0],pos_src[:,1],marker='x',color='r',label = 'Source',linestyle='None')
plt.plot(pos_rcv[:,0],pos_rcv[:,1],marker='.',color='r',label = 'Mics',linestyle='None')
plt.plot([0,room_sz[0],room_sz[0],0,0],[0,0,room_sz[1],room_sz[1],0],
         color='k',label='Room')

# Local minima of the loss, found as local maxima of -loss.
peaks_2d= peak_local_max(-loss_result,min_distance=5,num_peaks=12)
for pk in peaks_2d:
    # pk is (row, column): the row is the y index and the column the x index,
    # consistent with the pcolor call above. The earlier code looked the
    # column up in y_range and the row in x_range, which only gave the right
    # coordinates because the two ranges are identical.
    peak_x = x_range[pk[1]]
    peak_y = y_range[pk[0]]
    llr = np.log(loss_result[pk[0],pk[1]])
    plt.scatter(peak_x,peak_y, s=60,
                facecolors='none', edgecolors='m')
    plt.text(peak_x,peak_y,"{0:.1f}".format(llr),
             verticalalignment='bottom',horizontalalignment = 'right',color='w')


plt.legend()
plt.xlabel("x[m]")
plt.ylabel("y[m]")
plt.show()





<IPython.core.display.Javascript object>

In [28]:
## TDOA optimization - WIP
# tau_test0 = get_tau_set(pos_src[0],pos_rcv[0],np.arange(-2,3),room_sz)
# tau_test1 = get_tau_set(pos_src[0],pos_rcv[1],np.arange(-2,3),room_sz)

# tau_test0 = 1000*tau_test0[1000*tau_test0<te_mix]
# tau_test1 = 1000*tau_test1[1000*tau_test1<te_mix]

# tau_test_diff = calc_diff_by_windows(tau_test0,tau_test1,te_mix,
#                          1000*Nfft/fs/2,1000*Nhop/fs)

# tau01_diff  =  calc_diff_by_windows(tau0_cutoff,tau1_cutoff,te_mix,
#                          1000*Nfft/fs/2,1000*Nhop/fs)


# for diff_test, diff in zip(tau_test_diff,tau01_diff):
#     print(diff_test,diff)

# tau_test1 = get_tau_set(s_pos,t_pos1,np.arange(-2,3),room_sz)


### TODO
1. Solve the problem with TAUs directly
    * Optimize with TDOA
    * Add more mics to reduce ambiguity
        * Optimize room size
        * Optimize orientation
    * For TDOA take critical density lower than for TOA

2. Solver with actual peaks in graph
    * Effect of noise - white and diffused
    * Peaks detection level based on noise (minima tracking)
    * Add peak power as weight to peaks
    * Add TDOA to the optimization

3. Read in the whole article
    * Room size estimation

### Summary
1. Room acoustics
    * Early and Late reflections
    * Diffused v.s. Specular
    * Frequency dependence
    * Noise sources
        - Diffused noise $\sin(d)/d$ 
        - White noise - microphone noise floor.
        - Don't know which dominates at 30kHz?
    * Microphone calibration. Phase deterioration

2. Sampling signal
    * Bandwidth
    * Analysis window
    * Energy decay threshold
    * Sampling with the carrier v.s. envelope
    * Optimal STFT window
    *
    
3. TDOA and TOA
    * $\phi_0$ , $\phi_1$, $\phi_2$
    * Radial and angular 
4. Room Localization
    * Ambiguities and ghosts
        - Array ambiguities
        - Localization ghosts
    * Peak drops have much bigger effect than additions
    * More mics:
        * Better work zone
        * Much less ambiguities
        * Better SNR
5. My personal opinion:
    * We need to do an experiment
        - Noise level
        - Reverb level
        - Actual effects
    * Try model less tracking in live experiment
        
    
