In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from tqdm import tqdm

import pyroomacoustics as pra
import itertools

import eval_tools as eval

Used the data of experiment 3, but use threshold to find the number of sources.

In [6]:
# load data
exp_df = pd.read_csv('results/experiment_results_exp-3.csv')
# laod picke file with the ground truth

with open('results/experiment_results_exp-3_with_ang_specs.pkl', 'rb') as f:
    ang_specs = pickle.load(f)
    
print(exp_df.shape)
print(len(ang_specs))

doa_grid_rad = np.deg2rad(np.arange(0, 360, 6))

(45900, 28)
5100


In [7]:
ang_specs_dicts = {}
for d in tqdm(ang_specs):
    # if the frame id is not in the dict, add it
    if d['frame_id'] not in ang_specs_dicts:
        ang_specs_dicts[d['frame_id']] = {}
    # add the method id to the frame id
    ang_specs_dicts[d['frame_id']][d['method_id']] = d

100%|██████████| 5100/5100 [00:00<00:00, 1339949.29it/s]


In [8]:
def detect_peaks(values):
    # make circular
    n_points = len(values)
    val_ext = np.append(values, values[:10])

    # run peak finding
    indexes = pra.doa.detect_peaks(val_ext, show=False) % n_points
    candidates = np.unique(indexes)  # get rid of duplicates, if any
    return candidates

In [None]:
thresholds = [0.05, 0.1, 0.2, 0.5, 0.8]

df_results = pd.DataFrame()

for t in thresholds:
    for frame_id in tqdm(ang_specs_dicts.keys(), desc='Iterating over frames for thr {}'.format(t)):
        for method_id in ang_specs_dicts[frame_id].keys():
                        
            ang_spec = ang_specs_dicts[frame_id][method_id]['ang_spec']
            
            ang_spec = np.mean(np.array(ang_spec), -1)
            ang_spec = ang_spec / np.max(ang_spec)
            
            # set the threshold
            ang_spec[ang_spec < t] = 0
            
            # find peaks
            peaks = detect_peaks(ang_spec)      
            
            df_ = pd.DataFrame({
                'frame_id': frame_id,
                "method_id": method_id,
                'threshold': t,
                'peaks_locations': peaks.tolist(),
                'n_peaks': len(peaks)
            })
            df_results = pd.concat([df_results, df_], ignore_index=True)
            
print(len(df_results))

Iterating over frames for thr 0.05: 100%|██████████| 150/150 [00:01<00:00, 96.72it/s]
Iterating over frames for thr 0.1: 100%|██████████| 150/150 [00:02<00:00, 60.59it/s]
Iterating over frames for thr 0.2: 100%|██████████| 150/150 [00:03<00:00, 48.34it/s]
Iterating over frames for thr 0.5: 100%|██████████| 150/150 [00:03<00:00, 41.05it/s]
Iterating over frames for thr 0.8: 100%|██████████| 150/150 [00:03<00:00, 38.97it/s]

88769





In [10]:
# merge the results_df with the exp_df on the frame_id and method_id
df_merge = df_results.merge(exp_df, on=['frame_id', 'method_id'])
df_merge

Unnamed: 0,frame_id,method_id,threshold,peaks_locations,n_peaks,exp_name,time,record_id,num_srcs,src_ids,...,noise_type,add_reverberation,mc_seed,loc_method,freq_min,freq_max,sv_method,nObs,seed,sv_normalization
0,nSrc-1_doas-[44]_type-speech-duration-0.5-snr-...,"alpha-2.0_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.05,18,2,exp-3_nSrc-1_doas-[44]_type-speech-duration-0....,20250306-123716,s0,1,0,...,awgn,False,0,alpha-2.0_beta-2_eps-1E-3_iter-500,200,4000,ref,8,13,True
1,nSrc-1_doas-[44]_type-speech-duration-0.5-snr-...,"alpha-2.0_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.05,45,2,exp-3_nSrc-1_doas-[44]_type-speech-duration-0....,20250306-123716,s0,1,0,...,awgn,False,0,alpha-2.0_beta-2_eps-1E-3_iter-500,200,4000,ref,8,13,True
2,nSrc-1_doas-[44]_type-speech-duration-0.5-snr-...,"alpha-2.0_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.05,12,6,exp-3_nSrc-1_doas-[44]_type-speech-duration-0....,20250306-123716,s0,1,0,...,awgn,False,0,alpha-2.0_beta-2_eps-1E-3_iter-500,200,4000,alg,8,13,True
3,nSrc-1_doas-[44]_type-speech-duration-0.5-snr-...,"alpha-2.0_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.05,20,6,exp-3_nSrc-1_doas-[44]_type-speech-duration-0....,20250306-123716,s0,1,0,...,awgn,False,0,alpha-2.0_beta-2_eps-1E-3_iter-500,200,4000,alg,8,13,True
4,nSrc-1_doas-[44]_type-speech-duration-0.5-snr-...,"alpha-2.0_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.05,29,6,exp-3_nSrc-1_doas-[44]_type-speech-duration-0....,20250306-123716,s0,1,0,...,awgn,False,0,alpha-2.0_beta-2_eps-1E-3_iter-500,200,4000,alg,8,13,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290637,nSrc-5_doas-[43 24 12 22 7]_type-speech-durat...,"alpha-1.2_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.80,23,1,exp-3_nSrc-5_doas-[43 24 12 22 7]_type-speech...,20250306-125325,s0,5,0,...,awgn,False,14,alpha-1.2_beta-2_eps-1E-3_iter-500,200,4000,gp-steerer,128,666,True
290638,nSrc-5_doas-[43 24 12 22 7]_type-speech-durat...,"alpha-1.2_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.80,23,1,exp-3_nSrc-5_doas-[43 24 12 22 7]_type-speech...,20250306-125325,s1,5,1,...,awgn,False,14,alpha-1.2_beta-2_eps-1E-3_iter-500,200,4000,gp-steerer,128,666,True
290639,nSrc-5_doas-[43 24 12 22 7]_type-speech-durat...,"alpha-1.2_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.80,23,1,exp-3_nSrc-5_doas-[43 24 12 22 7]_type-speech...,20250306-125325,s2,5,2,...,awgn,False,14,alpha-1.2_beta-2_eps-1E-3_iter-500,200,4000,gp-steerer,128,666,True
290640,nSrc-5_doas-[43 24 12 22 7]_type-speech-durat...,"alpha-1.2_beta-2_eps-1E-3_iter-500_freqs-[200,...",0.80,23,1,exp-3_nSrc-5_doas-[43 24 12 22 7]_type-speech...,20250306-125325,s3,5,3,...,awgn,False,14,alpha-1.2_beta-2_eps-1E-3_iter-500,200,4000,gp-steerer,128,666,True


In [11]:
df_merge.columns

Index(['frame_id', 'method_id', 'threshold', 'peaks_locations', 'n_peaks',
       'exp_name', 'time', 'record_id', 'num_srcs', 'src_ids', 'doas_est_idx',
       'doas_ref_idx', 'doas_ref_az', 'doas_est_az', 'doas_ref_el',
       'doas_est_el', 'errors', 'target_doa', 'n_sources', 'duration', 'snr',
       'noise_type', 'add_reverberation', 'mc_seed', 'loc_method', 'freq_min',
       'freq_max', 'sv_method', 'nObs', 'seed', 'sv_normalization'],
      dtype='object')

In [12]:
# for each frame id and method id, get the assorciate data_frame
frame_ids = df_results['frame_id'].unique()
method_ids = df_results['method_id'].unique()

ang_error_thr = 10

results_thr_list = []

for frame_id, method_ids in itertools.product(frame_ids, method_ids):
    df_ = df_merge.loc[
          (df_merge['frame_id'] == frame_id) 
        & (df_merge['method_id'] == method_id)
    ]
    
    thrs = df_['threshold'].unique()
    
    for thr in thrs:
        estimated_peaks = df_.loc[df_['threshold'] == thr, 'peaks_locations']
        estimated_azimuths = np.rad2deg(doa_grid_rad[estimated_peaks])
        target_azimuths = np.rad2deg(df_.loc[df_['threshold'] == thr, 'doas_ref_az'])
    
        
        n_estimated = len(estimated_azimuths)
        n_target = len(target_azimuths)

        metrics = eval.compute_metrics(estimated_azimuths, target_azimuths, ang_error_thr)

        metrics['frame_id'] = frame_id
        metrics['method_id'] = method_id
        metrics['threshold'] = thr
        metrics['ang_error_thr'] = ang_error_thr
        
        results_thr_list.append(metrics)
        
df_results_thr = pd.DataFrame(results_thr_list)

In [13]:
# merge the results_df with the exp_df on the frame_id and method_id
df_results_thr = df_results_thr.merge(exp_df, on=['frame_id', 'method_id'])

In [None]:
for frame_id, method_ids in itertools.product(frame_ids, method_ids):
    
    df_ = df_results_thr.loc[
          (df_results_thr['frame_id'] == frame_id) 
        & (df_results_thr['method_id'] == method_id)
        & (df_results_thr['n_sources'] == 3)
        & (df_results_thr['duration'] == 0.5)
    ]
    
    if len(df_) == 0:
        continue
    
    print(len(df_))
    print(frame_id)
    print(method_id)
    n_sources = df_
    
    
    thrs = df_['threshold'].unique()

    for thr in thrs:
        precision = df_.loc[df_['threshold'] == thr, 'precision'].values
        recall = df_.loc[df_['threshold'] == thr, 'recall'].values
        print(precision)
        print(recall)
        1/0

510
nSrc-3_doas-[44 47 53]_type-speech-duration-0.5-snr-20_noise-awgn_reverb-False_mc-0
alpha-1.2_beta-2_eps-1E-3_iter-500_freqs-[200, 4000]_gp-steerer_nObs-128_seed-666_norm-True
[0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667 0.66666667
 0.66666667 0.66

ZeroDivisionError: division by zero