In [None]:
import math
import sys

import IPython
import IPython.display as ipd
import matplotlib.pylab as plt
import numpy as np
import pandas as pd

%reload_ext autoreload
%autoreload 2

%matplotlib inline
#%matplotlib notebook

from matplotlib import rcParams
rcParams["figure.max_open_warning"] = False

In [None]:
def plot_spectrogram(spec, freqs, max_freq=5000, min_freq=1000, ax=None):
    """Draw the log10 of a spectrogram, restricted to a frequency band.

    Parameters
    ----------
    spec : array with frequency along axis 0 and the plotted (time) index
        along the last axis. Inputs with more than two dimensions are first
        averaged over axis -2 (the mics).
    freqs : 1D array of bin frequencies, same length as axis 0 of ``spec``.
    max_freq, min_freq : exclusive band limits; only bins strictly inside
        ``(min_freq, max_freq)`` are drawn.
    ax : axes to draw on. When omitted, a new 10x5 inch figure is created.

    Returns
    -------
    The axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots()
        fig.set_size_inches(10, 5)

    # collapse the mic axis, if there is one
    spec_2d = np.mean(spec, axis=-2) if spec.ndim > 2 else spec

    in_band = (freqs > min_freq) & (freqs < max_freq)
    time_idx = range(spec_2d.shape[1])
    ax.pcolorfast(time_idx, freqs[in_band], np.log10(spec_2d[in_band, :]))
    ax.set_ylabel('frequency [Hz]')
    return ax

# First study

In [None]:
from evaluate_data import read_df_from_wav, read_signal_from_wav
from frequency_analysis import get_spectrogram

# Frequency tag that is inserted into the recording file names (…mono{freq}).
freq = 1750

# Recording number -> position name. The commented-out numbers are further
# recordings of the same positions that are currently not analysed.
position_dict = dict([
    (3, 'upper'),
    #(4, 'upper'),
    (5, 'current'),
    #(6, 'current'),
    (7, 'lower'),
    #(8, 'lower'),
    (14, 'lowest'),
    #(15, 'lowest'),
])

In [None]:
# Load every recording listed in position_dict, plot its spectrogram and keep
# the spectrogram plus the raw buffers in df_specs (one row per recording).
window="hann"
base_filename = f'../experiments/2021_01_07_snr_study/export/motors_nosnr_noprops_mono{freq}'

df_specs = pd.DataFrame(columns=['number', 'spec', 'freqs', 'signal_time'])
for number, position_name in position_dict.items():
    # recording number 0 carries no numeric suffix in its file name
    appendix = f'_{number}' if number != 0 else ''
    filename = base_filename + appendix + '.wav'
    try:
        df_wav = read_df_from_wav(filename, method_window=window)
        signals_wav = read_signal_from_wav(filename) # n_times, n_mics, n_buffer
    except FileNotFoundError:
        # missing recordings are reported and left out of df_specs
        print('skipping', filename)
        continue

    spec, freqs = get_spectrogram(df_wav)

    ax = plot_spectrogram(spec, freqs, max_freq=5000)
    ax.set_title(position_name)

    # append as a new row; the row label equals the current number of rows
    df_specs.loc[len(df_specs), :] = dict(
        number=number,
        spec=spec,
        freqs=freqs,
        signal_time=signals_wav,
    )

In [None]:
# Compare, for every recording in df_specs, the time-averaged spectrum and one
# raw buffer of two time windows of the spectrogram.
# Each entry: label -> (spectrogram time indices, line style, color).
time_window_dict =  {
    'buzzer only': (range(10, 20), '-', 'C0'),
    'buzzer and props': (range(80, 90), ':', 'C1')
}
max_freq = 16000

# Side-by-side spectra: one panel per recording, both time windows overlaid.
figs, axs = plt.subplots(1, len(df_specs), sharey=True, squeeze=False)
for panel in axs.flatten():
    panel.grid(which='both')
figs.set_size_inches(20, 5)

# Side-by-side raw buffers, same layout.
figs_time, axs_time = plt.subplots(1, len(df_specs), sharey=True, squeeze=False)
for panel in axs_time.flatten():
    panel.grid(which='both')
figs_time.set_size_inches(20, 5)

for title, (time_window, ls, color) in time_window_dict.items():

    # one figure per time window, all recordings overlaid
    fig, ax  = plt.subplots()
    fig.set_size_inches(15, 5)

    # i is the df_specs row label; rows are appended with label len(df_specs),
    # so it doubles as the panel column.
    for i, row in df_specs.iterrows():
        spec = np.mean(row.spec, axis=-2)
        freqs = row.freqs
        label = position_dict[row.number] + ' position'
        # raw buffer of mic 0 at the first time index of the window
        signal = row.signal_time[time_window[0], 0, :]
        spec_window = spec[:, time_window]
        mean_spectrum = np.mean(spec_window, axis=1)

        ax.semilogy(freqs, mean_spectrum, label=label, color=f'C{i}')

        # the line style of the window was unpacked but never applied before
        axs[0, i].semilogy(freqs, mean_spectrum, color=color, ls=ls, label=title)
        axs[0, i].set_xlim(0, max_freq)
        axs[0, i].set_title(label)
        axs[0, i].set_xlabel('frequency [Hz]')
        axs[0, i].legend(loc='upper right')
        axs[0, i].set_ylabel('loudness')

        axs_time[0, i].plot(signal, label=title, color=color, ls=ls)
        axs_time[0, i].set_xlabel('time idx [-]')
        axs_time[0, i].set_title(label)
        # labels were set on these lines but no legend was ever drawn
        axs_time[0, i].legend(loc='upper right')

    ax.legend(loc='upper right')
    ax.set_title(title)
    ax.grid(which='major')
    ax.set_xlabel('frequency [Hz]')
    ax.set_xlim(0, max_freq)
    fname = f"plots/snr_study_{title.replace(' ', '_')}.pdf"
    fig.savefig(fname, bbox_inches='tight')
    print('saved as', fname)

fname = "plots/snr_study_side_by_side.pdf"
figs.savefig(fname, bbox_inches='tight')
print('saved as', fname)

fname = "plots/snr_study_side_by_side_time.pdf"
figs_time.savefig(fname, bbox_inches='tight')
print('saved as', fname)

# Second study

The external buzzer plays sweeps (and, in some recordings, a mono tone), and the propellers are run at two thrust settings: 45000 and 55000.

In [None]:
# Experiment folder to analyse -- keep exactly one assignment active.
#exp_dir = '2021_01_14_snr_study'
#exp_dir = '2021_01_15_snr_study' # lowest position
#exp_dir = '2021_01_15_snr_study_2' # standard position
#exp_dir = '2021_01_15_snr_study_3' # with paper
exp_dir = '2021_01_15_snr_study_4' # with final paper
#exp_dir = '2021_01_21_snr_study'

# Row title -> wav file name inside the export folder.
exp_dict = {
    '45000': 'motors_nosnr_noprops_None.wav',
    '55000': 'motors_nosnr_noprops_None_55000.wav',
    'mono': 'nomotors_nosnr_noprops_mono3125.wav',
    'mono 55000': 'motors_nosnr_noprops_mono3125_55000.wav',
    'mono 45000': 'motors_nosnr_noprops_mono3125.wav',
    'sweep': 'nomotors_nosnr_noprops_sweep.wav',
    'sweep 45000': 'motors_nosnr_noprops_sweep.wav',
}

window = "hann"
freq = 3125
start_time = 67
base_filename = '../experiments/' + exp_dir + '/export/'

# Load each recording, plot its spectrogram and the raw buffer at start_time,
# and collect everything in df_specs (one row per recording).
df_specs = pd.DataFrame(columns=['title', 'spec', 'freqs', 'signal_time'])
for title, fname in exp_dict.items():
    filename = f'{base_filename}{fname}'
    try:
        df_wav = read_df_from_wav(filename, method_window=window)
        signals_wav = read_signal_from_wav(filename) # n_times, n_mics, n_buffer
    except FileNotFoundError:
        # missing recordings are reported and left out of df_specs
        print('skipping', filename)
        continue

    spec, freqs = get_spectrogram(df_wav)

    ax = plot_spectrogram(spec, freqs, max_freq=5000)
    ax.set_title(title)

    # raw buffer of mic 0 at time index start_time
    signal = signals_wav[start_time, 0, :]
    plt.figure()
    plt.plot(signal)
    plt.title(f'time signal at idx: {start_time}')
    #plt.ylim(-1, 1)

    df_specs.loc[len(df_specs), :] = dict(
        title=title,
        spec=spec,
        freqs=freqs,
        signal_time=signals_wav,
    )

In [None]:
# Recordings to compare, in the order
# [buzzer only, propellers only, buzzer and propellers].
# Switch thrust setting by swapping which line is active; the ratio plot below
# follows the selection.
#title_list = ['mono', '55000', 'mono 55000']
title_list = ['mono', '45000', 'mono 45000']
ls_list = [':', ':', ':']
max_freq = 5000
min_freq = 100

# Average amplitude per frequency bin (mean over mics, then over all time
# indices from start_time on).
fig, ax = plt.subplots()
for title, ls in zip(title_list, ls_list):
    row = df_specs.loc[df_specs.title==title].iloc[0]

    spec = np.mean(row.spec[..., start_time:], axis=-2)
    freqs = row.freqs
    ax.semilogy(freqs, np.mean(spec, axis=1), label=title, ls=ls)
ax.set_title('average amplitude')
ax.legend(loc='upper right')
ax.set_xlabel('frequency [Hz]')
ax.set_xlim(min_freq, max_freq)

# Ratio between the recording with buzzer and propellers and the recording
# with propellers only. Titles and frequencies are taken from the selected rows
# instead of being hard-coded / left over from the loop above.
_, props_title, monoprops_title = title_list
row_monoprops = df_specs.loc[df_specs.title==monoprops_title].iloc[0]
row_props = df_specs.loc[df_specs.title==props_title].iloc[0]
spec_monoprops = np.mean(row_monoprops.spec[..., start_time:], axis=-2)
spec_props = np.mean(row_props.spec[..., start_time:], axis=-2)

fig, ax = plt.subplots()
ax.semilogy(row_monoprops.freqs,
            np.mean(spec_monoprops, axis=1)/np.mean(spec_props, axis=1),
            label='ratio')
ax.legend(loc='upper right')
ax.set_xlabel('frequency [Hz]')
ax.set_title(f'ratio of {monoprops_title} vs. {props_title}')
ax.set_xlim(min_freq, max_freq)

# propeller noise study

In [None]:
# Overlay, for the two rows titled '45000' and '55000', a mean spectrum (solid)
# and mean + one standard deviation (dotted) on a single log-scale figure.
# Relies on df_specs and start_time from the second-study cell.
plt.figure()
for i, thrust in enumerate([45000, 55000]):
    row = df_specs.loc[df_specs.title==str(thrust)].iloc[0]
    # NOTE(review): this averages over the LAST axis first, whereas the other
    # cells average over axis=-2 (mics) first; mean/std below are then taken
    # over axis 1 of the result -- confirm that this is the intended statistic.
    spec = np.mean(row.spec[..., start_time:], axis=-1)
    
    # NOTE: `mean` and `std` stay behind as kernel globals after this cell.
    mean = np.mean(spec, axis=1)
    std = np.std(spec, axis=1)
    
    plt.semilogy(row.freqs, mean, color=f'C{i}', label=f'all{thrust}')
    plt.semilogy(row.freqs, mean+std, color=f'C{i}', ls=':')
plt.xlim(100, 5000)
plt.ylim(1e-2, 1e2)
plt.legend()
plt.title(f'comparison propeller noise')
# trailing no-op so that the cell does not echo the return value of plt.title
pass

# sweep study - wav

clean pipeline without propellers:  
 1. get the spectrogram (already done)
 2. sort the frequency bins by magnitude at each time step to obtain the index matrix (`index_matrix`).
 3. use this index matrix to extract the PSD

In [None]:
def get_index_matrix(spec):
    """Rank the bins along axis 0 of a spectrogram, strongest first.

    ``spec`` is averaged over axis -2 before ranking. In the returned integer
    matrix, row 0 holds, for every column, the index of the largest averaged
    value, row 1 the second largest, and so on.
    """
    ascending = np.argsort(np.mean(spec, axis=-2), axis=0)
    # flip along axis 0 so that the strongest bin comes first
    return ascending[::-1]


def get_shifted_index_matrix(index_matrix, new_spec, plot=False, max_shift=200):
    """Find the maximum-energy shift of index_matrix for a different spectrogram.

    For example, get index_matrix for a sweep on the propeller-less dataset,
    and then apply it to the dataset with propellers on.

    The strongest-bin track ``index_matrix[0, :]`` is slid along the last axis
    of ``new_spec`` (after averaging over axis -2). For every candidate shift
    the average spectrogram value under the track is computed; the shift with
    the largest average wins (the first one in case of ties).

    Parameters
    ----------
    index_matrix : integer array whose row 0 holds one bin index per column.
    new_spec : spectrogram to align with; averaged over axis -2 before use.
    plot : if True, show the spectrogram masked by the best track. This code
        used to sit after the ``return`` and was therefore never executed.
    max_shift : number of shifts to try (0 .. max_shift - 1), additionally
        limited by the number of columns of the averaged spectrogram.

    Returns
    -------
    ``index_matrix`` with its first ``best_shift`` columns removed.

    NOTE(review): the mask places column j of index_matrix at column
    j + shift of the spectrogram, while the return value drops the first
    ``shift`` columns -- confirm that downstream code expects this alignment.
    """
    spec = np.mean(new_spec, axis=-2)
    max_bins = index_matrix[0, :]

    def shifted_mask(shift):
        # 1 where the track, delayed by `shift` columns, passes; 0 elsewhere
        n_valid = min(spec.shape[1] - shift, len(max_bins))
        mask = np.zeros(spec.shape)
        mask[max_bins[:n_valid], np.arange(n_valid) + shift] = 1
        return mask

    current_max = 0
    current_best = 0
    current_mask = None
    for shift in range(min(spec.shape[1], max_shift)):
        mask = shifted_mask(shift)
        n_selected = np.sum(mask)
        if n_selected == 0:
            # empty track: nothing to score (avoids a 0/0 division)
            continue
        sum_ = np.sum(mask * spec) / n_selected
        if sum_ > current_max:
            current_max = sum_
            current_mask = mask
            current_best = shift

    if plot and current_mask is not None:
        fig, ax = plt.subplots()
        fig.set_size_inches(10, 5)
        ax.pcolorfast(current_mask*spec)
        ax.set_ylim(100, 500)
        ax.set_title(f'best shift: {current_best}')

    return index_matrix[:, current_best:]

In [None]:
# Collects one row per (recording device, motor setting) with the extracted PSD.
sweep_study = pd.DataFrame(columns=['psd', 'freqs', 'std', 'name'])

# 1. sort bins
# row_sweep has to be selected BEFORE it is used; the previous order raised a
# NameError on a fresh kernel.
row_sweep = df_specs.loc[df_specs.title=="sweep"].iloc[0]
index_matrix = get_index_matrix(row_sweep.spec)
n_freq = 1

# index of the n_freq strongest bins over time
plt.figure()
for i in range(n_freq):
    plt.plot(range(index_matrix.shape[1]), index_matrix[i])

In [None]:
# 2. extract psd_df
import seaborn as sns
from frequency_analysis import extract_psd, psd_df_from_spec

spec = row_sweep.spec

psd_df = psd_df_from_spec(spec, row_sweep.freqs, index_matrix, 
                          min_t=270, max_t=550, n_freq=n_freq)
#psd_df = extract_psd_df(signals_f, mask_psd, min_t=250, max_t=520)
#print(psd_df.dtypes)
sns.scatterplot(data=psd_df, x='frequency', y='magnitude', hue='counter')
plt.yscale('log')
plt.xlim(2000, 5000)

# Largest magnitude per (mic, frequency) group; only the first point carries a
# label so that the legend shows a single 'max' entry.
plt.figure()
label = 'max'
for (i_mic, f), df in psd_df.groupby(['mic', 'frequency']):
    values = df.magnitude.values
    plt.scatter(f, np.max(values), color='C0', label=label)
    label=None

psd, freqs, psd_std = extract_psd(psd_df, method='median-reject')
# Store the standard deviation returned by extract_psd; the previous `std=std`
# picked up a stale global left behind by an earlier cell.
sweep_study.loc[len(sweep_study), :] = dict(
    psd=psd,
    freqs=freqs,
    std=psd_std,
    name='jack, sweep, nomotors'
)
# one line per mic, with a single legend entry
label = 'median-reject'
for i_mic in range(psd.shape[0]):
    plt.plot(freqs, psd[i_mic, :], label=label, color='C1')
    label = None
plt.yscale('log')
plt.legend()
plt.xlim(2000, 5000)
pass

clean pipeline with propellers:  
 1. get the spectrogram (already done)
 2. align the index matrix from the propeller-free sweep with this recording (`get_shifted_index_matrix`)
 3. use the shifted index matrix to extract the PSD

In [None]:
# Spectrogram of the sweep recorded with motors on, with the aligned
# strongest-bin track drawn on top.
#fig_all, ax_all = plt.subplots()
row_sweep_motors = df_specs.loc[df_specs.title == 'sweep 45000'].iloc[0]
freqs = row_sweep_motors.freqs
spec_motors = row_sweep_motors.spec
ax = plot_spectrogram(spec_motors, freqs, max_freq=5000)
ax.set_title("spectrogram with motors")

shift_index_matrix = get_shifted_index_matrix(index_matrix, row_sweep_motors.spec)
# `x` was never defined in this notebook (NameError on a fresh kernel). Use one
# time index per column of the shifted matrix.
# NOTE(review): assumes column k of shift_index_matrix belongs to spectrogram
# column k -- confirm against get_shifted_index_matrix.
x = np.arange(shift_index_matrix.shape[1])
ax.plot(x, freqs[shift_index_matrix[0]], color='white', ls=':')
ax.set_ylim(1000, 5000)

In [None]:
# 3. PSD of the sweep recorded with motors on, using the shifted index matrix.
fig_all, ax_all = plt.subplots()
psd_df = psd_df_from_spec(row_sweep_motors.spec, 
                          row_sweep_motors.freqs, 
                          shift_index_matrix, 
                          min_t=300, max_t=520, n_freq=1)
psd, freqs, stds = extract_psd(psd_df)
# store the freshly computed deviation (was a stale global `std`)
sweep_study.loc[len(sweep_study), :] = dict(
    psd=psd,
    freqs=freqs,
    std=stds,
    name='jack, sweep, motors'
)

# one error-bar line per mic; `means` was undefined -- the per-mic values are
# the first return value of extract_psd
for i_mic in range(psd.shape[0]):
    ax_all.errorbar(freqs, psd[i_mic], stds[i_mic], 
                    marker='o', ls='-', color=f'C{i_mic}', capsize=5.0)
ax_all.set_yscale('log')

# sweep study - audio deck

In [None]:
from evaluate_data import read_df

# Frequency band shown in the audio-deck plots and used to pick sweep bins.
plot_min_freq, plot_max_freq = 2000.0, 5000.0

# Keyword arguments for read_df; 'motors' is changed by a later cell.
params = {
    'degree': 0,
    'props': False,
    'snr': False,
    'source': 'sweep',
    'distance': None,
    'exp_name': '2021_01_15_snr_study_4',
    'motors': 0,
}
df_audio, df_pos = read_df(**params)

In [None]:
# A bare expression in the middle of a cell shows nothing; display explicitly.
ipd.display(df_audio.tail())
spec_audio_all, all_freqs = get_spectrogram(df_audio)
spec_audio = np.mean(spec_audio_all, axis=-2) # average over mics

times = range(110, 300) #len(df_audio))

fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
# `times` and `all_freqs` are passed as the x / y coordinates, so the plotted
# array is given one row and one column less than they have entries.
ax.pcolorfast(times, all_freqs, np.log10(spec_audio[:-1, times[:-1]]))
ax.set_xlabel('time idx [-]')
ax.set_ylabel('frequency [Hz]')
ax.set_title(f"audio deck spectrogram, motors: {params['motors']}")
ax.set_ylim(plot_min_freq, plot_max_freq)

In [None]:
# Only bins inside the plotted band may be picked as the strongest bin.
# (In-place: re-running this cell without the previous one keeps the zeros.)
spec_audio[(all_freqs < plot_min_freq) | (all_freqs > plot_max_freq)] = 0
index_matrix = np.argsort(spec_audio, axis=0)[::-1]

psd_df = psd_df_from_spec(spec_audio_all, all_freqs, index_matrix, min_t=times[0], max_t=times[-1], n_freq=1)
psd, freqs, std = extract_psd(psd_df)
# Append a new row. The previous `len(sweep_study)-1` overwrote the last
# existing row, and the name said 'motors' although this recording was loaded
# with params['motors'] = 0.
sweep_study.loc[len(sweep_study), :] = dict(
    psd=psd,
    freqs=freqs,
    std=std,
    name='audio_deck, sweep, nomotors'
)

plt.figure()
for i_mic in range(psd.shape[0]):
    #plt.errorbar(freqs, psd[i_mic], std[i_mic], label=f'mic{i_mic}')
    plt.scatter(freqs, psd[i_mic], label=f'mic{i_mic}')
plt.yscale('log')
plt.legend()
plt.xlim(2000, 5000)

In [None]:
# Same recording setup as before, now with all motors at 45000.
params['motors'] = 'all45000'
df_audio, df_pos = read_df(**params)

# A bare expression in the middle of a cell shows nothing; display explicitly.
ipd.display(df_audio.tail())
spec_audio_all, all_freqs = get_spectrogram(df_audio)
spec_audio = np.mean(spec_audio_all, axis=-2) # average over mics

times = range(110, 300) #len(df_audio))

fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
# `times` and `all_freqs` are passed as the x / y coordinates, so the plotted
# array is given one row and one column less than they have entries.
ax.pcolorfast(times, all_freqs, np.log10(spec_audio[:-1, times[:-1]]))
ax.set_xlabel('time idx [-]')
ax.set_ylabel('frequency [Hz]')
ax.set_title(f"audio deck spectrogram, motors: {params['motors']}")
ax.set_ylim(plot_min_freq, plot_max_freq)

In [None]:
# Re-use the index matrix found without motors, aligned to this recording,
# instead of re-sorting the (noisier) spectrogram.
#spec_audio[(all_freqs < plot_min_freq) | (all_freqs > plot_max_freq)] = 0
#mask = np.argsort(spec_audio, axis=0)[::-1]
shift_index_matrix = get_shifted_index_matrix(index_matrix, spec_audio_all)

psd_df = psd_df_from_spec(spec_audio_all, all_freqs, shift_index_matrix, min_t=times[0], max_t=times[-1], n_freq=1)
psd, freqs, std = extract_psd(psd_df)
# Append a new row; `len(sweep_study)-1` overwrote the last existing row.
sweep_study.loc[len(sweep_study), :] = dict(
    psd=psd,
    freqs=freqs,
    std=std,
    name='audio_deck, sweep, motors'
)

plt.figure()
for i_mic in range(psd.shape[0]):
    #plt.errorbar(freqs, psd[i_mic], std[i_mic], label=f'mic{i_mic}')
    plt.scatter(freqs, psd[i_mic], label=f'mic{i_mic}')
plt.yscale('log')
plt.legend()
plt.xlim(2000, 5000)

# Propeller noise study

In [None]:
# plot strongest frequency
# Finds the frequency that is most often the strongest bin over time and plots
# the loudness of that bin, marking where it is / is not the strongest one.
# NOTE: overwrites the kernel globals `spec`, `freqs`, `max_freq` and defines
# `ylim`, which the next cell reads.
row = df_specs.loc[df_specs.title == 'sweep 45000'].iloc[0]
#row = df_specs.loc[df_specs.title == '45000'].iloc[0]
#row = df_specs.loc[df_specs.title == '55000'].iloc[0]

# find strongest frequency above 100
# NOTE(review): axis=1 is presumably the mic axis (other cells use axis=-2) --
# the two only agree for a 3D spectrogram; confirm.
spec = np.mean(row.spec, axis=1)
# frequency (rounded to 0.1) of the strongest bin at each time index
max_freqs = np.round(row.freqs[np.argmax(spec, axis=0)], 1)
max_freqs_valid = max_freqs[max_freqs > 100]

# need to add the max value in the end to make sure this works for all bins
# (the unique values serve as bin edges, so every distinct frequency is counted
# in a bin of its own, including the largest one)
bins = np.r_[np.unique(max_freqs_valid), np.max(max_freqs_valid)]
count, freqs = np.histogram(max_freqs_valid, bins=bins)
# left edge of the fullest bin = most frequent strongest frequency
max_freq = freqs[np.argmax(count)]

valid_time_indices = np.where(max_freqs == max_freq)[0]
invalid_time_indices = np.where(max_freqs != max_freq)[0]
# row index of that frequency in the spectrogram
bin_ = np.where(np.round(row.freqs, 1)==max_freq)[0][0]

plt.figure()
plt.title(f'loudness of bin at {max_freq} Hz')
min_idx, max_idx = np.min(valid_time_indices), np.max(valid_time_indices)
# line from the first valid time index up to (not including) the last one
plt.semilogy(range(min_idx, max_idx), spec[bin_, min_idx:max_idx])
plt.scatter(valid_time_indices, spec[bin_, valid_time_indices], color='C1', label=f'{max_freq} is loudest bin')
# invalid_time_indices is sorted and every index below min_idx is invalid
# (min_idx is the first valid one), so dropping its first min_idx entries keeps
# exactly the invalid indices >= min_idx.
plt.scatter(invalid_time_indices[min_idx:], spec[bin_, invalid_time_indices[min_idx:]], color='C2', label=f'{max_freq} is not loudest bin')
# remember the y-limits so that the next figure can use the same scale
ylim = plt.ylim()
print(ylim)
plt.legend()

In [None]:
# Loudness of whichever bin is strongest at each time index, drawn with the
# y-limits (`ylim`) taken over from the previous figure.
max_bins_valid = np.argmax(spec, axis=0)
time_indices = range(spec.shape[1])
plt.figure()
plt.semilogy(spec[max_bins_valid, time_indices])
plt.title('loudness of loudest bin')
plt.ylim(*ylim)

# Study mechanical shields

In [None]:
# Row title (shield variant) -> wav file name inside the export folder.
exp_dict = {
    'before': 'motors_nosnr_noprops_mono3125.wav',
    'after': 'motors_nosnr_noprops_mono3125_none.wav',
    'foam-hotdog': 'motors_nosnr_noprops_mono3125_hotdog.wav',
    'foam-burger': 'motors_nosnr_noprops_mono3125_burger.wav',
    'foam-tacos': 'motors_nosnr_noprops_mono3125_tacos.wav',
    'glue-under': 'motors_nosnr_noprops_mono3125_glue-under.wav',
    'glue-both': 'motors_nosnr_noprops_mono3125_glue-both.wav',
    'paper': 'motors_nosnr_noprops_mono3125_paper.wav',
    'paper-short': 'motors_nosnr_noprops_mono3125_paper-short.wav',
    'paper-short-new': 'motors_nosnr_noprops_mono3125_paper-short-new.wav',
    'paper-short-higher': 'motors_nosnr_noprops_mono3125_paper-short-higher.wav',
    'paper-final': 'motors_nosnr_noprops_mono3125_paper-final.wav',
}

freq = 3125
start_time = 67

#base_filename = f'../experiments/2021_01_14_snr_study/export/'
#base_filename = f'../experiments/2021_01_15_snr_study/export/'
#base_filename = f'../experiments/2021_01_15_snr_study_2/export/'
base_filename = '../experiments/2021_01_21_snr_study_foam/export/'
window_list = ["tukey", "hann", "flattop", ""]
#window_list = ["flattop"]

# One row per (recording, window) combination.
df_specs = pd.DataFrame(columns=['title', 'spec', 'freqs', 'signal_time', 'window'])
for title, fname in exp_dict.items():
    filename = base_filename + fname

    # The raw buffers are read from the file name alone, so they are loaded
    # (and plotted) once per file instead of once per window.
    try:
        signals_wav = read_signal_from_wav(filename) # n_times, n_mics, n_buffer
    except FileNotFoundError:
        print('skipping', filename)
        continue

    # raw buffer of mic 0 at time index start_time
    signal = signals_wav[start_time, 0, :]
    plt.figure()
    plt.plot(signal)
    plt.title(f'{title}: time signal at idx: {start_time}')
    #plt.ylim(-1, 1)

    for window in window_list:
        try:
            df_wav = read_df_from_wav(filename, method_window=window)
        except FileNotFoundError:
            print('skipping', filename)
            continue
        spec, freqs = get_spectrogram(df_wav)

        ax = plot_spectrogram(spec, freqs, max_freq=5000)
        # name the window as well; otherwise all figures of one recording
        # carry the same title
        ax.set_title(f'{title}, window "{window}"')

        df_specs.loc[len(df_specs), :] = {
            'title': title,
            'spec':spec,
            'freqs':freqs,
            'signal_time': signals_wav,
            'window': window
        }
        print(title, window)

In [None]:
# Recordings to compare; keep exactly one assignment active.
#title_list = ['before', 'after', 'foam-hotdog', 'foam-burger', 'foam-tacos', 'glue-under', 'glue-both', 'paper']
#title_list = df_specs.title.unique()
title_list = ['after', 'paper', 'paper-short-higher', 'paper-final']
ls = ':'

min_freq = 100
med_freq = 7000
max_freq = 14000
number = 15       # number of consecutive time indices that are averaged
buzzer_freq = 3125
# zoom name -> (lower frequency limit, upper frequency limit, first time index)
zoom_dict = {
    'all': (med_freq, max_freq, 70),
    'lower-buzzer': (min_freq, med_freq, 5),
    'lower': (min_freq, med_freq, 70),
}
# distance between the buzzer frequency and the closest bin, taken from the
# last loaded recording (instead of a `freqs` left over from another cell)
print(np.min(np.abs(df_specs.freqs.iloc[-1]-buzzer_freq)))

# Dedicated loop variable names: the previous ones (min_freq, max_freq,
# start_time) silently overwrote the globals of the same name.
for zoom, (zoom_min, zoom_max, zoom_start) in zoom_dict.items():

    for window, df_window in df_specs.groupby('window'):
        fig, ax = plt.subplots()
        fig.set_size_inches(15, 5)
        for title in title_list:
            df_row = df_window.loc[df_window.title==title]
            assert len(df_row) == 1, f'expected one row for "{title}" / window "{window}", got {len(df_row)}'
            row = df_row.iloc[0]

            # average over mics, restricted to `number` time indices
            spec = np.mean(row.spec[..., zoom_start:zoom_start+number], axis=-2)
            freqs = row.freqs
            mask = (freqs<zoom_max) & (freqs>zoom_min)
            ax.semilogy(freqs[mask], np.mean(spec[mask], axis=1), label=title, ls=ls)
        ax.axvline(buzzer_freq, color='black')
        ax.set_title(f'average amplitude, window "{window}"')

        ax.legend(loc='upper right', framealpha=1.0)
        ax.set_xlabel('frequency [Hz]')
        ax.set_xlim(zoom_min, zoom_max)
        ax.set_ylim(1e-6, 5e2)
        fig.savefig(f'plots/comparison-{zoom}-{window}.eps', bbox_inches='tight')

# Denoising schemes

Compare the buzzer-only signal with the buzzer+propellers signal, under different denoising schemes.

In [None]:
from scipy.io import wavfile

# Recording used for the comparison; keep exactly one assignment active.
#fname = exp_dict['after']
fname = exp_dict['paper-final']
filename = f'{base_filename}{fname}'
fs, time_data = wavfile.read(filename)
print('read', filename, fs)

# Excerpt layout: the first excerpt starts at sample `sample_start`, the second
# one `second_start` seconds into the file; both last `duration` seconds.
sample_start = 100
second_start = 2.0
duration = 1.2
sample_props = int(fs * second_start)
n_samples = int(fs * duration)

In [None]:
# First excerpt: n_samples samples starting at sample_start; rendered as an
# audio player because it is the last expression of the cell.
buzzer_only = time_data[slice(sample_start, sample_start + n_samples)]
ipd.Audio(buzzer_only, rate=fs)

In [None]:
# Second excerpt: n_samples samples starting at sample_props; rendered as an
# audio player because it is the last expression of the cell.
buzzer_props = time_data[slice(sample_props, sample_props + n_samples)]
ipd.Audio(buzzer_props, rate=fs)

In [None]:
# Compare the two excerpts over a whole number of periods of the tone at `f`,
# in the time domain and in the frequency domain.
f = 3125
# samples per period of the tone; derived from `f` instead of a second,
# hard-coded 3125
period = round(1/f * fs)
n_periods = 100
n_fft = n_periods * period
print('number of samples to average over:', period)

excerpts = {
    'buzzer only': buzzer_only,
    'buzzer and props': buzzer_props,
}

# time domain
for name, excerpt in excerpts.items():
    plt.figure()
    plt.plot(excerpt[:n_fft])
    plt.title(f'{name}, first {n_periods} periods')
    plt.xlabel('time idx [-]')

# frequency domain
freqs = np.fft.rfftfreq(n_fft, 1/fs)
buzzer_only_f = np.fft.rfft(buzzer_only[:n_fft])
buzzer_props_f = np.fft.rfft(buzzer_props[:n_fft])
for name, excerpt_f in zip(excerpts, (buzzer_only_f, buzzer_props_f)):
    plt.figure()
    plt.semilogy(freqs, np.abs(excerpt_f))
    plt.title(f'{name}, magnitude spectrum')
    plt.xlabel('frequency [Hz]')
    plt.xlim(0, 5000)