In [5]:
# imports and settings

import os
import time
import pickle
import librosa
import warnings
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from copy import deepcopy

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile
from scipy.stats import wasserstein_distance_nd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
%load_ext autoreload
%autoreload 2

# Suppress every warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter (no category or module restriction),
# so deprecation and numerical warnings are hidden as well — consider narrowing it.
warnings.filterwarnings("ignore")

# Marker in the cell output showing that the setup cell ran to completion.
print("Imports complete.")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Imports complete.


In [4]:
# Collect every .wav file under the dataset root. Each immediate subfolder is
# treated as one class, and its name is used as the label.
folder_path = '../data/from_mark/Vessel SPL dictionary Project/audio mes'
all_files = []  # full path of each audio file
all_cls = []    # class label (subfolder name) of the file at the same index

# os.listdir returns entries in arbitrary order; sort both levels so that
# positional picks made later (e.g. all_files[10]) are stable across runs and machines.
subfolder_list = sorted(os.listdir(folder_path))
for subfolder in subfolder_list:
    subfolder_path = os.path.join(folder_path, subfolder)
    if not os.path.isdir(subfolder_path):
        continue  # stray file at the dataset root, not a class folder
    for wav_name in sorted(os.listdir(subfolder_path)):
        if wav_name.endswith('.wav'):
            all_files.append(os.path.join(subfolder_path, wav_name))
            all_cls.append(subfolder)

# Report the number of classes that actually contributed files; counting
# subfolder_list would also include non-directory entries and empty folders.
n_classes = len(set(all_cls))
print(f"Found {len(all_files)} audio files in {n_classes} classes.")

Found 1349 audio files in 21 classes.


In [63]:
# Analysis parameters shared by the cells below.

# -- spectrogram (forwarded to ut.calc_spectrogram) --
# Samples per FFT segment; duration is fft_nperseg / fs
# (about 0.51 s at 16 kHz, about 0.19 s at 44.1 kHz).
# NOTE(review): the earlier note said "~0.2 sec" — confirm against the files' real rate.
fft_nperseg = 8192
window = 'hamming'
percent_overlap = 0.0
remove_dc = 20    # presumably a low-frequency cutoff in Hz — TODO confirm in utils
crop_freq = 4000  # presumably the highest frequency kept, in Hz — TODO confirm in utils

# -- sampling --
# Described originally as the "original sampling rate" (Hz).
target_fs = 16000

# -- S2G --
nbins = 20  # number of levels for S2G

# -- Welch PSD (forwarded to ut.calc_welch_from_spectrogram) --
normalization_window_size = 9

In [64]:
# Single-file demo: spectrogram, Welch PSD and one K value per frequency bin.
f = all_files[10]
print(f)

# sr=None asks librosa to keep the file's native sampling rate (no resampling).
data, fs = librosa.load(f, sr=None)
F, T, Sxx, phase = ut.calc_spectrogram(data, fs, nperseg=fft_nperseg, percent_overlap=percent_overlap, window=window, remove_dc=remove_dc, crop_freq=crop_freq)
pxx = ut.calc_welch_from_spectrogram(Sxx, normalization_window_size=normalization_window_size)

# One K per frequency bin, computed from that bin's row of `phase`.
# The row index is used directly: looking it up with argmin(|F - f|) for every
# f in F only recovered the loop index at O(n^2) cost, and reusing `f` as the
# loop variable overwrote the file path stored in `f` above.
Ks = []
for f_idx in range(len(F)):
    x = phase[f_idx, :]
    transitions = ut.get_s2g(x, n_levels=nbins)
    K = ut.get_K(transitions)
    Ks.append(K)

# Three panels sharing the frequency axis; one title per column (the K panel
# previously had none because only two titles were supplied).
fig = make_subplots(rows=1, cols=3, subplot_titles=('Spectrogram', 'Welch PSD', 'K values'), shared_yaxes=True, horizontal_spacing=0.1, column_widths=[0.6, 0.2, 0.2])
fig.add_trace(go.Heatmap(z=Sxx, x=T, y=F, colorscale='Viridis', colorbar=dict(title='Intensity'), showscale=False), row=1, col=1)
fig.add_trace(go.Scatter(x=pxx, y=F, mode='lines', name='Welch PSD', showlegend=False), row=1, col=2)
fig.add_trace(go.Scatter(x=Ks, y=F, orientation='h', name='K values', showlegend=False), row=1, col=3)

# Axis labels, column by column.
fig.update_yaxes(title_text='Frequency (Hz)', row=1, col=1)
fig.update_xaxes(title_text='Time (s)', row=1, col=1)
fig.update_yaxes(title_text='Frequency (Hz)', row=1, col=2)
fig.update_xaxes(title_text='Power/Frequency (dB/Hz)', row=1, col=2)
fig.update_yaxes(title_text='Frequency (Hz)', row=1, col=3)
fig.update_xaxes(title_text='K value', row=1, col=3)

fig.update_layout(height=600, width=900, title_text='Spectrogram and Welch PSD')
fig.show()

../data/from_mark/Vessel SPL dictionary Project/audio mes/ferry - jarold/Yacht_03.09.23_100017_20secCPA_01.wav


In [67]:
# Effect of additive white Gaussian noise on the S2G output and on the graph
# built from it, for ten noise levels applied to the first second of one file.

# use a file path from all_files (avoid the variable name collision with 'f' used for frequencies)
audio_file = all_files[10]
# NOTE(review): no `sr` is passed, so librosa resamples to its default rate;
# the spectrogram cell loads the same file with sr=None — confirm this is intended.
data, fs = librosa.load(audio_file)  # , sr=16000)
data = data[:1*fs]  # use only first 1 seconds
# NOTE(review): this rebinds the notebook-wide `nbins` set in the parameters cell.
nbins = 10  # number of levels for S2G

# Signal-to-noise levels as LINEAR power ratios (signal_power / noise_power),
# which is how the value is used below. The matching dB figure, 10*log10(ratio),
# is what the subplot titles show, so label and computation agree
# (0.1 .. 1.0 corresponds to -10 dB .. 0 dB).
snr_ratios = [0.1 * (i + 1) for i in range(10)]
snr_db = [10 * np.log10(ratio) for ratio in snr_ratios]

# Seeded generator so the noise, and therefore the figure, is reproducible.
rng = np.random.default_rng(0)

# Loop-invariant: mean power of the clean signal.
signal_power = np.mean(data**2)

fig = make_subplots(rows=2, cols=10, horizontal_spacing=0.02, subplot_titles=[f'SNR={db:.1f}dB' for db in snr_db])
for i, SNR in enumerate(snr_ratios):
    noise_power = signal_power / SNR
    # Unit-variance Gaussian samples must be scaled by the standard deviation,
    # sqrt(noise_power), for the noise to have power `noise_power`; scaling by
    # noise_power itself yields noise of power noise_power**2.
    noise = np.sqrt(noise_power) * rng.normal(size=data.shape)
    noisy_data = data + noise

    # Row 1: S2G output as a heatmap, with its K value as the x-axis label.
    x = ut.get_s2g(noisy_data, n_levels=nbins)
    K = ut.get_K(x)
    fig.add_trace(go.Heatmap(z=x, colorscale='Viridis', showscale=False), row=1, col=i+1)
    fig.update_xaxes(title_text=f'K={K:.2f}', row=1, col=i+1)

    # Row 2: graph from S2G; copy the traces out of the helper's figure.
    G = ut.get_s2g_graph(noisy_data, n_levels=nbins)
    fig_g = ut.draw_graph(G)
    for tr in fig_g.data:
        fig.add_trace(deepcopy(tr), row=2, col=i+1)

fig.update_layout(title="different SNR levels", font=dict(size=12), height=600, width=1600)
fig.show()