## try information graph from 5s slices (combine nb7+nb12)

In [3]:
import os
import time
import pickle
import networkx as nx
import matplotlib.pyplot as plt

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Load the recording: wavfile.read returns the sample rate (fs) and the samples (data).
data_file = "../data/scooter_example_1.wav"
fs, data = wavfile.read(data_file)

# crop data to the [start_time, end_time) window, converted from seconds to sample indices
start_time = 60 # seconds
end_time = 160 # seconds
data = data[int(start_time*fs):int(end_time*fs)]

# data_file = "../data/12062025_example.wav"
# fs, data = wavfile.read(data_file)


# set time annotations (one integer label per sample: 1 = detection, 0 = background)
# 1st detection: 3-37 seconds
# 2nd detection: 91-143 seconds
# need to account for time cropping
# NOTE(review): with start_time = 60 the 2nd detection (91-143 s) would map to
# 31-83 s of the cropped signal, but the indices below mark 11-63 s, which matches
# an 80 s offset instead. Confirm which offset is intended before trusting the labels.
time_annotations = np.zeros_like(data, dtype=int)
# time_annotations[int(3*fs):int(37*fs)] = 1
time_annotations[int(11*fs):int(63*fs)] = 1


# Sanity-check the crop length and the positive/negative label balance.
print(f"Sample rate: {fs} Hz")
print(f"Data shape: {data.shape}")
print(f"Positive Annotations: {np.sum(time_annotations)} samples, {np.sum(time_annotations)/fs} seconds")
print(f"Negative Annotations: {np.sum(time_annotations==0)} samples, {np.sum(time_annotations==0)/fs} seconds")

Sample rate: 128000 Hz
Data shape: (12800000,)
Positive Annotations: 6656000 samples, 52.0 seconds
Negative Annotations: 6144000 samples, 48.0 seconds



Chunk (non-data) not understood, skipping it.



In [10]:
# Spectrogram of the whole cropped recording, using ut.get_spectrogram's default settings.
# Note: f, t and Sxx are reused (rebound) by several later cells.
f, t, Sxx = ut.get_spectrogram(data, fs)
ut.plot_spectrogram(f, t, Sxx)

In [11]:
# Split the recording into 5 s slices and obtain one label per slice.
# How slices overlap and how per-sample labels are reduced to a per-slice label is
# decided inside ut.slice_signal (presumably 50% overlap, given 39 slices from 100 s — verify).
slices, annotations = ut.slice_signal(data, time_annotations, fs, slice_duration=5.0)
print(f"Slices shape: {slices.shape}")
print(f"Annotations shape: {annotations.shape}")

Slices shape: (39, 640000)
Annotations shape: (39,)


In [67]:
# find slice at time 40 seconds, account for 50% overlap
# (assuming 5 s slices that start every 2.5 s, the slice starting at 40 s is (40 / 5) * 2)
slice_index = int(40/5) * 2
print(f"Slice index at time 40 seconds: {slice_index}")
s = slices[slice_index]
a = annotations[slice_index]
print(f"Slice annotation: {a}")
# Spectrogram of the selected slice only.
# NOTE(review): noverlap=0.2 and crop_freq=2000 are interpreted by ut.get_spectrogram;
# presumably an overlap fraction and an upper frequency limit in Hz — confirm in utils.
f, t, Sxx = ut.get_spectrogram(s, fs, nperseg=8192, noverlap=0.2, crop_freq=2000)
ut.plot_spectrogram(f, t, Sxx)

Slice index at time 40 seconds: 16
Slice annotation: 1


In [68]:
# Derive a bin-by-bin adjacency matrix from the current spectrogram.
# (optional keyword tried earlier: percentile_threshold=90)
adjacency_matrix = ut.compute_adjacency_matrix(Sxx)

# Show the matrix as a square heatmap with the frequency axis on both sides.
fig = go.Figure()
fig.add_trace(go.Heatmap(z=adjacency_matrix, x=f, y=f))
fig.update_layout(
    title="Adjacency Matrix",
    xaxis_title="Frequency [Hz]",
    yaxis_title="Frequency [Hz]",
    height=600,
    width=600,
)
fig.show()

In [41]:
# Decompose the adjacency matrix; the three results are presumably eigenvalues,
# eigenvectors and derived components (see ut.graph_decomposition to confirm).
e_vals, e_vecs, e_comps = ut.graph_decomposition(adjacency_matrix)

In [42]:
# plot eigenvalues
fig = go.Figure()
fig.add_trace(go.Scatter(y=e_vals, mode='markers+lines', name='Eigenvalues'))
fig.update_layout(title="Eigenvalues", xaxis_title="Index", yaxis_title="Eigenvalue", height=400, width=600)
fig.show()

In [63]:
# Collect the e_vals output of the graph decomposition for every slice.
# NOTE: s, f, t, Sxx, adjacency_matrix, e_vals, e_vecs and e_comps are rebound on each
# iteration, so after this cell they hold the values for the LAST slice, not slice 16.
vecs = []

for i in range(slices.shape[0]):
    s = slices[i]
    # a = annotations[i]
    # Same spectrogram settings as the single-slice cell (nperseg=8192, crop_freq=2000).
    f, t, Sxx = ut.get_spectrogram(s, fs, nperseg=8192, noverlap=0.2, crop_freq=2000)
    adjacency_matrix = ut.compute_adjacency_matrix(Sxx)  #, percentile_threshold=90)
    e_vals, e_vecs, e_comps = ut.graph_decomposition(adjacency_matrix)
    # Only the first return value is kept; e_vecs and e_comps are discarded.
    vecs.append(e_vals)

In [64]:
# plot vecs
vecs = np.array(vecs)
print(f"Vecs shape: {vecs.shape}")
num_eigenvalues = 10

fig = go.Figure()
for i in range(vecs.shape[0]):
    fig.add_trace(go.Scatter(y=vecs[i, :num_eigenvalues], mode='markers+lines', name=f'vector {i+1}', 
                             line=dict(color=("blue" if annotations[i] else "red"))))
fig.update_layout(title="Eigenvalues over Slices", xaxis_title="Slice Index", yaxis_title="Eigenvalue", height=400, width=600)
fig.show()

Vecs shape: (39, 89)


In [51]:
# find slice at time 40 seconds, account for 50% overlap
# (assuming 5 s slices that start every 2.5 s, the slice starting at 40 s is (40 / 5) * 2)
# This repeats the earlier single-slice cell but with crop_freq=16000 instead of 2000;
# the entropy cell that follows reads the Sxx produced here.
slice_index = int(40/5) * 2
print(f"Slice index at time 40 seconds: {slice_index}")
s = slices[slice_index]
a = annotations[slice_index]
print(f"Slice annotation: {a}")
# NOTE(review): noverlap=0.2 and crop_freq are interpreted by ut.get_spectrogram;
# presumably an overlap fraction and an upper frequency limit in Hz — confirm in utils.
f, t, Sxx = ut.get_spectrogram(s, fs, nperseg=8192, noverlap=0.2, crop_freq=16000)
ut.plot_spectrogram(f, t, Sxx)

Slice index at time 40 seconds: 16
Slice annotation: 1


In [52]:
# calculate entropy of each frequency component over time
entropies = []
for i in range(Sxx.shape[0]):
    p = Sxx[i, :] / np.sum(Sxx[i, :])
    p = p[p > 0]
    entropy = -np.sum(p * np.log2(p))
    entropies.append(entropy)
entropies = np.array(entropies)

# plot entropies
fig = go.Figure()
fig.add_trace(go.Scatter(y=entropies, mode='markers+lines', name='Entropy'))
fig.update_layout(title="Entropy of Frequency Components", xaxis_title="Frequency Bin", yaxis_title="Entropy", height=400, width=600)
fig.show()

In [53]:
def get_entropies(Sxx):
    """Return the Shannon entropy (in bits) of every row of a 2-D array.

    Each row is normalised by its own sum so that it can be read as a
    probability distribution along the second axis; zero entries are ignored
    (their contribution ``0 * log2(0)`` is taken as 0).

    Parameters
    ----------
    Sxx : 2-D array
        Indexed as ``Sxx[row, column]``; one entropy value is produced per row.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``Sxx.shape[0]``. A row whose sum is exactly
        zero gets entropy 0.0 rather than triggering a division by zero.
    """
    entropies = np.zeros(Sxx.shape[0])
    for i in range(Sxx.shape[0]):
        row = Sxx[i, :]
        total = np.sum(row)
        if total == 0:
            # Nothing to normalise: leave the pre-filled 0.0 and avoid NaN/RuntimeWarning.
            continue
        p = row / total
        p = p[p > 0]  # log2 is undefined at 0; such terms contribute nothing
        entropies[i] = -np.sum(p * np.log2(p))
    return entropies

In [62]:
# Compute one spectrogram up front only to learn how many frequency bins there are,
# so the (n_slices, n_bins) result array can be pre-allocated.
_f, _t, _Sxx = ut.get_spectrogram(slices[0], fs, nperseg=8192, noverlap=0.2, crop_freq=2000)
entropy = np.zeros((slices.shape[0], len(_f)))

# Per-bin entropy for every slice (requires get_entropies to be defined already).
# f, t and Sxx are rebound each iteration and end up holding the last slice's values.
for i, s in enumerate(slices):
    f, t, Sxx = ut.get_spectrogram(s, fs, nperseg=8192, noverlap=0.2, crop_freq=2000)
    entropy[i, :] = get_entropies(Sxx)

# plot entropy over slices
# Transposed so slices run along x and frequency along y; the y-axis uses f from the
# last loop iteration, which assumes every slice yields the same frequency grid.
fig = go.Figure(go.Heatmap(z=entropy.T, x=np.arange(slices.shape[0]), y=f))
fig.update_layout(title="Entropy over Slices", xaxis_title="Slice Index", yaxis_title="Frequency [Hz]", height=600, width=600)
fig.show()

**mutual information formula**

$$
I(X,Y) = H(X) + H(Y) - H(X, Y)
$$

$$
H(X) = - \sum_i{p(x_i)\log_2{p(x_i)}}
$$

$$
H(X, Y) = - \sum_{i,j}{p(x_i, y_j)\log_2{p(x_i, y_j)}}
$$

**normalized mutual information**

$$
NMI(X, Y) = \frac{2I(X, Y)}{H(X) + H(Y)}
$$