In [1]:
import os
import time
import pickle
import numpy as np
import networkx as nx

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Load the recording; `fs` is the sample rate in Hz, `data` the raw samples.
data_file = "../data/scooter_example_1.wav"
fs, data = wavfile.read(data_file)

# crop data (times are measured from the start of the uncropped recording)
start_time = 80 # seconds
end_time = 160 # seconds
data = data[int(start_time*fs):int(end_time*fs)]

# data_file = "../data/12062025_example.wav"
# fs, data = wavfile.read(data_file)


# set time annotations
# Detections are given in seconds from the start of the *uncropped* recording
# and shifted into the cropped time base below, so changing start_time/end_time
# no longer requires re-deriving the sample offsets by hand.
# NOTE(review): these times presumably belong to scooter_example_1.wav - confirm
# before reusing them with another recording.
detections_sec = [
    (3, 37),    # 1st detection
    (91, 143),  # 2nd detection
]

time_annotations = np.zeros_like(data, dtype=int)
for detection_start, detection_end in detections_sec:
    # Clamp at 0: a detection that begins before the crop window would otherwise
    # give a negative index, which numpy interprets as counting from the end.
    # Indices past the end of the crop are truncated by the slice itself.
    first_sample = int(max(detection_start - start_time, 0) * fs)
    last_sample = int(max(detection_end - start_time, 0) * fs)
    time_annotations[first_sample:last_sample] = 1


print(f"Sample rate: {fs} Hz")
print(f"Data shape: {data.shape}")

Sample rate: 128000 Hz
Data shape: (10240000,)


  fs, data = wavfile.read(data_file)


In [3]:
# Spectrogram configuration
nperseg = 16384  # samples per window; candidates: 1024, 2048, 4096, 8192, 16384, 32768, 64768
hop = 0.25  # hop between consecutive windows, as a fraction of nperseg
noverlap = int((1 - hop) * nperseg)  # samples shared by neighbouring windows
window = "hann"
title = "Spectrogram"
colorscale = "Viridis"
crop_freq = None  # upper frequency limit (Hz) for the plot; set to None to disable cropping

# # Compute spectrogram
# frequencies, times, Sxx = signal.spectrogram(data, fs=fs, window=window, nperseg=nperseg, noverlap=noverlap)

# # Crop frequencies if specified
# if crop_freq is not None:
#     freq_mask = frequencies <= crop_freq
#     frequencies = frequencies[freq_mask]
#     Sxx = Sxx[freq_mask, :]

# # Convert to dB scale
# Sxx_db = 10 * np.log10(Sxx + 1e-10)  # Add small value to avoid log(0)
# # Sxx_db = np.clip(Sxx_db, a_min=-50, a_max=50)

# # Create the heatmap
# fig = go.Figure(data=go.Heatmap(z=Sxx_db, x=times, y=frequencies, colorscale=colorscale, colorbar=dict(title='Power (dB)')))

# # Update layout
# fig.update_layout(title=title, xaxis_title='Time (s)', yaxis_title='Frequency (Hz)', width=800, height=600)
# fig.show()

---

In [4]:
# preprocess into slices
# Cut the recording into fixed-length, overlapping slices and give each slice a
# binary label derived from the per-sample annotations.
slice_length_seconds = 1  # seconds
slice_hop = 0.5  # seconds between the starts of consecutive slices
annotation_threshold = 0.25  # fraction of slice that must be annotated to label the slice as positive

# Sample counts are used as array shapes and range bounds, so they must be
# integers even when a fractional duration is configured above.
slice_length_samples = int(slice_length_seconds * fs)
step = int(slice_hop * fs)
if slice_length_samples <= 0 or step <= 0:
    raise ValueError("slice_length_seconds and slice_hop must each cover at least one sample")

# A recording shorter than one slice yields zero slices; without the clamp the
# count goes negative and np.empty raises.
num_of_slices = max(0, (len(data) - slice_length_samples) // step + 1)

data_slices = np.empty((num_of_slices, slice_length_samples))
slice_annotations = np.zeros(num_of_slices, dtype=int)

for slice_idx in range(num_of_slices):
    start = slice_idx * step
    end = start + slice_length_samples
    data_slices[slice_idx, :] = data[start:end]
    # positive when at least `annotation_threshold` of the samples are annotated
    slice_annotations[slice_idx] = 1 if np.mean(time_annotations[start:end]) >= annotation_threshold else 0

# print some stats
print(f"Total slices: {len(data_slices)}")
print(f"Positive slices: {np.sum(slice_annotations)}")
print(f"Negative slices: {len(data_slices) - np.sum(slice_annotations)}")

# Make sure the output directory exists so the dump does not fail on a fresh checkout.
slices_path = "../data/processed_data/slices.pkl"
os.makedirs(os.path.dirname(slices_path), exist_ok=True)
with open(slices_path, "wb") as f:
    pickle.dump((data_slices, slice_annotations), f)


Total slices: 159
Positive slices: 105
Negative slices: 54


In [None]:
def compute_spectral_graph_features(slice, fs=fs, nperseg=8192, noverlap=2048, window='hann', with_prints=False):
    """Return the Laplacian spectrum of a frequency-bin correlation graph.

    Pipeline: spectrogram -> high-pass filter along the frequency axis ->
    correlation between frequency bins (across time frames) -> thresholded
    adjacency matrix -> graph Laplacian -> eigenvalues.

    Parameters
    ----------
    slice : array_like
        Samples of one audio slice. The name shadows the ``slice`` builtin
        inside this function; it is kept for caller compatibility.
    fs : int or float
        Sample rate in Hz. The default is the notebook-level ``fs`` as it was
        when this cell was executed (defaults are bound at definition time),
        so re-run the cell after loading a recording with another sample rate.
    nperseg, noverlap, window
        Passed straight through to ``scipy.signal.spectrogram``.
    with_prints : bool
        When True, print the wall-clock time spent in each stage.

    Returns
    -------
    numpy.ndarray
        Eigenvalues of the graph Laplacian in ascending order, one per
        spectrogram frequency bin.
    """
    if with_prints:
        print("Starting spectral graph feature computation...")

    _time = time.time()

    def _report(stage):
        """Print the time spent on `stage` (if enabled) and restart the stage timer."""
        nonlocal _time
        if with_prints:
            print(f" ---------------------- {stage} time: {time.time() - _time:.3f} seconds")
        _time = time.time()

    # spectrogram: rows of Sxx are frequency bins, columns are time frames
    frequencies, times, Sxx = signal.spectrogram(slice, fs=fs, window=window, nperseg=nperseg, noverlap=noverlap)
    _report("Spectrogram computation")

    # convert to dB scale
    # NOTE(review): Sxx_db is never used below - the filter and correlation run
    # on the linear-power Sxx. Confirm whether the dB version was meant to be
    # used; left unchanged so existing feature values stay comparable.
    Sxx_db = 10 * np.log10(Sxx + 1e-10)  # Add small value to avoid log(0)
    _report("dB conversion")

    # high-pass filter
    # NOTE(review): the filter runs along axis 0 (the frequency axis) while the
    # cutoff is normalised by the audio Nyquist rate - confirm this is the
    # intended detrending rather than a time-domain filter on the raw signal.
    cutoff_freq = 10  # Hz
    b, a = butter(4, cutoff_freq / (0.5 * fs), btype='high')
    Sxx = filtfilt(b, a, Sxx, axis=0)
    _report("High-pass filter")

    # compute correlation matrix (np.corrcoef treats each row, i.e. each
    # frequency bin, as one variable observed over the time frames)
    corr_matrix = np.corrcoef(Sxx)
    _report("Correlation matrix computation")

    # compute adjacency matrix
    # Keep the top 25% of correlations: an edge exists where the correlation is
    # at or above the 75th percentile. (The previous `<= 25th percentile` test
    # kept the *lowest* quarter instead.) nanpercentile ignores the NaN entries
    # that corrcoef produces for constant rows; NaN compares False, so such
    # bins simply end up without edges.
    threshold = np.nanpercentile(corr_matrix, 75)
    adj_matrix = (corr_matrix >= threshold).astype(int)
    np.fill_diagonal(adj_matrix, 0)  # remove self-loops
    _report("Adjacency matrix computation")

    # create graph from adjacency matrix
    G = nx.from_numpy_array(adj_matrix)
    _report("Graph creation")

    # compute Laplacian matrix
    L = nx.laplacian_matrix(G).toarray()
    _report("Laplacian matrix computation")

    # compute eigenvalues only: the eigenvectors were never used, and eigvalsh
    # returns the same ascending eigenvalues without computing them
    eigenvalues = np.linalg.eigvalsh(L)
    _report("Eigen decomposition")

    return eigenvalues

In [None]:
# # compute features for all slices
# features = []
# print("Computing features for all slices...")
# for slice in data_slices:
#     _features = compute_spectral_graph_features(slice, with_prints=False)

#     _file_name = f"../data/processed_data/features_slice_{len(features)+1}.pkl"
#     with open(_file_name, "wb") as f:
#         pickle.dump(_features, f)

#     features.append(_features)
#     print(f"Computed features for slice {len(features)+1}/{len(data_slices)}")
    
# with open("../data/processed_data/slices_and_features.pkl", "wb") as f:
#     pickle.dump((data_slices, slice_annotations, features), f)

Computing features for all slices...
Computed features for slice 2/159
Computed features for slice 3/159
Computed features for slice 4/159
Computed features for slice 5/159
Computed features for slice 6/159
Computed features for slice 7/159
Computed features for slice 8/159
Computed features for slice 9/159
Computed features for slice 10/159
Computed features for slice 11/159
Computed features for slice 12/159
Computed features for slice 13/159
Computed features for slice 14/159
Computed features for slice 15/159
Computed features for slice 16/159
Computed features for slice 17/159
Computed features for slice 18/159
Computed features for slice 19/159
Computed features for slice 20/159
Computed features for slice 21/159
Computed features for slice 22/159
Computed features for slice 23/159
Computed features for slice 24/159
Computed features for slice 25/159
Computed features for slice 26/159
Computed features for slice 27/159
Computed features for slice 28/159
Computed features for slic