In [1]:
import os
import time
import pickle
import networkx as nx
import matplotlib.pyplot as plt

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
from utils import try_except_decorator
%load_ext autoreload
%autoreload 2

In [2]:
data_file1 = "../data/scooter_example_1.wav"
fs1, data1 = wavfile.read(data_file1)

# Keep only the [start_time, end_time) window of the recording (seconds).
start_time = 60
end_time = 160
data1 = data1[int(start_time*fs1):int(end_time*fs1)]

# Annotated detections, in seconds measured on the uncropped recording.
d1_start = 3    # 1st detection: 3-37 seconds
d1_end = 37
d2_start = 91   # 2nd detection: 91-143 seconds
d2_end = 143

# Per-sample labels for the cropped signal: 1 inside a detection, 0 elsewhere.
# Each detection is clipped to the crop window and shifted so that time 0 is
# the first retained sample; detections entirely outside the window are skipped.
time_annotations = np.zeros_like(data1, dtype=int)
for det_start, det_end in ((d1_start, d1_end), (d2_start, d2_end)):
    if det_end <= start_time or det_start >= end_time:
        continue
    s = max(det_start, start_time) - start_time
    e = min(det_end, end_time) - start_time
    time_annotations[int(s*fs1):int(e*fs1)] = 1

print(f"Sample rate: {fs1} Hz")
print(f"Data shape: {data1.shape}")
print(f"Positive Annotations: {np.sum(time_annotations)} samples, {np.sum(time_annotations)/fs1} seconds")
print(f"Negative Annotations: {np.sum(time_annotations==0)} samples, {np.sum(time_annotations==0)/fs1} seconds")

# Optional: spectrogram of the whole cropped recording.
# f_spec, t, Sxx = ut.get_spectrogram(data1, fs1, crop_freq=5000)
# ut.plot_spectrogram(f_spec, t, Sxx)

# Five-second excerpt (35-40 s of the cropped signal) used by the analysis below.
slice1 = data1[int(35*fs1):int(40*fs1)]

f_spec1, t1, Sxx1 = ut.get_spectrogram(slice1, fs1, nperseg=8192)
ut.plot_spectrogram(f_spec1, t1, Sxx1)

  fs1, data1 = wavfile.read(data_file1)


Sample rate: 128000 Hz
Data shape: (12800000,)
Positive Annotations: 6656000 samples, 52.0 seconds
Negative Annotations: 6144000 samples, 48.0 seconds


In [3]:
data_file2 = "../data/12062025_example.wav"
fs2, data2 = wavfile.read(data_file2)

# Keep only the [start_time, end_time) window of the recording (seconds).
start_time = 0
end_time = 120
data2 = data2[int(start_time*fs2):int(end_time*fs2)]

# Annotated detections, in seconds measured on the uncropped recording.
d1_start = 25.5  # 1st detection: 25.5-28.5 seconds
d1_end = 28.5
d2_start = 30.5  # 2nd detection: 30.5-35 seconds
d2_end = 35
d3_start = 52    # 3rd detection: 52-120 seconds
d3_end = 120

# Per-sample labels for the cropped signal: 1 inside a detection, 0 elsewhere.
# Each detection is clipped to the crop window and shifted so that time 0 is
# the first retained sample; detections entirely outside the window are skipped.
time_annotations = np.zeros_like(data2, dtype=int)
for det_start, det_end in ((d1_start, d1_end), (d2_start, d2_end), (d3_start, d3_end)):
    if det_end <= start_time or det_start >= end_time:
        continue
    s = max(det_start, start_time) - start_time
    e = min(det_end, end_time) - start_time
    time_annotations[int(s*fs2):int(e*fs2)] = 1

print(f"Sample rate: {fs2} Hz")
print(f"Data shape: {data2.shape}")
print(f"Positive Annotations: {np.sum(time_annotations)} samples, {np.sum(time_annotations)/fs2} seconds")
print(f"Negative Annotations: {np.sum(time_annotations==0)} samples, {np.sum(time_annotations==0)/fs2} seconds")

# Optional: spectrogram of the whole cropped recording.
# f_spec, t, Sxx = ut.get_spectrogram(data2, fs2)
# ut.plot_spectrogram(f_spec, t, Sxx)

# Five-second excerpt (85-90 s of the cropped signal) used by the analysis below.
slice2 = data2[int(85*fs2):int(90*fs2)]

f_spec2, t2, Sxx2 = ut.get_spectrogram(slice2, fs2, nperseg=8192)
ut.plot_spectrogram(f_spec2, t2, Sxx2)

Sample rate: 128000 Hz
Data shape: (15360000,)
Positive Annotations: 9664000 samples, 75.5 seconds
Negative Annotations: 5696000 samples, 44.5 seconds



Chunk (non-data) not understood, skipping it.



---

In [4]:
def normalize_data(x):
    """Min-max scale a signal into the closed interval [0, 1].

    Parameters
    ----------
    x : array_like
        Numeric samples of any shape.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as ``x`` in which the
        smallest sample maps to 0 and the largest to 1. A constant signal has
        no range to scale by and maps to an all-zero array of the same shape.
        An empty input yields an empty float array.
    """
    # Halving does not change the min-max result. True division promotes
    # integer samples to float, so the xmax - xmin range below is computed in
    # floating point instead of in the input's (possibly narrow) integer dtype.
    x = np.asarray(x) / 2
    if x.size == 0:
        # np.min / np.max raise on empty arrays; there is nothing to scale.
        return x
    xmin = np.min(x)
    xmax = np.max(x)
    if xmax == xmin:
        # Return an array (not a bare scalar) so callers can index the result.
        return np.zeros_like(x)
    return (x - xmin) / (xmax - xmin)

In [5]:
def quantize_data(x, n_levels):
    """Map samples in [0, 1] onto integer levels ``0 .. n_levels - 1``.

    Parameters
    ----------
    x : numpy.ndarray
        Samples, expected to be normalized to the interval [0, 1].
    n_levels : int
        Number of quantization levels.

    Returns
    -------
    numpy.ndarray
        Integer array of level indices with the same shape as ``x``.
    """
    top_level = n_levels - 1
    levels = np.floor(n_levels * x).astype(int)
    # A sample of exactly 1.0 lands on n_levels, one past the last valid
    # index; fold it back into the top level.
    at_upper_edge = levels == n_levels
    levels[at_upper_edge] = top_level
    return levels

In [6]:
def get_transition_matrix(x_quantized, n_levels):
    """Count transitions between distinct consecutive quantization levels.

    Parameters
    ----------
    x_quantized : array_like of int
        One-dimensional sequence of level indices.
    n_levels : int
        Number of levels; the result has shape ``(n_levels, n_levels)``.

    Returns
    -------
    numpy.ndarray
        Dense integer matrix whose entry ``[a, b]`` is the number of times a
        sample at level ``a`` is immediately followed by a sample at a
        different level ``b``. Consecutive samples at the same level are not
        counted, so the diagonal stays zero.
    """
    states = np.asarray(x_quantized)
    transitions = np.zeros((n_levels, n_levels), dtype=int)
    if states.size < 2:
        # Fewer than two samples means there are no consecutive pairs.
        return transitions

    previous = states[:-1]
    following = states[1:]
    changed = previous != following
    # np.add.at accumulates repeated (from, to) pairs; a plain fancy-indexed
    # "+= 1" would count each distinct pair only once.
    np.add.at(transitions, (previous[changed], following[changed]), 1)
    return transitions

In [58]:
# process slice1
# n_levels is the number of quantization bins handed to quantize_data; the same
# value is later used as the side length of the transition matrix and as the
# node count of the graph.
# NOTE(review): 66000 is presumably chosen to exceed the 65536 distinct values
# of 16-bit PCM audio - confirm against the dtype returned by wavfile.read.
n_levels = 66000
# Scale slice1 into [0, 1], then map each sample to an integer level index.
normalized_slice1 = normalize_data(slice1)
quantized_slice1 = quantize_data(normalized_slice1, n_levels)

In [59]:
# Distribution of the quantized levels of slice1, pooled into 1000 bins.
hist, bins = np.histogram(quantized_slice1, bins = 1000)
# np.histogram returns one more edge than counts; dropping the last edge
# places each bar at the left edge of its bin.
fig = go.Figure(data=go.Bar(x=bins[:-1], y=hist))
# Label the figure so it can be read on its own when the notebook is skimmed.
fig.update_layout(
    title='Histogram of Quantized Levels (slice1)',
    xaxis_title='Quantized Level',
    yaxis_title='Sample Count'
)
fig.show()

In [60]:
# Count level-to-level transitions between consecutive samples of slice1.
# The result is a dense n_levels x n_levels integer array, so its size grows
# quadratically with n_levels.
transition_matrix1 = get_transition_matrix(quantized_slice1, n_levels)

In [61]:
# dots = np.nonzero(transition_matrix1)
# values = transition_matrix1[dots]

# fig = go.Figure(data=go.Scattergl(
#     x=dots[0],
#     y=dots[1],
#     mode='markers',
#     marker=dict(
#         size=2,
#         color=values,
#         colorscale='Viridis',
#         colorbar=dict(title='Transition Count'),
#         showscale=True
#     )
# ))
# fig.update_layout(
#     title='State Transition Matrix Scatter Plot',
#     xaxis_title='From State',
#     yaxis_title='To State'
# )
# fig.show()

In [62]:
# Compute K for slice1 from its transition matrix using the project helper
# ut.get_K; the bare name on the last line displays the value as cell output.
K1 = ut.get_K(transition_matrix1)
K1

array([[0.0001447]])

In [63]:
# Same pipeline for slice2: normalize, quantize, count transitions, compute K.
# NOTE(review): slice1 was processed with the notebook-local normalize_data /
# quantize_data / get_transition_matrix, while this cell calls the versions in
# utils. Confirm the two implementations are identical, otherwise K1 and K2
# are not directly comparable.
normalized_slice2 = ut.normalize_data(slice2)
quantized_slice2 = ut.quantize_data(normalized_slice2, n_levels)
transition_matrix2 = ut.get_transition_matrix(quantized_slice2, n_levels)
K2 = ut.get_K(transition_matrix2)
K2

array([[2.89458219e-05]])

In [64]:
# Edge list derived from the quantized slice1 signal by the project helper.
edges1 = ut.get_s2g_edges(quantized_slice1)

# Undirected graph with one node per quantization level. Nodes are registered
# up front, so levels that appear in no edge remain in the graph as isolated
# nodes.
G = nx.Graph()
G.add_nodes_from(range(n_levels))
G.add_edges_from(edges1)

In [65]:
# adj_matrix = np.where(transition_matrix1 > 0, 1, 0)
# vals1, vecs1, counts1 = ut.graph_decomposition(adj_matrix)


In [67]:
# Degree distributions of both transition matrices, via the project helper.
degrees1, degree_likelihood1 = ut.calc_degree_distribution(transition_matrix1)
degrees2, degree_likelihood2 = ut.calc_degree_distribution(transition_matrix2)

# Panel placement is kept as before (slice2 on top, slice1 below); subplot
# titles and trace names now say which curve belongs to which signal.
fig = make_subplots(rows=2, cols=1, subplot_titles=('slice2', 'slice1'))
# The first entry of each distribution is left out of the plot
# (presumably the degree-0 bin - TODO confirm against ut.calc_degree_distribution).
fig.add_trace(go.Scatter(x=degrees1[1:], y=degree_likelihood1[1:], mode='lines', name='slice1'), row=2, col=1)
fig.add_trace(go.Scatter(x=degrees2[1:], y=degree_likelihood2[1:], mode='lines', name='slice2'), row=1, col=1)
# xaxis_title / yaxis_title in update_layout only label the first subplot;
# update_xaxes / update_yaxes apply the label to every subplot.
fig.update_xaxes(title_text='Degree')
fig.update_yaxes(title_text='Frequency')
fig.update_layout(title='Degree Distribution')
fig.show()