In [36]:
# imports and settings

import os
import time
import pickle
import warnings
import pandas as pd
import networkx as nx
from networkx.algorithms.coloring import greedy_color
import matplotlib.pyplot as plt
from copy import deepcopy

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch, get_window
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile
from scipy.stats import wasserstein_distance_nd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics.pairwise import rbf_kernel, polynomial_kernel, linear_kernel

import utils as ut
%load_ext autoreload
%autoreload 2

# do not show warnings
warnings.filterwarnings("ignore")

print("Imports complete.")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Imports complete.


In [2]:
# Gather the .wav recordings that sit directly inside the ships folder
# (sub-directories are not searched).
folder_path = '../data/ships'
files = []
for entry in os.listdir(folder_path):
    if not entry.endswith('.wav'):
        continue
    files.append(os.path.join(folder_path, entry))
print(f"Found {len(files)} .wav files in {folder_path}.")

Found 7 .wav files in ../data/ships.


In [24]:
# Spectrogram / PSD settings. These names are read again by later cells, so
# they are kept as plain notebook-level variables.
nperseg = 2048      # forwarded as `nperseg` to ut.calc_spectrogram
overlap = 0.        # forwarded as `percent_overlap`
window = 'hanning'  # forwarded as `window`
dc = 20             # forwarded as `remove_dc` (presumably a low-frequency cut; confirm in utils)
crop_freq = 8000    # forwarded as `crop_freq` (presumably the highest frequency kept; confirm in utils)
norm_size = 9       # forwarded as `normalization_window_size`

# One subplot row per recording: spectrogram on the left, PSD curve on the
# right, sharing the frequency axis.
if len(files) == 0:
    # make_subplots requires a positive number of rows, so an empty file list
    # would otherwise fail with an error unrelated to the real cause.
    print("No .wav files found — skipping plotting.")
else:
    fig = make_subplots(rows=len(files), cols=2, column_widths=[0.7, 0.3], horizontal_spacing=0.05, shared_yaxes=True)

    for i, file in enumerate(files):
        fs, data = wavfile.read(file)
        # Keep the samples between second 1 and second 6 of the recording.
        data = data[fs*1:fs*6]
        F, T, Sxx, phasogram = ut.calc_spectrogram(data, fs, nperseg=nperseg, percent_overlap=overlap, window=window, remove_dc=dc, crop_freq=crop_freq)
        pxx = ut.calc_welch_from_spectrogram(Sxx, normalization_window_size=norm_size)
        # Optional thresholding, currently disabled:
        # pxx = np.where(pxx >= (np.mean(pxx) + np.std(pxx)), pxx, 0)
        print(f"file: {os.path.basename(file)}, avg power: {np.mean(pxx):.2f}, std power: {np.std(pxx):.2f}")
        fig.add_trace(go.Heatmap(z=Sxx, x=T, y=F, colorscale='Viridis', showlegend=False, showscale=False), row=i+1, col=1)
        fig.add_trace(go.Scatter(x=pxx, y=F, mode='lines', line=dict(color='blue'), showlegend=False), row=i+1, col=2)

    fig.update_layout(height=300*len(files), width=900, title_text="Spectrograms and Welch PSDs of Ship Audio Files")
    fig.show()

file: Motorboat_02.08.23_115002_20secCPA.wav, avg power: 0.02, std power: 0.02
file: Yacht_02.08.23_111540_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Yacht_17.08.23_080644(excellence)_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_02.08.23_110547_20secCPA.wav, avg power: 0.02, std power: 0.02
file: Yacht_24.08.23_135926_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Large_Yacht_Excellence_02.08.23_143345_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_02.08.23_112229_20secCPA.wav, avg power: 0.01, std power: 0.01


In [30]:
def collect_labeled_wavs(ds_path, verbose=True):
    """Walk ``ds_path`` and collect every ``.wav`` file with two folder-derived labels.

    Labels are taken from the file's directory relative to ``ds_path``:
    level 1 is the first path component and level 2 is the second one.
    When there is no second component, level 2 falls back to level 1; files
    lying directly in ``ds_path`` get an empty string for both levels.

    Parameters
    ----------
    ds_path : str
        Root folder of the dataset.
    verbose : bool, default True
        Print one ``Level1/Level2/File`` line per file found.

    Returns
    -------
    tuple of (list, list, list)
        ``(paths, labels_lvl1, labels_lvl2)``; the three lists are index-aligned.
    """
    paths, labels_lvl1, labels_lvl2 = [], [], []
    # The per-directory listing is called `filenames`, not `files`: at notebook
    # level the old loop target `files` overwrote the list of ship recordings
    # built by an earlier cell, so re-running that cell's plot would break.
    for root, _dirs, filenames in os.walk(ds_path):
        # The labels depend only on the directory, so derive them once per folder.
        rel = os.path.relpath(root, ds_path)
        parts = [] if rel == '.' else rel.split(os.sep)
        lvl1 = parts[0] if len(parts) >= 1 else ''
        lvl2 = parts[1] if len(parts) >= 2 else lvl1
        for name in filenames:
            if not name.endswith('.wav'):
                continue
            paths.append(os.path.join(root, name))
            labels_lvl1.append(lvl1)
            labels_lvl2.append(lvl2)
            if verbose:
                print(f"Level1: {lvl1}, Level2: {lvl2}, File: {name}")
    return paths, labels_lvl1, labels_lvl2


ds_path = '../data/mark DS'
# iterate through all .wav files in the folder, print their names and subfolder names
all_files, all_labels_lvl1, all_labels_lvl2 = collect_labeled_wavs(ds_path)

Level1: Jet Ski, Level2: Jet Ski_23.08.23_174617, File: Jet Ski_23.08.23_174617_20secCPA.wav
Level1: Jet Ski, Level2: Jet Ski_16.08.23_123211, File: Jet Ski_16.08.23_123211_20secCPA.wav
Level1: Jet Ski, Level2: Jet Ski_16.08.23_163512, File: Jet Ski_16.08.23_163512_20secCPA.wav
Level1: Jet Ski, Level2: Jet Ski_16.08.23_152540, File: Jet Ski_16.08.23_152540_20secCPA.wav
Level1: Jet Ski, Level2: Jet Ski_24.08.23_143337, File: Jet Ski_24.08.23_143337_20secCPA.wav
Level1: ambient noise, Level2: ambient noise, File: 01.09.23_071227_ambient noise.wav
Level1: ambient noise, Level2: ambient noise, File: 02.09.23_0800_ambient noise.wav
Level1: ambient noise, Level2: ambient noise, File: 09.08.23_073903_ambient_noise_measurment.wav
Level1: ambient noise, Level2: ambient noise, File: 27.08.23_070056_ambient_noise_measurment.wav
Level1: ambient noise, Level2: ambient noise, File: 31.08.23_073720_ambient_noise_measurment.wav
Level1: ambient noise, Level2: ambient noise, File: 26.08.23_0701_ambient_

In [31]:
# Turn every recording into one PSD feature vector, using only the samples
# before the 5-second mark. The spectrogram settings come from the earlier
# plotting cell.
spec_kwargs = dict(
    nperseg=nperseg,
    percent_overlap=overlap,
    window=window,
    remove_dc=dc,
    crop_freq=crop_freq,
)

embedded = []
for i, file in enumerate(all_files):
    print(f"Processing file {i+1}/{len(all_files)}: {os.path.basename(file)}")
    fs, data = wavfile.read(file)
    data = data[:fs*5]
    F, T, Sxx, phasogram = ut.calc_spectrogram(data, fs, **spec_kwargs)
    embedded.append(
        ut.calc_welch_from_spectrogram(Sxx, normalization_window_size=norm_size)
    )

Processing file 1/1501: Jet Ski_23.08.23_174617_20secCPA.wav
Processing file 2/1501: Jet Ski_16.08.23_123211_20secCPA.wav
Processing file 3/1501: Jet Ski_16.08.23_163512_20secCPA.wav
Processing file 4/1501: Jet Ski_16.08.23_152540_20secCPA.wav
Processing file 5/1501: Jet Ski_24.08.23_143337_20secCPA.wav
Processing file 6/1501: 01.09.23_071227_ambient noise.wav
Processing file 7/1501: 02.09.23_0800_ambient noise.wav
Processing file 8/1501: 09.08.23_073903_ambient_noise_measurment.wav
Processing file 9/1501: 27.08.23_070056_ambient_noise_measurment.wav
Processing file 10/1501: 31.08.23_073720_ambient_noise_measurment.wav
Processing file 11/1501: 26.08.23_0701_ambient_noise_measurment.wav
Processing file 12/1501: Jadrolinija_21.08.23_100900_20secCPA.wav
Processing file 13/1501: Jadrolinija_08.08.23_114142_20secCPA.wav
Processing file 14/1501: Jadrolinija_03.08.23_160737_20secCPA.wav
Processing file 15/1501: Jadrolinija_04.09.23_163703_20secCPA.wav
Processing file 16/1501: Jadrolinija_18.0

In [37]:
def svm_sv_embedding(X, SV, gamma):
    """Embed samples as their RBF-kernel similarities to a set of support vectors.

    Parameters
    ----------
    X : array-like
        Samples to embed, one row per sample.
    SV : array-like
        Reference vectors (the support vectors), with the same number of
        columns as ``X``.
    gamma : float
        RBF kernel coefficient passed straight to ``rbf_kernel``.

    Returns
    -------
    ndarray
        Kernel matrix whose entry ``[i, j]`` is the RBF kernel between row
        ``i`` of ``X`` and row ``j`` of ``SV``.
    """
    kernel_features = rbf_kernel(X, Y=SV, gamma=gamma)
    return kernel_features

In [40]:
# Hold out 20 % of the recordings for evaluation; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    embedded,
    all_labels_lvl2,
    test_size=0.2,
    random_state=42,
)

# Standardise the features with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

# RBF-kernel SVM. Only its support vectors and kernel coefficient are reused
# below, so the kernel can be re-evaluated outside the classifier.
svm = SVC(kernel="rbf", C=1.0, gamma="scale").fit(X_train_s, y_train)
SV = svm.support_vectors_
# `_gamma` is a private sklearn attribute; it is read here for the numeric
# kernel coefficient because `svm.gamma` is only the string "scale".
gamma = svm._gamma

# Re-express the training samples as kernel similarities to the support vectors.
# Test samples are embedded on demand at prediction time.
X_train_emb = svm_sv_embedding(X_train_s, SV, gamma)

# Nearest-neighbour classifier working in the support-vector embedding space.
knn = KNeighborsClassifier(
    n_neighbors=7,
    metric="cosine",
    weights="distance",
).fit(X_train_emb, y_train)

print("Training complete.")

Training complete.


In [41]:
def cls_predict(X):
    """Predict labels for raw feature vectors with the fitted pipeline.

    Applies, in order, the notebook-level ``scaler``, the support-vector
    embedding built from ``SV`` and ``gamma``, and the ``knn`` classifier.
    All of these must already exist from the training cell.

    Parameters
    ----------
    X : array-like
        Unscaled feature vectors, one row per sample.

    Returns
    -------
    ndarray
        Labels predicted by ``knn``, one per row of ``X``.
    """
    return knn.predict(svm_sv_embedding(scaler.transform(X), SV, gamma))

In [42]:
# Evaluate the scaler -> support-vector embedding -> kNN pipeline on the
# held-out split. The matrix is restricted to, and ordered by, the classes the
# kNN model saw during training.
class_labels = knn.classes_
cm = confusion_matrix(y_test, cls_predict(X_test), labels=class_labels)

# Rows are true labels and columns are predicted labels.
heatmap = go.Heatmap(z=cm, x=class_labels, y=class_labels, colorscale='Viridis')
fig_cm = go.Figure(data=heatmap)
fig_cm.update_layout(title='Confusion Matrix', xaxis_title='Predicted Label', yaxis_title='True Label')
fig_cm.show()

In [44]:
# Baseline for comparison: the same kNN configuration fitted directly on the
# unscaled feature vectors, with no SVM support-vector embedding in between.
knn2 = KNeighborsClassifier(
    n_neighbors=7,
    metric="cosine",
    weights="distance",
).fit(X_train, y_train)

print("Training complete.")

Training complete.


In [45]:
# Evaluate the baseline kNN (raw features) on the held-out split. The matrix is
# restricted to, and ordered by, the classes the baseline saw during training.
knn2_pred = knn2.predict(X_test)
baseline_labels = knn2.classes_
cm = confusion_matrix(y_test, knn2_pred, labels=baseline_labels)

# Rows are true labels and columns are predicted labels.
heatmap = go.Heatmap(z=cm, x=baseline_labels, y=baseline_labels, colorscale='Viridis')
fig_cm = go.Figure(data=heatmap)
fig_cm.update_layout(title='Confusion Matrix', xaxis_title='Predicted Label', yaxis_title='True Label')
fig_cm.show()

In [None]:
# Spectrogram / PSD settings, repeated here so the cell can run on its own.
nperseg = 2048      # forwarded as `nperseg` to ut.calc_spectrogram
overlap = 0.        # forwarded as `percent_overlap`
window = 'hanning'  # forwarded as `window`
dc = 20             # forwarded as `remove_dc`
crop_freq = 8000    # forwarded as `crop_freq`
norm_size = 9       # forwarded as `normalization_window_size`

# Upper bound on the number of recordings drawn. The figure gets one 300 px row
# and two traces per file, so plotting the whole folder (hundreds of files)
# produces a figure that is far too tall and heavy to be usable.
# Set to None to plot every file found.
max_plots = 12

folder_path = '../data/mark DS/Motor Boats'
# Collect .wav files recursively; os.walk yields (root, dirs, filenames) tuples.
files = []
for root, _dirs, filenames in os.walk(folder_path):
    for f in filenames:
        if f.endswith('.wav'):
            files.append(os.path.join(root, f))
print(f"Found {len(files)} .wav files in {folder_path}.")

# `files` keeps the complete list; only the plotted subset is capped.
files_to_plot = files if max_plots is None else files[:max_plots]

if len(files_to_plot) == 0:
    print("No .wav files found — skipping plotting.")
else:
    if len(files_to_plot) < len(files):
        print(f"Plotting the first {len(files_to_plot)} of {len(files)} files (max_plots={max_plots}).")

    fig = make_subplots(rows=len(files_to_plot), cols=2, column_widths=[0.7, 0.3], horizontal_spacing=0.05, shared_yaxes=True)

    for i, file in enumerate(files_to_plot):
        fs, data = wavfile.read(file)
        # Keep the samples between second 1 and second 6 of the recording.
        data = data[fs*1:fs*6]
        F, T, Sxx, phasogram = ut.calc_spectrogram(data, fs, nperseg=nperseg, percent_overlap=overlap, window=window, remove_dc=dc, crop_freq=crop_freq)
        pxx = ut.calc_welch_from_spectrogram(Sxx, normalization_window_size=norm_size)
        # Optional thresholding, currently disabled:
        # pxx = np.where(pxx >= (np.mean(pxx) + np.std(pxx)), pxx, 0)
        print(f"file: {os.path.basename(file)}, avg power: {np.mean(pxx):.2f}, std power: {np.std(pxx):.2f}")
        fig.add_trace(go.Heatmap(z=Sxx, x=T, y=F, colorscale='Viridis', showlegend=False, showscale=False), row=i+1, col=1)
        fig.add_trace(go.Scatter(x=pxx, y=F, mode='lines', line=dict(color='blue'), showlegend=False), row=i+1, col=2)

    fig.update_layout(height=300*len(files_to_plot), width=900, title_text="Spectrograms and Welch PSDs of Ship Audio Files")
    fig.show()

Found 757 .wav files in ../data/mark DS/Motor Boats.
file: Motorboat_01.09.23_100710_20secCPA.wav, avg power: 0.02, std power: 0.02
file: Motorboat_09.08.23_130220_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_08.08.23_143004_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_01.09.23_124929_20secCPA.wav, avg power: 0.01, std power: 0.02
file: Motorboat_26.08.23_124922_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_02.08.23_130949_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_05.09.23_180710_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_27.08.23_094429_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_26.08.23_103703_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_16.08.23_115116_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_18.08.23_185707_20secCPA.wav, avg power: 0.01, std power: 0.01
file: Motorboat_05.09.23_174955_20secCPA.wav, avg power: 0.01, std power: 0.02