In [1]:
# imports and settings

import os
import time
import pickle
import warnings
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
%load_ext autoreload
%autoreload 2

# Install a blanket "ignore" filter so library warnings do not clutter cell output.
warnings.filterwarnings(action="ignore")

print("Imports complete.")

# Load the dataset samples once; later cells look up `fss` and `all_data`
# by the sample names listed in `names`.
loaded_samples = ut.load_ds_samples()
names, fss, all_data, recommended_slices = loaded_samples

Imports complete.
Settings: height=800, width=1400, font_size=16
Imports complete.
Data dicts: 
names=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise_1', 'bg_noise_2', 'croatia_boat']
fss keys=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise_1', 'bg_noise_2', 'croatia_boat']
all_data keys=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise_1', 'bg_noise_2', 'croatia_boat']
slices keys=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise_1', 'bg_noise_2', 'croatia_boat']


In [8]:
# parameters and processing loop

# --- figure appearance (shared by every plot in the notebook) ---
height = 800
width = 1400
font_size = 16

# --- analysis settings, forwarded unchanged to ut.calc_tracks ---
# (their exact meaning and units are defined inside ut.calc_tracks)
fft_nperseg = 16384
percent_overlap = 0.5
window = 'hamming'
remove_dc = 20
crop_freq = 4000
normalization_window_size = 17
detection_threshold = 3
p_gap = 0.2
band_width = normalization_window_size // 2  # half the normalization window, rounded down
sigma = 0.1
p_scale = 1

# --- slicing ---
slice_len = 10  # seconds

# Set to True to overlay the tracks returned by calc_tracks on the spectrogram.
show_tracks = False


## =============================
# test parameters on one sample:

name = 'motorboat'  # change to test other samples
fs, data = fss[name], all_data[name]

# Collect the keyword arguments once so the call itself stays readable.
track_params = dict(
    fs=fs,
    fft_nperseg=fft_nperseg,
    percent_overlap=percent_overlap,
    window=window,
    remove_dc=remove_dc,
    crop_freq=crop_freq,
    normalization_window_size=normalization_window_size,
    detection_threshold=detection_threshold,
    p_gap=p_gap,
    band_width=band_width,
    sigma=sigma,
    p_scale=p_scale,
)
F, T, Sxx, pxx, peaks, track_ixs, tracks = ut.calc_tracks(data, **track_params)

# Two panels sharing the frequency axis: wide spectrogram on the left, PSD on the right.
fig = make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,
    column_widths=[0.8, 0.2],
    horizontal_spacing=0.15,
    vertical_spacing=0.15,
    column_titles=("Spectrogram", "Welch Power Spectral Density"),
)

spectrogram_trace = go.Heatmap(x=T, y=F, z=Sxx.T, colorscale='Viridis', showscale=False)
fig.add_trace(spectrogram_trace, row=1, col=1)

psd_trace = go.Scatter(
    x=pxx,
    y=F,
    mode='lines',
    name='Welch PSD',
    showlegend=False,
    line=dict(color='blue', width=2),
)
fig.add_trace(psd_trace, row=1, col=2)

# Mark the detected peaks on top of the PSD curve.
peak_trace = go.Scatter(
    x=pxx[peaks],
    y=F[peaks],
    mode='markers',
    name='Detected Peaks',
    marker=dict(color='cyan', size=8),
    showlegend=False,
)
fig.add_trace(peak_trace, row=1, col=2)

if show_tracks:
    # Each entry of track_ixs indexes into F, giving one frequency per time step.
    for track in track_ixs:
        track_trace = go.Scatter(
            x=T,
            y=F[track],
            mode='markers',
            line=dict(color='cyan', width=1),
            name='Viterbi Track',
            showlegend=False,
        )
        fig.add_trace(track_trace, row=1, col=1)

# (x title, y title) for each subplot column.
axis_labels = {
    1: ("Time (s)", "Frequency (Hz)"),
    2: ("Power/Frequency (dB/Hz)", "Frequency (Hz)"),
}
for subplot_col, (x_label, y_label) in axis_labels.items():
    fig.update_yaxes(title_text=y_label, row=1, col=subplot_col)
    fig.update_xaxes(title_text=x_label, row=1, col=subplot_col)

# Last expression of the cell: update_layout returns the figure, so it is rendered.
fig.update_layout(
    height=height,
    width=width,
    title_text=f"Spectrogram and Welch PSD with Detected Peaks of {name}",
    font=dict(size=font_size),
)

In [11]:
# calc tracks and entropies for all samples and slice them
#
# For every sample: run ut.calc_tracks, cut each track into consecutive
# equal-length slices, and record one row (name, slice index, peak frequency,
# entropy) per slice. The rows are then summarized in a box plot.

all_slices = []
for name in names:
    print(f"Extracting slices for {name}...")
    fs = fss[name]
    data = all_data[name]

    F, T, Sxx, pxx, peaks, track_ixs, tracks = ut.calc_tracks(
        data,
        fs=fs,
        fft_nperseg=fft_nperseg,
        percent_overlap=percent_overlap,
        window=window,
        remove_dc=remove_dc,
        crop_freq=crop_freq,
        normalization_window_size=normalization_window_size,
        detection_threshold=detection_threshold,
        p_gap=p_gap,
        band_width=band_width,
        sigma=sigma,
        p_scale=p_scale,
    )

    # The slice geometry depends only on the recording, not on the peak,
    # so it is computed once per sample instead of once per peak.
    # n_slices: number of whole slice_len-second windows in the raw signal.
    n_slices = len(data) // (slice_len * fs)
    # slice_step: number of entries of T assigned to each slice.
    slice_step = len(T) // n_slices if n_slices > 0 else 0
    if slice_step == 0:
        # The recording is shorter than one slice, or T has fewer entries than
        # slices. The divisions below would raise ZeroDivisionError, so skip
        # the sample explicitly instead.
        print(f"Skipping {name}: too short to cut into {slice_len} s slices.")
        continue

    # Only complete slices are kept; a trailing partial slice is dropped.
    n_full_slices = len(T) // slice_step

    # tracks[i] is taken to be the track belonging to peaks[i].
    for i, peak in enumerate(peaks):
        for j in range(n_full_slices):
            track_slice = tracks[i][j * slice_step:(j + 1) * slice_step]
            all_slices.append({
                "name": name,
                "slice_index": j,
                "track_peak_freq": F[peak],
                "track_entropy": ut.entropy(track_slice)
            })

print("All slices extracted.")

# Build the frame once from the collected records.
all_slices_df = pd.DataFrame(all_slices)

# plot summarized entropies
fig = px.box(all_slices_df, x="name", y="track_entropy", points="all", 
             title="Average Track Entropy by Class", height=height, width=width,
             hover_data=all_slices_df.columns, color="name")
fig.update_layout(font=dict(size=font_size))
fig.show()

Extracting slices for dpv1...
Extracting slices for dpv2...
Extracting slices for motorboat...
Extracting slices for large_ship...
Extracting slices for bg_noise_1...
Extracting slices for bg_noise_2...
Extracting slices for croatia_boat...
All slices extracted.
