In [1]:
# imports and settings

import os
import time
import pickle
import warnings
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
%load_ext autoreload
%autoreload 2

# do not show warnings
warnings.filterwarnings("ignore")

print("Imports complete.")


# =======================

# plotting parameters
height = 800
width = 1400
font_size = 16
print(f"Settings: height={height}, width={width}, font_size={font_size}")

# =======================

names, fss, all_data, recommended_slices = ut.load_ds_samples()

Imports complete.
Settings: height=800, width=1400, font_size=16
Imports complete.
Settings: height=800, width=1400, font_size=16
Data dicts: 
names=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise']
fss keys=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise']
all_data keys=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise']
slices keys=['dpv1', 'dpv2', 'motorboat', 'large_ship', 'bg_noise']


In [2]:
# parameters and processing loop

fft_nperseg = 16384
percent_overlap = 0.5
window = 'hamming'
remove_dc = 50
crop_freq = 50000
normalization_window_size = 17
detection_threshold = 3
p_gap = 0.2
band_width = normalization_window_size // 2
sigma = 0.1
p_scale = 1

In [3]:
slice_len = 10  # seconds

slices = dict()
for name in names:
    print(f"Processing {name}...")
    _fs = fss[name]
    _data = all_data[name]
    _slices = [_data[i*slice_len*_fs:(i+1)*slice_len*_fs] for i in range(len(_data)//(slice_len*_fs))]
    slices[name] = _slices

print("Slicing complete.")

Processing dpv1...
Processing dpv2...
Processing motorboat...
Processing large_ship...
Processing bg_noise...
Slicing complete.


In [4]:
def calc_relative_entropy(Sxx, peacks, tracks, band=5):
    absolute_entropies = [ut.entropy(track) for track in tracks]
    relative_entropies_minus = []
    relative_entropies_div = []
    for i, p in enumerate(peacks):
        reference_band = Sxx[:, max(0, p-band):min(Sxx.shape[1], p+band)+1]
        reference_entropy = ut.entropy(reference_band.flatten())
        relative_entropies_minus.append(reference_entropy - ut.entropy(tracks[i]))
        relative_entropies_div.append(ut.entropy(tracks[i]) / reference_entropy)
    return absolute_entropies, relative_entropies_minus, relative_entropies_div

In [5]:
rel_entropies = []

for name in names:
    print(f"Calculating relative entropies for {name}...")
    _fs = fss[name]
    _slices = slices[name]
    for slice_index, _data in enumerate(_slices):
        _F, _T, _Sxx, _pxx, _peaks, _track_ixs, _tracks = ut.calc_tracks(_data, 
                                                                        fs=_fs, 
                                                                        fft_nperseg=fft_nperseg, 
                                                                        percent_overlap=percent_overlap, 
                                                                        window=window, 
                                                                        remove_dc=remove_dc, 
                                                                        crop_freq=crop_freq, 
                                                                        normalization_window_size=normalization_window_size, 
                                                                        detection_threshold=detection_threshold, 
                                                                        p_gap=p_gap, 
                                                                        band_width=band_width, 
                                                                        sigma=sigma, 
                                                                        p_scale=p_scale)
        absolute_entropies, rel_entropy_minus, rel_entropy_div = calc_relative_entropy(_Sxx, _peaks, _tracks, band=5)
        for i, peak in enumerate(_peaks):
            rel_entropies.append({
                "name": name,
                "slice_index": slice_index,
                "track_peak_freq": peak,
                "absolute_entropy": absolute_entropies[i],
                "rel_entropy_minus": rel_entropy_minus[i],
                "rel_entropy_div": rel_entropy_div[i]})
rel_entropies_df = pd.DataFrame(rel_entropies)
print("Relative entropies calculation complete.")

# plot relative entropies
fig = px.box(rel_entropies_df, x="name", y="absolute_entropy", points="all", title="Relative Entropy Minus by Class", height=height, width=width)
fig.update_layout(font=dict(size=font_size))
fig.show()

Calculating relative entropies for dpv1...
Calculating relative entropies for dpv2...
Calculating relative entropies for motorboat...
Calculating relative entropies for large_ship...
Calculating relative entropies for bg_noise...
Relative entropies calculation complete.


In [6]:
# relative entropies over time
diff_relative_entropies = []
# iterate over relative entropies
for name in names:
    for p in rel_entropies_df[rel_entropies_df['name'] == name]['track_peak_freq']:
        rel_entropies_name_peak = rel_entropies_df[(rel_entropies_df['name'] == name) & (rel_entropies_df['track_peak_freq'] == p)]
        rel_entropies_name_peak = rel_entropies_name_peak.sort_values(by='slice_index')
        mean_entropy = rel_entropies_name_peak['absolute_entropy'].mean()
        std_entropy = rel_entropies_name_peak['absolute_entropy'].std()
        diff_rel_entropy = rel_entropies_name_peak['absolute_entropy'].diff().fillna(0).values
        diff_relative_entropies.append({
            "name": name,
            "track_peak_freq": p,
            "mean_entropy": mean_entropy,
            "std_entropy": std_entropy,
            "diff_relative_entropy": diff_rel_entropy
        })
diff_rel_entropies_df = pd.DataFrame(diff_relative_entropies)
print("Difference of relative entropies calculation complete.")  

Difference of relative entropies calculation complete.


In [7]:
diff_rel_entropies_df

Unnamed: 0,name,track_peak_freq,mean_entropy,std_entropy,diff_relative_entropy
0,dpv1,31,3.195192,0.119305,"[0.0, 0.16872318026968758]"
1,dpv1,75,3.218323,0.084598,"[0.0, -0.05429124670631902, -0.145494867374362..."
2,dpv1,81,2.981477,0.332881,"[0.0, 0.004391946433136784, -0.042391963770747..."
3,dpv1,162,3.073420,0.140557,"[0.0, 0.3065604928060144, -0.28775731739524746..."
4,dpv1,31,3.195192,0.119305,"[0.0, 0.16872318026968758]"
...,...,...,...,...,...
185,large_ship,100,2.595301,,[0.0]
186,large_ship,110,3.023785,,[0.0]
187,large_ship,64,2.805902,,[0.0]
188,large_ship,102,2.669433,,[0.0]


In [8]:
# plot relative entropies
fig = px.box(diff_rel_entropies_df, x="name", y="mean_entropy", points="all", title="Relative Entropy Minus by Class", height=height, width=width)
fig.update_layout(font=dict(size=font_size))
fig.show()