In [1]:
import os
import time
import pickle
import networkx as nx
import matplotlib.pyplot as plt

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
from utils import try_except_decorator
%load_ext autoreload
%autoreload 2

In [2]:
ds_folder = "../data/ds1_5_classes/"
classes = ["dpv1", "dpv2", "croatia_ship", "bg_noise"]  # , "small_ship", "large_ship"

file_names = dict()
for cls in classes:
    class_folder = os.path.join(ds_folder, cls)
    file_names[cls] = [os.path.join(class_folder, f) for f in os.listdir(class_folder) if f.endswith(".wav")]
    print(f"Class {cls}: {len(file_names[cls])} files")

Class dpv1: 34 files
Class dpv2: 27 files
Class croatia_ship: 71 files
Class bg_noise: 71 files


In [3]:
def rand_sample_slices(file_names, seed=None):
    slices = []
    slice_labels = []
    classes = list(file_names.keys())
    if seed is not None:
        np.random.seed(seed)
    for cls in classes:
        print(f"Processing class {cls}...")
        rand_ixs = np.random.choice(len(file_names[cls]), 1, replace=False)
        for ix in rand_ixs:
            file = file_names[cls][ix]
            fs, data = wavfile.read(file)
            slices.append(data)
            slice_labels.append(cls)
    print("Done processing slices.")
    return slices, slice_labels

slices, slice_labels = rand_sample_slices(file_names, seed=42)

Processing class dpv1...
Processing class dpv2...
Processing class croatia_ship...
Processing class bg_noise...
Done processing slices.


In [None]:
def plot_slices(slices,
                fs=128000,
                labels=None,
                crop_freq=2000,
                remove_dc=60,
                nperseg=32768,
                decimate_factor=4):

    fig = make_subplots(rows=2, cols=len(slices), subplot_titles=labels)

    for i, s in enumerate(slices):

        print(f"Processing slice {i+1}/{len(slices)}...")
        a = labels[i]

        # f, t, Sxx = ut.get_spectrogram(s, fs, crop_freq=crop_freq, nperseg=nperseg)
        # fig.add_trace(go.Heatmap(z=Sxx, x=t, y=f, colorscale='Viridis', showscale=False), row=1, col=i+1)

        f_pxx, Pxx = ut.pwelch(s, fs, remove_dc=remove_dc, crop_freq=crop_freq)
        fig.add_trace(go.Scatter(x=f_pxx, y=Pxx, mode='lines'), row=1, col=i+1)

        f_pxx, Pxx = signal.decimate(f_pxx, decimate_factor), signal.decimate(Pxx, decimate_factor)
        vis = ut.calc_vis_graph(f_pxx, Pxx)
        # dots = np.where(vis == 1)
        # fig.add_trace(go.Scatter(x=dots[0], y=dots[1], mode='markers', marker=dict(size=0.5, color='black')), row=2, col=i+1)

        values, vectors, counts = ut.graph_decomposition(vis)
        fig.add_trace(go.Scatter(x=np.arange(len(values)), y=values, mode='markers+lines'), row=2, col=i+1)

        # degrees, degree_distribution = ut.calc_degree_distribution(vis)
        # fig.add_trace(go.Scatter(x=degrees, y=degree_distribution, mode='markers+lines'), row=3, col=i+1)

    return fig

# plot_slices(slices, fs, labels=slice_labels, crop_freq=2000, remove_dc=20, nperseg=32768)

In [None]:
# slices, slice_labels = rand_sample_slices(file_names, seed=42)
# plot_slices(slices, labels=slice_labels, crop_freq=None, remove_dc=10)

In [15]:
arr = np.load('../data/ds1_5_classes/vg/visibility_dpv1_dpv1_slice_1.npy')
print(arr.shape)

dots = np.where(arr == 1)
print(len(dots[0]))


(32001, 32001)
1394134


In [19]:
class SparseGraph:
    def __init__(self, adjacency_matrix):
        if isinstance(adjacency_matrix, np.ndarray):
            self.adjacency_matrix = adjacency_matrix
        elif isinstance(adjacency_matrix, list):
            shape = adjacency_matrix[0]
            self.adjacency_matrix = np.zeros(shape, dtype=np.int8)
            for node in adjacency_matrix[1:]:
                i, j = node
                self.adjacency_matrix[i, j] = 1
                self.adjacency_matrix[j, i] = 1
        else:
            raise ValueError("Invalid adjacency matrix format.")
        
    def mat_as_list(self):
        shape = self.adjacency_matrix.shape
        edges = []
        for i in range(shape[0]):
            for j in range(i+1, shape[1]):
                if self.adjacency_matrix[i, j] == 1:
                    edges.append((i, j))
        return [shape] + edges

    def save_to_file(self, file_path):
        np.save(file_path, np.array(self.mat_as_list()))

In [20]:
for cls in classes:
    for file in file_names[cls]:
        tgt_folder = "../data/ds1_5_classes/vg/"
        i = os.path.basename(file).split(".")[0]
        tgt_path = os.path.join(tgt_folder, f"visibility_{cls}_{i}.npy")
        if os.path.exists(tgt_path):
            continue
        else:
            print(f"Processing file {file}...")

        fs, data = wavfile.read(file)
        f, pxx = ut.pwelch(data, fs, remove_dc=10, crop_freq=None)
        f, pxx = signal.decimate(f, 4), signal.decimate(pxx, 4)
        visibility = ut.calc_vis_graph(f, pxx)
        visibility = SparseGraph(visibility).mat_as_list()

        visibility.save_to_file(tgt_path)
        print(f"Saved visibility graph to {tgt_path}.")
        

Processing file ../data/ds1_5_classes/dpv1/dpv1_slice_55.wav...


AttributeError: 'list' object has no attribute 'save_to_file'

In [None]:
file_name = file_names['dpv1'][0]
fs, data = wavfile.read(file_name)
f, pxx = ut.pwelch(data, fs, remove_dc=10, crop_freq=None)
f, pxx = signal.decimate(f, 4), signal.decimate(pxx, 4)
vis_list = ut.nvg_dc_np(pxx, 0, len(pxx))

322
273
6
5
3
1
272
222
68
41
