In [None]:
# imports and settings

import os
import time
import pickle
import warnings
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

import numpy as np
from numpy import linalg as LA
from numpy import histogram2d

from scipy import signal
from scipy.fft import fft, fftfreq, fftshift
from scipy.signal import find_peaks, butter, filtfilt, welch
from scipy.ndimage import gaussian_filter
from scipy.io import wavfile

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import utils as ut
from utils import try_except_decorator
%load_ext autoreload
%autoreload 2

import viterbi
%load_ext autoreload
%autoreload 2

# suppress all warnings for the rest of the session
warnings.simplefilter("ignore")

# shared plotting parameters
width, height = 1400, 800
font_size = 16

In [None]:
# load files and process data into slices and time annotations

##############################
# dpv1 - Croatia
##############################

# read the first DPV recording
data_file = "../data/scooter_example_1.wav"
dpv1_fs, dpv1_data = wavfile.read(data_file)

# keep only the [start_time, end_time) window of the recording
start_time = 60 # seconds
end_time = 160 # seconds
dpv1_data = dpv1_data[int(start_time*dpv1_fs):int(end_time*dpv1_fs)]

# annotated detections, in seconds on the same time axis as the crop window
# (i.e. before cropping)
d1_start, d1_end = 3, 37      # 1st detection: 3-37 seconds
d2_start, d2_end = 91, 143    # 2nd detection: 91-143 seconds

# Per-sample labels for the cropped data: 1 inside a detection, 0 elsewhere.
# Each detection is clipped to the crop window and shifted by start_time so
# that it indexes into the cropped array; detections that do not overlap the
# window are skipped.
dpv1_time_annotations = np.zeros_like(dpv1_data, dtype=int)
for det_start, det_end in ((d1_start, d1_end), (d2_start, d2_end)):
    if det_end <= start_time or det_start >= end_time:
        continue
    s = max(det_start, start_time) - start_time
    e = min(det_end, end_time) - start_time
    dpv1_time_annotations[int(s*dpv1_fs):int(e*dpv1_fs)] = 1

# 10-second excerpt: seconds 35-45 of the cropped data
dpv1_slice = dpv1_data[int(35*dpv1_fs):int(45*dpv1_fs)]


##############################
# dpv2 - Haifa
##############################

# read the second DPV recording
data_file = "../data/RBW6922_20250612_060900_copy.wav"
dpv2_fs, dpv2_data = wavfile.read(data_file)

# crop window in seconds (rebinds the notebook-level start_time / end_time)
start_time, end_time = 0, 120
dpv2_crop = slice(int(start_time*dpv2_fs), int(end_time*dpv2_fs))
dpv2_data = dpv2_data[dpv2_crop]

# 10-second excerpt: seconds 15-25 of the cropped data
dpv2_excerpt = slice(int(15*dpv2_fs), int(25*dpv2_fs))
dpv2_slice = dpv2_data[dpv2_excerpt]


# ##############################
# # Low SNR DPV - Croatia
# ##############################

# data_file = "../data/croatia/2507_1_1000m/RBW6737_20250725_083000.wav"
# fs, lsnr_data = wavfile.read(data_file)

# lsnr_slice = lsnr_data[int(40*fs):int(50*fs)]


##############################
# Ship
##############################

# data_file = "../data/ships/Yacht_02.08.23_111540_20secCPA.wav"
# read the ship recording (no cropping applied)
data_file = "../data/ships/Motorboat_02.08.23_112229_20secCPA.wav"
ship_fs, ship_data = wavfile.read(data_file)

# every sample of the recording is labelled 1
ship_time_annotations = np.full_like(ship_data, 1, dtype=int)

# 10-second excerpt: seconds 5-15 of the recording
ship_excerpt = slice(int(5*ship_fs), int(15*ship_fs))
ship_slice = ship_data[ship_excerpt]


##############################
# Background Noise - Croatia
##############################
# read the background-noise recording (no cropping applied)
data_file = "../data/croatia/2507_1_1000m/RBW6737_20250725_080600.wav"
bg_fs, bg_data = wavfile.read(data_file)

# every sample of the recording is labelled 1
# NOTE(review): in the dpv1 section 1 marks a detection and 0 marks
# everything else; confirm that all-ones is the intended label for
# background noise.
bg_noise_time_annotations = np.full_like(bg_data, 1, dtype=int)

# 10-second excerpt: seconds 5-15 of the recording
bg_excerpt = slice(int(5*bg_fs), int(15*bg_fs))
bg_slice = bg_data[bg_excerpt]


# Lookup tables keyed by a short recording name. All three dicts must share
# the same keys, because the summary loop below indexes fss and slices with
# the keys of all_data.

# sampling rate of each recording, as returned by wavfile.read
fss = {
    "dpv1": dpv1_fs,
    "dpv2": dpv2_fs,
    # "low_snr_dpv1": fs,
    "ship": ship_fs,
    "bg": bg_fs}

# full (possibly cropped) sample arrays
all_data = {
    "dpv1": dpv1_data,
    "dpv2": dpv2_data,
    # "low_snr_dpv1": lsnr_data,
    "ship": ship_data,
    "bg": bg_data}

# short excerpt taken from each recording
slices = {
    "dpv1": dpv1_slice,
    "dpv2": dpv2_slice,
    # "low_snr_dpv1": lsnr_slice,
    "ship": ship_slice,
    "bg": bg_slice}

# print duration and sample count of every recording and of its excerpt
for title, data in all_data.items():
    title_fs = fss[title]
    title_slice = slices[title]
    print(
        f"{title} - Duration: {len(data)/title_fs} seconds, Samples: {len(data)}, "
        f"with slice of {len(title_slice)/title_fs} seconds, Samples: {len(title_slice)}"
    )
    