In [1]:
import datetime
import math
import os.path
import shutil
from pathlib import Path
import zipfile

import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from scipy.signal import find_peaks
from tqdm.notebook import tqdm
import pickle
import tensorflow as tf

from utils.data_reading.catalogs.isc import ISC_file
from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.sound_model import MonthlyGridSoundModel, HomogeneousSoundModel
from utils.training.keras_models import TiSSNet
from utils.transformations.features_extractor import STFTFeaturesExtractor

In [2]:
# --- input / output locations ---
datasets_yaml = "/home/plerolland/Bureau/dataset.yaml"
output_dir = "/media/plerolland/LaBoite/BFD"
csv_index = f"{output_dir}/index.csv"  # one line appended per generated sample

# --- window lengths ---
SPECTRO_DELTA = datetime.timedelta(seconds=200)  # duration covered by one spectrogram
STA_DELTA = datetime.timedelta(seconds=2)  # short-term averaging window

# --- target spectrogram resolution (presumably seconds and Hz per pixel -- TODO confirm) ---
time_res = 0.5
freq_res = 120 / 128

# The extractor is created without a manager; the processing loop assigns the
# manager, nperseg and overlap for each station before using it.
STFT_computer = STFTFeaturesExtractor(None, vmin=-35, vmax=140)

# keep only stations that pass both catalog filters (dated and located)
full_catalog = StationsCatalog(datasets_yaml)
stations = full_catalog.filter_out_undated().filter_out_unlocated()

In [3]:
# Build the dataset: each station's recording is cut into consecutive
# SPECTRO_DELTA-long windows; for every window a spectrogram image and the
# matching STA/LTA curve are written, then the station directory is zipped.
for station in stations:
    # a single manager per station, shared by the STFT extractor and the raw reads
    manager = station.get_manager()
    STFT_computer.manager = manager
    # derive the STFT window length and overlap from the target resolutions
    STFT_computer.nperseg = round(manager.sampling_f / freq_res)
    STFT_computer.overlap = 1 - time_res * manager.sampling_f / STFT_computer.nperseg
    # length of the short-term averaging window, in samples
    sta_offset = int(STA_DELTA.total_seconds() * manager.sampling_f)

    station_dir = f'{output_dir}/{station.date_start.year}/{station.name}'
    if Path(f"{station_dir}.zip").exists():
        # the archive is only written once a station is fully processed
        print(f"{station_dir}.zip already exists")
        continue

    # keep a one-day margin (plus half a window) at both ends of the deployment
    half_window = SPECTRO_DELTA * 0.5
    start_date = station.date_start + half_window + datetime.timedelta(days=1)
    end_date = station.date_end - half_window - datetime.timedelta(days=1)

    for i in (pbar := tqdm(range(0, 1 + int((end_date - start_date) / SPECTRO_DELTA)))):
        pbar.set_description(f'{station} - {start_date.year}')

        # centre of the current window
        current_date = start_date + i * SPECTRO_DELTA
        file_prefix = f'{station_dir}/{current_date.strftime("%m%d")}'
        Path(file_prefix).mkdir(parents=True, exist_ok=True)
        file_prefix = f'{file_prefix}/{current_date.strftime("%H%M%S")}'

        # the PNG is written last, so its presence marks a completed window
        if Path(f'{file_prefix}_SPECTRO.png').exists():
            continue

        window_start = current_date - half_window
        window_end = current_date + half_window

        # raw samples, padded by half an STA window on each side
        pts_sta = manager.getSegment(window_start - STA_DELTA*0.5, window_end + STA_DELTA*0.5)

        if 0 in pts_sta:
            # incomplete data that was filled with 0s, we skip it
            # (checked before the spectrogram so rejected windows cost no STFT)
            continue

        # compute the spectro
        (freqs, times, spectro) = STFT_computer.get_features(window_start, window_end)

        # compute STA/LTA on the signal energy; no filtering is applied here.
        # The LTA is the mean energy over the whole padded window.
        energy = np.square(pts_sta)
        lta = np.mean(energy)
        # moving average over sta_offset samples, then decimation.
        # NOTE(review): the stride is the STFT window length (nperseg), whereas the
        # overlap set above implies a hop of time_res * sampling_f samples between
        # spectrogram columns -- confirm this decimation is the intended one.
        stas = np.convolve(energy, np.ones(sta_offset)/sta_offset, mode='valid')[::STFT_computer.nperseg][1:-1]
        sta_lta = stas / lta

        # save STA LTA
        np.save(f'{file_prefix}_STALTA.npy', sta_lta)

        # save spectro
        STFT_computer._save_features((freqs, times, spectro), f'{file_prefix}_SPECTRO.png')

        # register the sample in the global index
        with open(csv_index, "a") as index_file:
            index_file.write(f"{file_prefix}\n")

    with zipfile.ZipFile(f"{station_dir}.zip", 'w', zipfile.ZIP_STORED) as archive:
        for root, dirs, files in os.walk(station_dir):
            for file in files:
                file_path = os.path.join(root, file)
                # store paths relative to the station directory
                archive.write(file_path, os.path.relpath(file_path, station_dir))
    # remove the source tree only after the archive has been closed (and thus
    # finalised), so a failure while writing the zip cannot lose the data
    shutil.rmtree(station_dir)

In [None]:
# Sanity check on one window: recompute and display the spectrogram centred
# on `date` for station MADW.
path = f'{output_dir}/2022/MADW/2/20220204_105228_'
date = datetime.datetime(2022,2,4,10,52,28)
manager = stations.by_names("MADW").by_date(date)[0].get_manager()

# Point the shared extractor at this station explicitly; otherwise it keeps the
# manager / nperseg / overlap of whichever station the processing loop handled
# last, and the spectrogram would not come from MADW.
STFT_computer.manager = manager
STFT_computer.nperseg = round(manager.sampling_f / freq_res)
STFT_computer.overlap = 1 - time_res * manager.sampling_f / STFT_computer.nperseg

# raw samples and spectrogram of the same SPECTRO_DELTA-long window
data = manager.getSegment(date-SPECTRO_DELTA*0.5, date+SPECTRO_DELTA*0.5)
(f, t, spectro) = STFT_computer.get_features(date - SPECTRO_DELTA*0.5, date + SPECTRO_DELTA*0.5)

plt.imshow(spectro, aspect="auto")


In [None]:
# Display the STA/LTA curve stored on disk under the `path` prefix.
stored_sta_lta = np.load(f'{path}STALTA.npy')
plt.plot(stored_sta_lta)
plt.xlim(0, 373)

In [None]:
# Compare a moving short-term average (STA) and a moving long-term average (LTA)
# of the signal energy over the window centred on `date`.
LTA_DELTA = datetime.timedelta(seconds=300)
# NOTE: this rebinds the notebook-level STA_DELTA (2 s in the configuration cell);
# re-run the configuration cell before regenerating the dataset.
STA_DELTA = datetime.timedelta(seconds=10)

# use the sampling rate of the manager actually being read, not a hard-coded one
sampling_f = manager.sampling_f
lta_offset = int(LTA_DELTA.total_seconds() * sampling_f)
sta_offset = int(STA_DELTA.total_seconds() * sampling_f)
# number of samples between two output points; a slice step must be an integer
hop = int(round(time_res * sampling_f))

# pad the window by half an averaging length on each side so that both
# 'valid' convolutions yield the same number of points
pts_lta = manager.getSegment(date - SPECTRO_DELTA*0.5 - LTA_DELTA*0.5, date + SPECTRO_DELTA*0.5 + LTA_DELTA*0.5)
pts_sta = manager.getSegment(date - SPECTRO_DELTA*0.5 - STA_DELTA*0.5, date + SPECTRO_DELTA*0.5 + STA_DELTA*0.5)

# work on the signal energy
pts_lta = np.square(pts_lta)
pts_sta = np.square(pts_sta)

# moving averages, decimated to one value every `hop` samples
ltas = np.convolve(pts_lta, np.ones(lta_offset)/lta_offset, mode='valid')[::hop][1:-1]
stas = np.convolve(pts_sta, np.ones(sta_offset)/sta_offset, mode='valid')[::hop][1:-1]

plt.plot(stas, label="stas")
plt.plot(ltas, label="ltas")
plt.xlim(0,373)
plt.legend()

In [None]:
# Ratio of the short-term to the long-term average energy computed in the previous cell.
sta_lta_ratio = stas / ltas
plt.plot(sta_lta_ratio, label="stas/ltas")
# the label is only rendered once a legend is drawn
plt.legend()