In [1]:
import math
from pathlib import Path

from scipy import signal
import datetime
import os.path

import numpy as np
from scipy.signal import find_peaks, peak_widths
from tqdm.notebook import tqdm
import pickle
import tensorflow as tf

from utils.data_reading.sound_data.station import StationsCatalog
from utils.transformations.features_extractor import STFTFeaturesExtractor

2024-08-27 10:41:21.255393: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-27 10:41:21.342888: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-27 10:41:21.623126: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-27 10:41:21.623441: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-27 10:41:21.671968: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
# --- Segment tiling -------------------------------------------------------
# Each exported segment spans DELTA; consecutive segment starts are spaced by
# (1 - overlap) * DELTA in the processing loop.
DELTA = datetime.timedelta(seconds=200)
overlap = 0.1

# --- STFT settings --------------------------------------------------------
# The processing loop sets nperseg = int(sampling_f / F_RES) and the STFT
# overlap to 1 - TIME_RES * sampling_f / nperseg.
# NOTE(review): presumably F_RES is in Hz and TIME_RES in seconds - confirm.
F_RES = 0.5
TIME_RES = 0.5

# --- Locations ------------------------------------------------------------
# YAML file handed to StationsCatalog, and root directory under which the
# per-year / per-station outputs are written.
datasets_yaml = "/home/plerolland/Bureau/dataset.yaml"
out_root = "/media/plerolland/LaBoite/spectros"

In [None]:
# Export, for every station active during each processed year, a spectrogram
# image (.png) and an STA/LTA curve (.npy) per DELTA-long segment. Each station
# directory holds an index.csv listing the segments already written, so the
# cell can be interrupted and re-run without redoing finished segments.


def _read_done_index(csv_file):
    """Read the index of segments already exported for one station.

    Args:
        csv_file: path of the index file; its first line is a header.

    Returns:
        dict mapping the day number (int, first column) to the set of
        segment start strings (second column) recorded for that day.
        Blank or incomplete lines are ignored.
    """
    done = {}
    with open(csv_file, "r") as f:
        next(f, None)  # skip the header line
        for line in f:
            # strip() instead of [:-1]: a final line without a trailing
            # newline must not lose its last character.
            fields = line.strip().split(",")
            if len(fields) < 2 or not fields[0]:
                continue
            done.setdefault(int(fields[0]), set()).add(fields[1])
    return done


def _compute_sta_lta(data, sta_kernel, hop):
    """Return the ratio of a moving RMS to the RMS of the whole segment.

    Args:
        data: 1-D array of samples.
        sta_kernel: normalized averaging window applied to the squared samples.
        hop: positive int, keeps one value of the moving RMS every `hop` samples.

    Returns:
        1-D array: decimated moving RMS divided by the global RMS of `data`.
    """
    energy = np.square(data)
    lta = np.sqrt(np.mean(energy))
    stas = np.sqrt(np.convolve(energy, sta_kernel, mode='same')[::hop])
    return stas / lta


for year in [2020]:
    print(f"Processing year {year}")

    year_start = datetime.datetime(year, 1, 1)
    year_end = datetime.datetime(year + 1, 1, 1)
    one_day = datetime.timedelta(days=1)

    # Keep dated, located stations whose recording period touches the year
    # (with one day of margin on each side).
    stations = StationsCatalog(datasets_yaml).filter_out_undated().filter_out_unlocated()
    stations = stations.ends_after(year_start - one_day)
    stations = stations.starts_before(year_end + one_day)
    stft_computer = STFTFeaturesExtractor(None, vmin=60, vmax=110)

    for station in stations:
        out_station = f'{out_root}/{year}/{station.name}-{station.date_start.year}'
        Path(out_station).mkdir(exist_ok=True, parents=True)

        # Create the index with its header on first visit, then load what is done.
        csv_file = f'{out_station}/index.csv'
        if not Path(csv_file).is_file():
            with open(csv_file, "w") as f:
                f.write("day,datetime_start\n")
        done = _read_done_index(csv_file)

        print(f"Processing station {station.name}")
        manager = station.get_manager()
        manager.cache_size = 1
        stft_computer.manager = manager
        stft_computer.nperseg = int(manager.sampling_f / F_RES)
        stft_computer.overlap = 1 - TIME_RES * manager.sampling_f / stft_computer.nperseg

        # Per-station invariants of the STA/LTA computation, built once.
        sta_offset = int(10 * manager.sampling_f)  # averaging window of 10 * sampling_f samples
        sta_kernel = np.ones(sta_offset) / sta_offset
        # round() rather than int(): 1 - (1 - x) can land just below the exact
        # value and int() would then truncate the hop by one sample. The max()
        # guards against a zero slice step.
        hop = max(1, int(round(stft_computer.nperseg * (1 - stft_computer.overlap))))

        start = max(year_start, station.date_start)
        end = min(year_end, station.date_end)
        steps = math.ceil((end - start) / (DELTA * (1 - overlap)))

        # NOTE(review): the range starts at 1, so the segment beginning exactly
        # at `start` (i == 0) is never exported - confirm this is intended.
        for i in tqdm(range(1, steps)):
            seg_start = start + i*(1-overlap)*DELTA
            seg_start_str = seg_start.strftime("%Y%m%d_%H%M%S")
            seg_end = seg_start + DELTA

            if seg_end > end:
                continue  # we don't make smaller spectrograms in the ends of the datasets

            day = seg_start.timetuple().tm_yday
            if seg_start_str in done.get(day, ()):
                continue  # already done

            out = f'{out_station}/{day}'
            Path(out).mkdir(exist_ok=True, parents=True)

            data = manager.getSegment(seg_start, seg_end)
            stft_features = stft_computer._get_features(data)
            stft_computer._save_features(stft_features, f'{out}/{seg_start_str}.png')

            # Fallback re-fetch kept from the original code.
            # NOTE(review): looks redundant with the call above - confirm.
            if data is None:
                data = manager.getSegment(seg_start, seg_end)

            sta_lta = _compute_sta_lta(data, sta_kernel, hop)
            # Align the curve with the number of columns of the saved features.
            if stft_features is not None and len(sta_lta) != stft_features[2].shape[1]:
                sta_lta = signal.resample(sta_lta, stft_features[2].shape[1])
            np.save(f'{out}/{seg_start_str}_stalta.npy', sta_lta)

            # Record the segment only after both outputs were written.
            with open(csv_file, "a") as f:
                f.write(f'{day},{seg_start_str}\n')

        print(f"Station {station.name} processed")

Processing year 2020
Processing station ELAN


  0%|          | 0/168322 [00:00<?, ?it/s]

Station ELAN processed
Processing station MADW


  0%|          | 0/172564 [00:00<?, ?it/s]

Station MADW processed
Processing station NEAMS


  0%|          | 0/163458 [00:00<?, ?it/s]

Station NEAMS processed
Processing station RTJ


  0%|          | 0/140896 [00:00<?, ?it/s]

Station RTJ processed
Processing station SSEIR


  0%|          | 0/162100 [00:00<?, ?it/s]

Station SSEIR processed
Processing station SWAMS


  0%|          | 0/164954 [00:00<?, ?it/s]

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Visual check of one exported segment: the saved image and the STA/LTA
# curve stored next to it (same base name, "_stalta.npy" suffix).
# The original literal started with "/me9ia/", a typo for "/media/".
path = "/media/plerolland/akoustik/spectros/2018/ELAN/16/20180116_174204"
img = mpimg.imread(f'{path}.png')
sta_lta = np.load(f'{path}_stalta.npy')
segment_name = path.rsplit("/", 1)[-1]

fig_img = plt.figure(1)
ax_img = fig_img.gca()
ax_img.imshow(img, aspect='auto', cmap='jet')
ax_img.set_title(f'{segment_name}.png')

fig_sta = plt.figure(2)
ax_sta = fig_sta.gca()
ax_sta.plot(sta_lta)
ax_sta.set_title(f'{segment_name}_stalta.npy')
ax_sta.set_xlabel("sample index")
ax_sta.set_ylabel("STA/LTA");
