In [4]:
# Imports
import torch
import pickle as pkl
import numpy as np
from typing import Dict
import os
import xarray as xr
from scipy.spatial import KDTree
from utils_clean_data import *
%load_ext autoreload
%autoreload 2
from basic_functions import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
def create_tensor_sv_ROI(pkl_src) : 
    list_sv=[]
    list_roi=[]

    with open(pkl_src, 'rb') as pkl_file :
        while True : 
            try : 
                _, data_dict = pkl.load(pkl_file)
                sv = data_dict["Sv"]
                lon = data_dict["LONGITUDE"]
                lat = data_dict["LATITUDE"]
                list_sv.append(sv)
                ss = are_points_in_polygon(lon, lat)
                list_roi.append(ss)
            except EOFError : 
                break
    sv_tensor = torch.tensor(np.concatenate(list_sv, axis=0))
    roi_tensor = torch.tensor(np.concatenate(list_roi))

    return sv_tensor, roi_tensor

In [30]:
class Dataset(torch.utils.data.Dataset) : 
    def __init__(self, data, targets) : 
        self.data = data
        self.targets = targets
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index], self.targets[index]

In [37]:
sv, roi = create_tensor_sv_ROI("/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/acoustic_data/18kHz/processed_data_gen4/IMOS_18kHz_spring_day.pkl")
ds = Dataset(sv, roi)
print(ds.data.shape)
dataloader = torch.utils.data.DataLoader(ds, batch_size=128, shuffle=True)
for sv, label in dataloader : 
    print (sv.shape, label)


torch.Size([13538, 196])
torch.Size([128, 196]) tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, F

In [33]:
def filter_all_from_cdf(src_path : str, dest_path:str, gebco_path:str, channel:str, bath_thr:int=-1000) : 
    # transform netCDF into pkl (trajectory_name, data_dict[sv, depth, lon, lat, day, time, season])
    pkl_path = extract_and_save_netcdf_data(src_path, dest_path, channel="18kHz")
   
    # filter bathymetry
    filter_by_bathymetry_gebco(pkl_path, gebco_path, bath_thr)
    
    # enlever les données des profondeurs extrêmes (superieures et inf)
    remove_extreme_depth(pkl_path)
    pkl_path = "/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/acoustic_data/18kHz/processed_data_gen4/IMOS_18kHz.pkl"

    # filtrer par saison et période => summer_day, summer_night, spring_day, spring_night
    pkl_seasons = separate_by_season(pkl_path)
    pkl_spring = pkl_seasons[2]
    pkl_summer = pkl_seasons[3]

    pkl_periods_spring = separate_by_period(pkl_spring)
    pkl_spring_day = pkl_periods_spring[1]
    pkl_spring_night = pkl_periods_spring[4]

    pkl_periods_summer = separate_by_period(pkl_summer)
    pkl_summer_day = pkl_periods_summer[1]
    pkl_summer_night = pkl_periods_summer[4]

    list_pkl = [pkl_spring_day, pkl_spring_night, pkl_summer_day, pkl_summer_night]

    for pkl_file in list_pkl : 
        sv, roi = create_tensor_sv_ROI(pkl_file)
        ds = Dataset(sv, roi)
        dataloader = torch.utils.data.DataLoader(ds, batch_size=128, shuffle=True)
        
    # EM_pca pour enlever les NaN
    # Transformer en tenseurs de vecteurs 240

In [34]:
# Test
netCDF = "/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/acoustic_data/18kHz/IMOS_18kHz"
src_path = netCDF
dest_path = "/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/acoustic_data/18kHz/processed_data_gen4"
gebco_path = "/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/bathymetry/GEBCO_10_Mar_2025_5906cede0691/gebco_2024_n-9.7048_s-70.5215_w58.714_e133.1174.nc"
pkl_src = filter_all_from_cdf(src_path, dest_path, gebco_path, "18kHz")


IMOS Bioacoustics sub-Facility dataset from Mauritius to HIMI in May 2016
IMOS Bioacoustics sub-Facility dataset from Mauritius to HIMI in Mar 2017
IMOS Bioacoustics sub-Facility dataset from 47 S 93 E to 46 S 113 E in Feb 2016
IMOS Bioacoustics sub-Facility dataset from HIMI to Mauritius in May 2017
IMOS Bioacoustics sub-Facility dataset from 46 S 113 E to Albany in Feb 2016

 filter by GEBCO 

IMOS Bioacoustics sub-Facility dataset from Mauritius to HIMI in May 2016
IMOS Bioacoustics sub-Facility dataset from Mauritius to HIMI in Mar 2017
IMOS Bioacoustics sub-Facility dataset from 47 S 93 E to 46 S 113 E in Feb 2016
IMOS Bioacoustics sub-Facility dataset from HIMI to Mauritius in May 2017
IMOS Bioacoustics sub-Facility dataset from 46 S 113 E to Albany in Feb 2016
end of file
Filtering complete. Original file updated.

 REMOVE XTREME DEPTHS 


 SPLIT BY SEASON 

[3]
[3]
[2]
[3]
[2]
End of file

 SPLIT BY PERIOD 

End of file.

 SPLIT BY PERIOD 

End of file.


In [None]:
# verification que la bathymetrie a bien été filtrée
pkl_path = "/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/acoustic_data/18kHz/processed_data_gen4/IMOS_18kHz.pkl"
n_uncorrect_bath=0
with open(pkl_path, 'rb') as pkl_file : 
    for i in range(10) :
        try : 
            _, data_dict = pkl.load(pkl_file)
            # bath = data_dict["BATH"]
            # n_uncorrect_bath += np.sum(np.where(np.abs(bath)<1000))
            sv = data_dict["Sv"]
            depth = data_dict["DEPTH"]
            print(depth)
            print(sv.shape)
        except EOFError : 
            break


[ 22.5  27.5  32.5  37.5  42.5  47.5  52.5  57.5  62.5  67.5  72.5  77.5
  82.5  87.5  92.5  97.5 102.5 107.5 112.5 117.5 122.5 127.5 132.5 137.5
 142.5 147.5 152.5 157.5 162.5 167.5 172.5 177.5 182.5 187.5 192.5 197.5
 202.5 207.5 212.5 217.5 222.5 227.5 232.5 237.5 242.5 247.5 252.5 257.5
 262.5 267.5 272.5 277.5 282.5 287.5 292.5 297.5 302.5 307.5 312.5 317.5
 322.5 327.5 332.5 337.5 342.5 347.5 352.5 357.5 362.5 367.5 372.5 377.5
 382.5 387.5 392.5 397.5 402.5 407.5 412.5 417.5 422.5 427.5 432.5 437.5
 442.5 447.5 452.5 457.5 462.5 467.5 472.5 477.5 482.5 487.5 492.5 497.5
 502.5 507.5 512.5 517.5 522.5 527.5 532.5 537.5 542.5 547.5 552.5 557.5
 562.5 567.5 572.5 577.5 582.5 587.5 592.5 597.5 602.5 607.5 612.5 617.5
 622.5 627.5 632.5 637.5 642.5 647.5 652.5 657.5 662.5 667.5 672.5 677.5
 682.5 687.5 692.5 697.5 702.5 707.5 712.5 717.5 722.5 727.5 732.5 737.5
 742.5 747.5 752.5 757.5 762.5 767.5 772.5 777.5 782.5 787.5 792.5 797.5
 802.5 807.5 812.5 817.5 822.5 827.5 832.5 837.5 84

In [None]:
def EM_PCA(pkl_src) : 
    # Does not create a new pkl
    temp_path = pkl_path + ".tmp"

    with open(pkl_path, 'rb') as pkl_file, open(temp_path, 'wb') as temp_file :     
        _, data_dict = pkl.load(pkl_file)
        sv = data_dict["Sv"]
        
                
            

In [None]:
src_basename = os.path.dirname(gebco_path)
src_filename = os.path.splitext(src_basename)[0]
print(src_basename)
print(src_filename)

/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/bathymetry/GEBCO_10_Mar_2025_5906cede0691
/home/elise/Documents/M1-BIM/S2/active_acoutics_analysis_sea_elephants/data/bathymetry/GEBCO_10_Mar_2025_5906cede0691
