In [1]:
import datetime
from pathlib import Path
import numpy as np
from matplotlib import pyplot as plt

from tqdm.notebook import tqdm
from scipy.stats import chi2, t as student

from utils.data_reading.catalogs.ISC import ISC_file
from utils.data_reading.catalogs.association import Association
from utils.data_reading.catalogs.events import AcousticReception
from utils.data_reading.sound_data.station import StationsCatalog
from utils.physics.sound.sound_model import HomogeneousSoundModel
from utils.physics.sound.sound_velocity_grid import MonthlySoundVelocityGridOptimized



In [13]:
# --- Configuration: year under study and input file locations -----------------
year = 2018
RES_FILE = f"../../data/detections/{year}/matchups_clusters_loc.csv"
datasets_yaml = "/home/plerolland/Bureau/dataset.yaml"
isc_file = f"/home/plerolland/Bureau/catalogs/ISC/eqk_isc_{year}.txt"

# --- Sound propagation models -------------------------------------------------
# Homogeneous model, plus a grid model built from one NetCDF file per month (01..12).
sound_model_h = HomogeneousSoundModel()
velocity_grid_paths = [f"../../data/sound_model/min-velocities_month-{i:02d}.nc" for i in range(1, 13)]
sound_model_g = MonthlySoundVelocityGridOptimized(velocity_grid_paths, interpolate=True)

# --- Catalogs -----------------------------------------------------------------
# Stations catalog, with undated and unlocated stations filtered out.
stations_c = (
    StationsCatalog(datasets_yaml)
    .filter_out_undated()
    .filter_out_unlocated()
)

# ISC earthquake catalog for the selected year.
isc = ISC_file(isc_file)

In [14]:
# Load the localisation results: one comma-separated row per located association.
with open(RES_FILE, "r") as res_file:
    rows = res_file.readlines()

DATE_FORMAT = "%Y%m%d_%H%M%S"
associations, computed_dates, computed_locs, uncertainties, ref_iscs = [], [], [], [], []

for row in tqdm(rows):
    fields = row.split(",")

    association = Association()
    associations.append(association)

    # Fixed leading columns: date, two location floats, three uncertainty floats,
    # then the integer key used to look up the matching entry in the ISC catalog.
    computed_dates.append(datetime.datetime.strptime(fields[0], DATE_FORMAT))
    computed_locs.append((float(fields[1]), float(fields[2])))
    uncertainties.append((float(fields[3]), float(fields[4]), float(fields[5])))
    ref_iscs.append(isc[int(fields[6])])

    # Remaining columns alternate between a station token and a reception date.
    for station_token, date_token in zip(fields[7::2], fields[8::2]):
        # The date may be the last element of the row, so it can carry a "\n",
        # or be empty if the file was opened and saved with Excel.
        date_token = date_token.strip()
        if not date_token:
            continue
        reception_date = datetime.datetime.strptime(date_token, DATE_FORMAT)

        # A station token is "<name>-<starting year>"; the name itself may contain "-".
        station_name, _, starting_year = station_token.rpartition("-")
        station = stations_c.by_starting_year(int(starting_year)).by_names(station_name)[0]
        association.add_event(AcousticReception(station, reception_date))

# Convert everything to numpy arrays so that the next cells can index them by position.
associations = np.array(associations)
computed_dates = np.array(computed_dates)
computed_locs = np.array(computed_locs)
uncertainties = np.array(uncertainties)
ref_iscs = np.array(ref_iscs)

  0%|          | 0/5718 [00:00<?, ?it/s]

In [29]:
# Position of the ISC reference of every event, one row per event.
ref_locs = np.array([ref.get_pos() for ref in ref_iscs])

# Column 0 is treated as latitude and column 1 as longitude, for both arrays.
c_lat, c_lon = computed_locs[:, 0], computed_locs[:, 1]
lat_r, lon_r = ref_locs[:, 0], ref_locs[:, 1]

# When the two longitudes are more than 180 degrees apart, shift one of them by 360
# so that the difference is taken the short way round: the reference longitude is
# shifted if it is negative, otherwise the computed longitude is shifted.
wraps = np.abs(lon_r - c_lon) > 180
shift_ref = wraps & (lon_r < 0)
shift_computed = wraps & ~shift_ref
lon_r = np.where(shift_ref, lon_r + 360, lon_r)
c_lon = np.where(shift_computed, c_lon + 360, c_lon)

# Distance between computed and reference positions, in degrees.
# NOTE(review): this is a planar distance in (lat, lon) degrees, not a great-circle
# distance, so longitude differences are over-weighted at high latitudes.
diff = np.sqrt((c_lat - lat_r) ** 2 + (c_lon - lon_r) ** 2)

In [77]:
# Number of events whose computed location is more than 1 degree away from the reference.
print(f"{np.count_nonzero(diff > 1)}/{len(diff)}")

# Pick the event with the 4th largest error (argsort is ascending, so index -4).
worst = np.argsort(diff)[-4]
print(computed_locs[worst], ref_locs[worst], diff[worst])

# Re-locate that event with the homogeneous sound model; the result exposes the
# `jac`, `fun`, `cost` and `x` attributes used below.
loc_h = associations[worst].compute_source(sound_model_h)

# Significance level of the residual test.
alpha = 0.05

# EQM = mean quadratic error AKA sigma0 comp

J = loc_h.jac
param_cov = np.linalg.inv(J.T.dot(J))
print(param_cov)

n_res = len(loc_h.fun)
# NOTE(review): the usual residual cofactor matrix is I - J (J^T J)^-1 J^T; here a
# constant 1/n matrix is used in place of the identity - confirm this is intended.
res_cov = np.ones((n_res, n_res)) / n_res - J.dot(param_cov.dot(J.T))
# NOTE(review): EQM is sqrt(cost) with no division by the degrees of freedom, and it is
# square-rooted a second time in the normalisation below - confirm against the reference.
EQM = np.sqrt(loc_h.cost)

# Normalised residuals, built as a NEW array: the previous version aliased `loc_h.fun`
# and overwrote it element by element, silently corrupting the solver result.
testResiduals = loc_h.fun / np.sqrt(EQM * np.abs(np.diag(res_cov)))

# Degrees of freedom: number of residuals minus number of estimated parameters.
df = n_res - len(loc_h.x)
stud = 2*student.ppf(1-alpha/2, df) # 2* because we test on the entire law student interval of confidence
print(testResiduals, stud)

3628/5718
[  65.106  -124.4615] [22.8723 61.3477] 179.23760827552346
[[ 4.23010058e+04  2.69435585e+02 -8.05820243e+03]
 [ 2.69435585e+02  1.72203130e+00 -5.12787820e+01]
 [-8.05820243e+03 -5.12787820e+01  1.53546252e+03]]
[0.1321973  0.1321973  1.51355344 9.5425409  2.86611378] 8.60530545982255


In [74]:
# Re-locate the `worst` event using only three of its receptions (positions 0, 2 and 4)
# and print the full result of the homogeneous-model localisation.
kept_events = np.array(associations[worst].events)[[0, 2, 4]]
association2 = Association(kept_events)
print(association2.compute_source(sound_model_h))

     message: `xtol` termination condition is satisfied.
     success: True
      status: 3
         fun: [ 1.244e-05  6.460e-06  1.386e-05]
           x: [-1.232e+03 -7.985e+00  5.992e+01]
        cost: 1.943196145925608e-10
         jac: [[ 3.549e-01  7.358e+01 -1.301e+01]
               [-2.966e-01  3.593e+01  1.745e+01]
               [-1.000e+00 -7.467e+01  2.947e+00]]
        grad: [-1.136e-05  1.126e-04 -8.280e-06]
  optimality: 0.014001266115577492
 active_mask: [0 0 0]
        nfev: 20
        njev: 18


In [67]:
# For the ten events with the largest location error (largest first), re-locate the
# source from receptions 0, 2, 3 and 4 only, then print the solution and its residuals.
# Note: this loop rebinds the notebook-level names `worst`, `association2` and `loc_h`.
ten_worst = np.argsort(diff)[::-1][:10]
for worst in ten_worst:
    kept_events = np.array(associations[worst].events)[[0, 2, 3, 4]]
    association2 = Association(kept_events)
    loc_h = association2.compute_source(sound_model_h)
    print(loc_h.x, loc_h.fun)

[-8594.35085908    88.99706749   180.        ] [0.07680674 6.66921627 8.10463224 1.51222271]
[-8613.68206066    88.483242     180.        ] [ 4.63141767 13.32208765  2.8304072  11.52107718]
[-8676.38140438    86.86454252   180.        ] [0.46634388 2.18720906 1.77803641 4.43158936]
[-8683.91316853    86.49186342   180.        ] [ 0.20009626  4.73861245 13.89027951  8.95157081]
[-6390.51703483    55.48449655    39.31221794] [ 4.32620396  4.04937218 10.5582061  10.28137422]
[-9252.05522574    75.03023615   -67.33560666] [11.17382349  1.42357555 12.26703431  0.33036521]
[-8656.72185723    87.46326587   180.        ] [3.49996871 0.23409976 0.72570035 4.45976883]
[-10010.36555094     70.75843032   -120.09936205] [5.74205583 3.85991721 3.631399   5.51354219]
[-309.02606382  -20.49142765   64.88008433] [19.9233995   2.79150076 20.88324627  1.83165499]
[-8671.41818902    86.55423619   180.        ] [ 0.59376128 10.66428298 11.46830964  0.21026538]


NameError: name 'glob' is not defined