In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cartopy.crs as ccrs
import cartopy.feature as cfeature
# NOTE(review): `lines` is not referenced anywhere in the visible notebook; this looks like an
# accidental IDE auto-import. It may also fail on NumPy versions that no longer ship
# `numpy.doc.constants`, which would break Restart & Run All — TODO confirm and remove.
from numpy.doc.constants import lines


In [None]:
# Location of the ISC export; the file has no header row, so its 20 columns are named here.
PATH_ISC = "/home/rsafran/PycharmProjects/toolbox/data/ISC/ISC_Ocean_indien_2018.csv"
cnames = [
    'EVENTID', 'TYPE', 'AUTHOR', 'DATE', 'TIME', 'LAT', 'LON',
    'DEPTH', 'DEPFIX', 'DEPQUAL', 'AUTHOR_MG', 'TYPE_MG', 'MAG',
    "14", '15', '16', '17', '18', '19', '20',
]
isc = pd.read_csv(PATH_ISC, names=cnames, header=None, sep=',', comment='#')
# Merge the DATE and TIME text columns into a single timestamp column.
isc['datetime'] = pd.to_datetime(isc['DATE'] + ' ' + isc['TIME'])
# Discard the columns that are not used afterwards (including the raw DATE/TIME text).
isc = isc.drop(columns=['TYPE', 'AUTHOR', 'DATE', 'TIME', "14", '15', '16', '17', '18', '19', '20'])

In [None]:
# Association output stored as a pickled Python object inside a .npy file;
# `.item()` unwraps the 0-d object array that np.load returns.
# NOTE(review): allow_pickle=True can execute arbitrary code while unpickling —
# only load files that come from a trusted source.
catalog_path = '/media/rsafran/CORSAIR/Association/validated/refined_s_-60-5,35-120,350,0.8,0.6_final_filtered.npy'
catalogue = np.load(catalog_path,allow_pickle=True).item()



In [None]:
# Tabulate the filtered events; element 0 of source_point is used as longitude, element 1 as latitude.
cat = pd.DataFrame.from_dict(catalogue['filtered_events'], orient='columns')
cat = cat.assign(
    lat=cat['source_point'].map(lambda point: point[1]),
    lon=cat['source_point'].map(lambda point: point[0]),
)
# The text before the first underscore of each uid is parsed as a timestamp and replaces the uid.
cat['uid'] = pd.to_datetime(cat['uid'].map(lambda uid: uid.split('_')[0]))
# Chronological order with a fresh 0..n-1 index.
cat = cat.sort_values("uid", ignore_index=True)

In [None]:
# Map of both catalogues on a plate carrée projection.
ax1 = plt.axes(projection=ccrs.PlateCarree())
# These features will be drawn on top if the image is behind.
ax1.add_feature(cfeature.LAND, facecolor='lightgray', zorder=2)
ax1.add_feature(cfeature.COASTLINE, edgecolor='black', linewidth=1, zorder=3)
ax1.add_feature(cfeature.BORDERS, linestyle=':', edgecolor='black', zorder=3)
# On this projection x is longitude and y is latitude, so both catalogues are plotted
# as (lon, lat). The association catalogue was previously drawn as (lat, lon), which
# transposed its points relative to the ISC events and the coastlines.
cat.plot(x='lon', y='lat', ax=ax1, style='.')
isc.plot(x='LON', y='LAT', ax=ax1, style='.')


In [None]:
# Upper panel: event times of both catalogues overlaid, 200 bins each.
ax_top = plt.subplot(2, 1, 1)
cat['uid'].hist(bins=200, ax=ax_top)
isc['datetime'].hist(bins=200, ax=ax_top)
# Lower panel: association catalogue alone, 100 bins.
ax_bottom = plt.subplot(2, 1, 2)
cat['uid'].hist(bins=100, ax=ax_bottom)

In [None]:
# Pair each association event with an ISC event by time.
# merge_asof requires both frames to be sorted on their key and raises otherwise;
# `cat` is sorted on "uid" when it is built, so the ISC side is sorted here.
# With the default direction ('backward') each event is paired with the latest ISC
# origin time at or before its uid, no further than 30 minutes away; rows without
# such a match get missing values in the ISC columns.
join_cat = pd.merge_asof(
    cat,
    isc.sort_values("datetime"),
    left_on='uid',
    right_on="datetime",
    tolerance=pd.Timedelta("30min"),
)

In [None]:
# Offset between the association time (uid) and the matched ISC origin time;
# missing (NaT) for rows where no ISC event was matched.
join_cat["time_error"] = join_cat["uid"]-join_cat["datetime"]

In [None]:
# Keep only complete rows, then map the matched pairs.
# NOTE(review): dropna() without `subset` removes a row when ANY column is missing,
# not only the rows left unmatched by the merge — confirm this is intended.
join_cat.dropna(inplace=True)
ax1 = plt.axes(projection=ccrs.PlateCarree())
# These features will be drawn on top if the image is behind.
ax1.add_feature(cfeature.LAND, facecolor='lightgray', zorder=2)
ax1.add_feature(cfeature.COASTLINE, edgecolor='black', linewidth=1, zorder=3)
ax1.add_feature(cfeature.BORDERS, linestyle=':', edgecolor='black', zorder=3)
# x is longitude and y is latitude on this projection: plot both position sets as
# (lon, lat). The association positions were previously drawn as (lat, lon).
join_cat.plot(x='lon', y='lat', ax=ax1, style='.')
join_cat.plot(x='LON', y='LAT', ax=ax1, style='.')

In [None]:
# Mean of the uid-minus-ISC time offsets over the rows kept in join_cat.
np.mean(join_cat.time_error)

## Arrivals

In [None]:
from utils.data_reading.sound_data.station import StationsCatalog

# Input / output locations used by the rest of the notebook.
CATALOG_PATH = "/media/rsafran/CORSAIR/OHASISBIO/recensement_stations_OHASISBIO_RS.csv"
DETECTIONS_DIR = "/home/rsafran/Bureau/tissnet/2018"
ASSOCIATION_OUTPUT_DIR = "../../../data/detection/association"

# Build the station catalogue, apply the undated / unlocated filters, then keep the '2018' dataset.
STATIONS = (
    StationsCatalog(CATALOG_PATH)
    .filter_out_undated()
    .filter_out_unlocated()
    .by_dataset('2018')
)

# List each retained station with its position (depth excluded).
for st in STATIONS:
    print(st.name, st.get_pos(include_depth=False), sep="\n")

In [None]:
from utils.physics.sound_model import ISAS_grid as isg
from pyproj import Geod
from multiprocessing import Manager

# Location handed to isg.load_ISAS_TS when loading a month of ISAS data.
ISAS_PATH = "/media/rsafran/CORSAIR/ISAS/86442/field/2018"

# Latitude / longitude bounds passed to the ISAS loader.
GRID_LAT_BOUNDS = [-60, 5]
GRID_LON_BOUNDS = [35, 120]
DEPTH = 1250               # meters
# Constant speed used by the geodesic fallback (distance / SOUND_SPEED); presumably m/s.
SOUND_SPEED = 1480
# Base picking error, combined in quadrature with the travel-time error;
# presumably in seconds — TODO confirm.
PICKING_ERROR_BASE = 2
# WGS84 ellipsoid used for the geodesic distance in the fallback path.
geod = Geod(ellps="WGS84")


def get_isas_data(month):
    """Return the ISAS data for ``month``, loading it on the first request only.

    Results are memoised in the module-level ``process_local_isas_cache`` dict, keyed
    by month, so repeated calls for the same month return the stored object.
    """
    try:
        return process_local_isas_cache[month]
    except KeyError:
        # Not loaded yet in this process: fall through and load it.
        pass
    loaded = isg.load_ISAS_TS(
        ISAS_PATH, month, GRID_LAT_BOUNDS, GRID_LON_BOUNDS, fast=False
    )
    process_local_isas_cache[month] = loaded
    return loaded

def compute_travel_time(lat, lon, station_lat, station_lon, month, velocity_dict=None):
    """Travel time between a source and a station using the ISAS grid.

    Args:
        lat, lon: Source position.
        station_lat, station_lon: Station position.
        month: Key selecting which month of ISAS data to use.
        velocity_dict: Optional mapping ``month -> ISAS data``. When omitted (``None``),
            the data is obtained from ``get_isas_data(month)``, which loads it on
            first use. Previously the ``None`` default raised a ``TypeError``.

    Returns:
        tuple: ``(tt, total_err, dist_m)`` — the travel time, its error combined in
        quadrature with ``PICKING_ERROR_BASE``, and the source-station distance.

    If the ISAS computation raises ``ValueError``, a message is printed and a
    constant-speed estimate is used instead: geodesic distance divided by
    ``SOUND_SPEED``, with 10 % of that travel time as its error.
    """
    # Load on demand when the caller did not supply the monthly grids.
    ds = get_isas_data(month) if velocity_dict is None else velocity_dict[month]
    try:
        tt, total_err, dist_m = isg.compute_travel_time(
            lat, lon, station_lat, station_lon,
            DEPTH, ds,
            resolution=30,
            verbose=False,
            interpolate_missing=True
        )
    except ValueError:
        print(f"Error in ISAS calculation for lat={lat}, lon={lon}, station_lat={station_lat}, station_lon={station_lon}")
        # geod.inv takes (lon, lat) pairs; only the distance (third value) is used.
        _, _, dist_m = geod.inv(lon, lat, station_lon, station_lat)
        tt = dist_m / SOUND_SPEED
        total_err = tt * 0.1
    # Combine the model error with the base picking error in quadrature.
    total_err = np.sqrt(PICKING_ERROR_BASE**2 + total_err**2)

    return tt, total_err, dist_m

# Global cache for ISAS data per process
# (must exist before get_isas_data is first called, since that function reads and fills it)
process_local_isas_cache = {}
# Create a manager for sharing data between processes
# (`manager.dict()` returns a proxy to a dict held by the manager, not a plain dict)
manager = Manager()
shared_velocity_grid = manager.dict()
# Load ISAS data into the shared dictionary
# get_isas_data also stores each month in process_local_isas_cache, so after this loop
# both containers hold months 1 to 12.
for m in range(1, 13):
    print(f"Loading ISAS data for month {m}...")
    shared_velocity_grid[m] = get_isas_data(m)


In [None]:
# Copy the monthly grids out of the manager proxy once. Indexing the proxy inside the
# row-wise apply would transfer a whole grid from the manager process for every event
# and every station; a plain dict lookup returns the same data without that cost.
velocity_grid = shared_velocity_grid.copy()

# For every station, store the (travel_time, error, distance) tuple returned by
# compute_travel_time for each ISC event in a "travel_time_<station>" column.
for st in STATIONS:
    print(st.name)
    name = st.name
    lat, lon = st.get_pos(include_depth=False)
    isc[f"travel_time_{name}"] = isc.apply(
        lambda x: compute_travel_time(
            x["LAT"], x["LON"], lat, lon, x.datetime.month, velocity_dict=velocity_grid
        ),
        axis=1,
    )

In [None]:
# Display the ISC table, which now carries one "travel_time_<station>" column per station.
isc

## Detections

In [None]:
from utils.detection.association import load_detections
import glob2
import datetime
from pathlib import Path

# --- Detection loading parameters ---
# RELOAD_DETECTIONS: when False, the cached .npy files are read instead of the raw
# detection output. Leave at True by default.
RELOAD_DETECTIONS = True
MIN_P_TISSNET_PRIMARY = 0.4  # min probability of browsed detections
MIN_P_TISSNET_SECONDARY = 0.1  # min probability of detections that can be associated with the browsed one
MERGE_DELTA_S = 10  # threshold below which we consider two events should be merged
MERGE_DELTA = datetime.timedelta(seconds=MERGE_DELTA_S)

if not RELOAD_DETECTIONS:
    # Cached results: the detections file holds a pickled object unwrapped with .item().
    DETECTIONS = np.load(f"{DETECTIONS_DIR}/cache/detections.npy", allow_pickle=True).item()
    # The refined merged file is used; "cache/detections_merged.npy" is the unrefined alternative.
    DETECTIONS_MERGED = np.load(f"{DETECTIONS_DIR}/cache/refined_detections_merged.npy", allow_pickle=True)
else:
    # Raw output: every regular file directly inside DETECTIONS_DIR (sub-directories are skipped).
    det_files = list(filter(lambda f: Path(f).is_file(), glob2.glob(DETECTIONS_DIR + "/*")))
    DETECTIONS, DETECTIONS_MERGED = load_detections(
        det_files, STATIONS, DETECTIONS_DIR,
        MIN_P_TISSNET_PRIMARY, MIN_P_TISSNET_SECONDARY, MERGE_DELTA,
    )

In [None]:
from datetime import timedelta


def extract_times(detections, station_name):
    """Return the detection times recorded for the station called ``station_name``.

    Args:
        detections: Mapping whose keys are station objects exposing a ``name``
            attribute and whose values are iterables of rows; the first element of
            each row is taken as the detection time.
        station_name: Name of the station to look up.

    Returns:
        list | None: First element of every row for that station, or ``None`` (after
        printing the available names) when no key has that name.
    """
    # Index the station objects by name; a later duplicate name overrides an earlier one.
    by_name = {}
    for candidate in detections.keys():
        by_name[candidate.name] = candidate

    if station_name in by_name:
        rows = detections[by_name[station_name]]
        times = []
        for row in rows:
            times.append(row[0])
        return times

    print(f"Station {station_name} not found. Available: {list(by_name.keys())}")
    return None


def check_detection(detections_df, catalogue_arrival_times, time_tolerance_seconds=15):
    """
    Flag each detection that falls within a tolerance of any catalogue arrival time.

    Args:
        detections_df (pd.DataFrame): Frame with a 'detection_time' column. It is
            modified in place: the 'is_in_isc' column is added or overwritten.
        catalogue_arrival_times (list-like): Theoretical arrival times (anything
            accepted by ``pd.to_datetime``). Missing values are ignored.
        time_tolerance_seconds (int | float): Matching tolerance in seconds
            (inclusive: an offset exactly equal to the tolerance is a match).

    Returns:
        pd.DataFrame: The same ``detections_df`` object, with the boolean
        'is_in_isc' column. Detections with a missing time are flagged False.
    """
    tolerance_ns = pd.Timedelta(seconds=time_tolerance_seconds).value

    # Catalogue times as sorted int64 nanoseconds, without missing values, so the
    # nearest arrival of every detection can be found by binary search instead of
    # comparing every detection with every arrival.
    arrivals = pd.DatetimeIndex(pd.to_datetime(catalogue_arrival_times)).dropna()
    arrivals_ns = np.sort(arrivals.values.astype("datetime64[ns]").astype("int64"))

    detections = pd.DatetimeIndex(pd.to_datetime(detections_df['detection_time']))
    has_time = ~detections.isna()
    detections_ns = detections.values.astype("datetime64[ns]").astype("int64")

    matched = np.zeros(len(detections_df), dtype=bool)
    if arrivals_ns.size and has_time.any():
        det = detections_ns[has_time]
        # Insertion position gives the arrivals just before and just after each detection;
        # clipping makes the first/last arrival serve as both neighbours at the edges.
        pos = np.searchsorted(arrivals_ns, det)
        last = arrivals_ns.size - 1
        before = arrivals_ns[np.clip(pos - 1, 0, last)]
        after = arrivals_ns[np.clip(pos, 0, last)]
        nearest = np.minimum(np.abs(det - before), np.abs(after - det))
        matched[has_time] = nearest <= tolerance_ns

    detections_df['is_in_isc'] = matched
    return detections_df

In [None]:
time_tol = [5, 10, 15, 20, 25, 30, 50]  # matching tolerances to sweep, in seconds

# Per-station inputs do not depend on the tolerance, so they are built once instead
# of being recomputed for each of the tolerances.
station_inputs = []
for st in STATIONS:
    name = st.name
    # Theoretical arrival time = ISC origin time + travel time (element 0 of the stored tuple).
    isc[f"arrival_time_{name}"] = isc.apply(lambda x: x.datetime + pd.Timedelta(seconds=x[f'travel_time_{name}'][0]), axis=1)
    detection_times = extract_times(DETECTIONS, name)
    detections_df = pd.DataFrame(detection_times, columns=['detection_time'])
    detections_df['detection_time'] = pd.to_datetime(detections_df['detection_time'])  # Ensure correct datetime format
    station_inputs.append((name, isc[f"arrival_time_{name}"], detections_df))

# One entry per (tolerance, station), tolerance-major: {"tol": <seconds>, <station name>: <match count>}.
res = []
for time_tolerance_seconds in time_tol:
    for name, arrival_times, detections_df in station_inputs:
        print(name)
        updated_df = check_detection(detections_df, arrival_times, time_tolerance_seconds)
        print(updated_df['is_in_isc'].value_counts())
        # Sum of the boolean column = number of matches; unlike value_counts()[1] this
        # does not fail when no detection matches.
        res.append({"tol": time_tolerance_seconds, name: int(updated_df['is_in_isc'].sum())})

In [None]:
# Display the sweep results: one dict per (tolerance, station) holding "tol" and the station's match count.
res

In [None]:
# One panel per station: number of matched detections as a function of the tolerance.
# Each `res` entry carries a single station key, so the entries of a station are
# selected by key and plotted as one curve on that station's own axes. (Previously
# every entry was drawn as an isolated, marker-less point on the last axes only.)
station_names = ['ELAN', 'MADE', 'MADW', 'NEAMS', 'RTJ', 'SSEIR', 'SSWIR', 'SWAMSbot', 'WKER2']
fig, axes = plt.subplots(ncols=3, nrows=3)
for ax, name in zip(axes.flat, station_names):
    station_res = [entry for entry in res if name in entry]
    ax.plot([entry['tol'] for entry in station_res], [entry[name] for entry in station_res])
    ax.set_title(name)
fig.tight_layout()

In [None]:
stations = ['ELAN', 'MADE', 'MADW', 'NEAMS', 'RTJ', 'SSEIR', 'SSWIR', 'SWAMSbot', 'WKER2']

# One panel per station showing its own match counts against the tolerance.
# (Previously every panel received the points of all stations, and the axis ticks
# were suppressed, so no value could be read from the figure.)
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(9, 6))
for ax, station in zip(axs.flat, stations):
    # Entries of `res` that belong to this station (each entry holds one station key).
    station_res = [entry for entry in res if station in entry]
    ax.plot([entry['tol'] for entry in station_res], [entry[station] for entry in station_res], 'o')
    ax.set_title(str(station))
    ax.set_xlabel("tolerance (s)")
    ax.set_ylabel("matched detections")

plt.tight_layout()
plt.show()

In [None]:
# First of the first nine result entries. A Python list cannot be indexed with a
# list (`[[0]]` raises TypeError), so an integer index is used.
res[0:9][0]