In [1]:
from haversine import haversine_vector
import pandas as pd
import numpy as np
import glob
import os
import sys
sys.path.append('../resources/library')
from tropical_cyclone.georeferencing import round_to_grid
from tropical_cyclone.cyclone import init_track_dataframe, tracking_algorithm, paper_tracking_algorithm, track_matching
from tropical_cyclone.visualize import plot_tracks

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# select the model to analyze
# selected_model = '02_swin_msl_vo_850'
# selected_model = '03_vgg_v3_relu_ks3_msl_vo_850'
# selected_model = '04_vgg_v3_relu_ks5_msl_vo_850'
# selected_model = '05_vgg_v3_linear_ks3_msl_vo_850'
# selected_model = '06_swin_msl_vo_850'
# selected_model = '07_vgg_v3_silu_ks3_msl_vo_850'
# selected_model = '08_vgg_v1_relu_ks3_msl_vo_850'
# selected_model = '09_vgg_v1_relu_ks3_msl_vo_850_lnc3'
# selected_model = '10_swin_lnc3_msl_vo_850'
selected_model = '11_swin_fg10_t_500_msl_vo_850'

# define inference directory to draw detections
dataset_dir = '../data/inference'
# get ibtracs directory
ibtracs_src = '../data/ibtracs/filtered/ibtracs_main-tracks_6h_1980-2021_TS-NR-ET-MX-SS-DS.csv'
# define test years (same as paper)
# test_years = [i for i in range(1980,2020)]
test_years = [1983, 1984, 1993, 1994, 2003, 2004, 2013, 2014]
# test_years = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]
# test_years = [1993]
# kilometer threshold
max_distance_detection = 1000.0

In [3]:
# get model directory
model_dir = os.path.join(dataset_dir, selected_model)
# get inference filenames
inference_files = [os.path.join(model_dir, f'{year}.csv') for year in test_years]
model_dir, inference_files

('../data/inference/11_swin_fg10_t_500_msl_vo_850',
 ['../data/inference/11_swin_fg10_t_500_msl_vo_850/1983.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/1984.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/1993.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/1994.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/2003.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/2004.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/2013.csv',
  '../data/inference/11_swin_fg10_t_500_msl_vo_850/2014.csv'])

In [4]:
# load csv files
csv_files = []
for file in inference_files:
    csv_files.append(pd.read_csv(file, index_col=0))
# merge csv files together
detections = pd.concat(csv_files).reset_index(drop=True)
# convert iso time with pandas
detections['ISO_TIME'] = pd.to_datetime(detections['ISO_TIME'])
# add WS as np.inf
detections['WS'] = np.inf
detections

Unnamed: 0,ISO_TIME,LAT,LON,WS
0,1983-05-21 00:00:00,56.25,190.75,inf
1,1983-05-21 00:00:00,33.50,206.50,inf
2,1983-05-21 00:00:00,30.00,100.50,inf
3,1983-05-21 00:00:00,8.25,267.50,inf
4,1983-05-21 06:00:00,32.50,207.75,inf
...,...,...,...,...
54133,2014-12-31 12:00:00,7.00,240.75,inf
54134,2014-12-31 18:00:00,27.25,204.00,inf
54135,2014-12-31 18:00:00,7.75,119.25,inf
54136,2014-12-31 18:00:00,6.75,170.50,inf


In [5]:
columns = ['ISO_TIME','SID','NATURE','WMO_WIND','LAT','LON']
# load ibtracs
observations = pd.read_csv(ibtracs_src, index_col=0)
# convert iso time with pandas
observations['ISO_TIME'] = pd.to_datetime(observations['ISO_TIME'])
# get only some columns from ibtracs
observations = observations[columns]
# round lat and lon to be comparable with training data
observations['LAT'] = round_to_grid(observations['LAT'], grid_res=0.25)
observations['LON'] = round_to_grid(observations['LON'], grid_res=0.25)
observations

Unnamed: 0,ISO_TIME,SID,NATURE,WMO_WIND,LAT,LON
0,1980-03-16 00:00:00,1980076N06148,NR,,6.00,147.75
1,1980-03-16 06:00:00,1980076N06148,NR,,6.00,147.25
2,1980-03-16 12:00:00,1980076N06148,NR,,6.00,146.75
3,1980-03-16 18:00:00,1980076N06148,NR,,6.00,146.50
4,1980-03-17 00:00:00,1980076N06148,NR,,6.00,146.00
...,...,...,...,...,...,...
86188,2021-12-16 06:00:00,2021349N05108,TS,,4.50,105.00
86189,2021-12-16 12:00:00,2021349N05108,TS,,4.25,104.50
86190,2021-12-16 18:00:00,2021349N05108,TS,,4.25,103.00
86191,2021-12-17 00:00:00,2021349N05108,TS,,4.25,103.00


In [6]:
tmp = pd.merge(left=detections, right=observations, on='ISO_TIME', how='inner')
tmp = tmp[tmp['ISO_TIME'].dt.year.isin(test_years)]
dates = tmp['ISO_TIME'].to_numpy()

In [7]:
print(f'There are {len(observations)} observations and {len(detections)} detections')

There are 86193 observations and 54138 detections


In [8]:
# get only detections and observations present on both dataframes
detections = detections[detections['ISO_TIME'].isin(dates)].reset_index(drop=True)
observations = observations[observations['ISO_TIME'].isin(dates)].reset_index(drop=True)

# select only TCs belonging to a certain nature
# observations = observations[observations['NATURE'].isin(['TS','SS','ET'])]

In [9]:
print(f'There are {len(observations)} observations and {len(detections)} detections')

There are 16708 observations and 54129 detections


# Localization

In [10]:
# merge together detections and ibtracs
matches = pd.merge(left=detections, right=observations, on='ISO_TIME')
# compute haversine distance between any couple of points
matches['HDIST'] = haversine_vector(array1=matches[['LAT_x','LON_x']].to_numpy(), array2=matches[['LAT_y','LON_y']].to_numpy(), normalize=True)
matches.head()

Unnamed: 0,ISO_TIME,LAT_x,LON_x,WS,SID,NATURE,WMO_WIND,LAT_y,LON_y,HDIST
0,1983-05-21 00:00:00,56.25,190.75,inf,1983141N07269,TS,25,7.0,268.5,8605.26955
1,1983-05-21 00:00:00,33.5,206.5,inf,1983141N07269,TS,25,7.0,268.5,6992.117142
2,1983-05-21 00:00:00,30.0,100.5,inf,1983141N07269,TS,25,7.0,268.5,15705.972826
3,1983-05-21 00:00:00,8.25,267.5,inf,1983141N07269,TS,25,7.0,268.5,177.384983
4,1983-05-21 06:00:00,32.5,207.75,inf,1983141N07269,TS,25,7.5,267.25,6711.922984


In [11]:
# remove all the distances above `max_distance_localization` km
matches = matches[matches['HDIST'] < max_distance_detection]
# group by LATx and LONx and find the minimum (to remove x duplicates)
matches = matches.groupby(by=['ISO_TIME','LAT_x','LON_x','SID','NATURE','WMO_WIND']).min('HDIST').reset_index()
# repeat grouping by LATy and LONy and find the minimum (to remove y duplicates)
matches = matches.groupby(by=['ISO_TIME','LAT_y','LON_y','SID','NATURE','WMO_WIND']).min('HDIST').reset_index()
# show result
matches

Unnamed: 0,ISO_TIME,LAT_y,LON_y,SID,NATURE,WMO_WIND,LAT_x,LON_x,WS,HDIST
0,1983-05-21 00:00:00,7.00,268.50,1983141N07269,TS,25,8.25,267.50,inf,177.384983
1,1983-05-21 06:00:00,7.50,267.25,1983141N07269,TS,25,6.75,267.00,inf,87.839718
2,1983-05-21 12:00:00,7.75,266.25,1983141N07269,TS,30,8.00,265.50,inf,138.477079
3,1983-05-21 18:00:00,8.00,265.25,1983141N07269,TS,40,7.25,267.75,inf,287.871819
4,1983-05-22 00:00:00,8.50,264.50,1983141N07269,TS,45,8.75,265.25,inf,87.013134
...,...,...,...,...,...,...,...,...,...,...
14693,2014-12-30 18:00:00,9.25,122.00,2014362N07130,TS,,9.75,120.50,inf,173.645633
14694,2014-12-31 00:00:00,8.75,121.50,2014362N07130,TS,,9.25,120.50,inf,123.096570
14695,2014-12-31 06:00:00,8.25,120.75,2014362N07130,TS,,9.25,120.00,inf,138.412989
14696,2014-12-31 12:00:00,7.50,120.25,2014362N07130,TS,,8.00,120.25,inf,55.597540


In [12]:
min_distance_localization = matches['HDIST'].min()
max_distance_localization = matches['HDIST'].max()
mean_distance_localization = matches['HDIST'].mean()
median_distance_localization = matches['HDIST'].median()

print(f"Model {selected_model} Localization results")
print(f"   Min distance ({np.round(min_distance_localization,2)} km)")
print(f"   Max distance ({np.round(max_distance_localization,2)} km)")
print(f"   Average distance ({np.round(mean_distance_localization,2)} km)")
print(f"   Median distance ({np.round(median_distance_localization,2)} km)")

Model 11_swin_fg10_t_500_msl_vo_850 Localization results
   Min distance (0.0 km)
   Max distance (999.01 km)
   Average distance (129.33 km)
   Median distance (77.59 km)


In [13]:
# plot_detections(detections, observations)

# Classification

In [14]:
def F_beta(beta, precision, recall):
    return (1 + beta**2) * ((precision * recall) / ((beta**2 * precision) + recall))

In [15]:
n_dets = len(detections)
n_tp = len(matches)
n_obs = len(observations)
n_fp = n_dets - n_tp
n_fn = n_obs - n_tp

In [16]:
precision = n_tp / (n_tp + n_fp)
recall = n_tp / (n_tp + n_fn)
f2_score = F_beta(beta=2, precision=precision, recall=recall) * 100

In [17]:
print(f"Model {selected_model} Classification results")
print(f"   F2 : {np.round(f2_score,2)} % (precision={np.round(precision, 2)}, recall={np.round(recall,2)})")
print(f"   TP : {n_tp} out of {n_obs} observations ({np.round(n_tp / n_obs * 100)} %)")
print(f"   FP : {n_fp} out of {n_dets} ML detections ({np.round(n_fp / n_dets * 100)} %)")
print(f"   FN : {n_fn} out of {n_obs} observations ({np.round(n_fn / n_obs * 100)} %)")

Model 11_swin_fg10_t_500_msl_vo_850 Classification results
   F2 : 60.76 % (precision=0.27, recall=0.88)
   TP : 14698 out of 16708 observations (88.0 %)
   FP : 39431 out of 54129 ML detections (73.0 %)
   FN : 2010 out of 16708 observations (12.0 %)


# Tracking

In [18]:
# minimum track length (1 day)
min_track_count = 12
# maximum distance (in km) between each consecutive tc
max_distance_tracking = 400.0
# minimum speed of wind in order to consider the track true
min_wind_speed = 17.0
# maximum distance between matches between tracks
max_track_distance_tracking = 300.0

grid_res = 0.25
km_to_deg = 110.474

# whether or not to plot the tracks
plot = False

In [19]:
# rename SID to TRACK_ID
observed_tracks = observations.rename(columns={'SID':'TRACK_ID'})
# get only long enough tracks for the comparison
valid_observations_sids = observed_tracks.groupby('TRACK_ID').filter(lambda x: len(x) >= min_track_count)['TRACK_ID'].unique()
# filter out the observations
observed_tracks = observed_tracks[observed_tracks['TRACK_ID'].isin(valid_observations_sids)].reset_index(drop=True)
observed_tracks.head()

Unnamed: 0,ISO_TIME,TRACK_ID,NATURE,WMO_WIND,LAT,LON
0,1983-05-21 00:00:00,1983141N07269,TS,25,7.0,268.5
1,1983-05-21 06:00:00,1983141N07269,TS,25,7.5,267.25
2,1983-05-21 12:00:00,1983141N07269,TS,30,7.75,266.25
3,1983-05-21 18:00:00,1983141N07269,TS,40,8.0,265.25
4,1983-05-22 00:00:00,1983141N07269,TS,45,8.5,264.5


In [20]:
# # apply tracking scheme
# tracking_src = f'/Users/davide/Developer/ml-tropical-cyclones-detection/data/inference/{selected_model}/tracking.csv'
# if not os.path.exists(tracking_src):
#     detected_tracks = init_track_dataframe(detections)
#     # detected_tracks = tracking_algorithm(detected_tracks, max_distance_tracking, min_track_count)
#     detected_tracks = paper_tracking_algorithm(detected_tracks, max_distance_tracking, min_track_count)
#     detected_tracks.to_csv(tracking_src)
# else:
#     detected_tracks = pd.read_csv(tracking_src, index_col=0)
# # store detected tracks to disk
# detected_tracks.head()

detected_tracks = init_track_dataframe(detections)
detected_tracks = tracking_algorithm(detected_tracks, max_distance_tracking, min_track_count)
# detected_tracks = paper_tracking_algorithm(detected_tracks, max_distance_tracking, min_track_count)

In [21]:
# paper_detected_tracks = init_track_dataframe(detections)
# paper_detected_tracks = paper_tracking_algorithm(paper_detected_tracks, max_distance, min_track_count)
# paper_detected_tracks.head()

In [22]:
print(f'There are:')
print(f'   - {len(detected_tracks["TRACK_ID"].unique())} detected tracks')
# print(f'   - {len(paper_detected_tracks["TRACK_ID"].unique())} detected tracks (paper)')
print(f'   - {len(observed_tracks["TRACK_ID"].unique())} observed tracks')

There are:
   - 535 detected tracks
   - 459 observed tracks


In [23]:
if plot:plot_tracks(detected_tracks, observed_tracks)
# plot_tracks(detected_tracks[pd.to_datetime(detected_tracks['ISO_TIME']).dt.year.isin([2005])], observed_tracks[pd.to_datetime(observed_tracks['ISO_TIME']).dt.year.isin([2005])])

# Track Matching

In [24]:
max_track_distance_matching = 300.0

In [25]:
track_matches = track_matching(detected_tracks, observed_tracks, max_track_distance_matching)
# H = HITS = True Positive
H = len(track_matches[(track_matches['DET_TRACK_ID']!='') & (track_matches['OBS_TRACK_ID']!='')])
# M = Miss = False Negative
M = len(track_matches[(track_matches['DET_TRACK_ID']=='') & (track_matches['OBS_TRACK_ID']!='')])
# FA = False Alarm = False Positive
FA = len(track_matches[(track_matches['DET_TRACK_ID']!='') & (track_matches['OBS_TRACK_ID']=='')])

POD = (H / (H + M))
FAR = (FA / (H + FA))

print(f"Hits : {H}")
print(f"Miss : {M}")
print(f"False Alarm : {FA}")
print(f"POD : {POD}")
print(f"FAR : {FAR}")

Unnamed: 0,OBS_TRACK_ID,DET_TRACK_ID
0,1983141N07269,19830521T00_3
1,1983160N11266,19830610T18_2
2,1983186N04153,19830707T06_6
3,1983187N12259,19830708T06_3
4,1983188N06136,19830709T06_4
...,...,...
542,1994186N09139,
543,1983183N11251,
544,2014153N12267,
545,1983264N15248,


# Save to file

In [27]:
columns = [
    'model',
    'max_distance_detection', 
    'n_dets', 
    'n_tp', 
    'n_obs', 
    'n_fp', 
    'n_fn', 
    'precision', 
    'recall', 
    'f2_score', 
    'min_distance_localization', 
    'max_distance_localization', 
    'mean_distance_localization', 
    'median_distance_localization', 
    'min_track_count', 
    'max_distance_tracking', 
    'min_wind_speed', 
    'max_track_distance_matching', 
    'max_track_distance_tracking', 
    'H', 
    'M', 
    'FA', 
    'POD', 
    'FAR', 
    'ibtracs_src', 
    'test_years', 
]
dst = '/Users/davide/Developer/ml-tropical-cyclones-detection/data/inference/results_analysis.csv'
if os.path.exists(dst):
    results = pd.read_csv(dst, index_col=0)
else:
    results = pd.DataFrame(columns=columns)
results

Unnamed: 0,model,max_distance_detection,n_dets,n_tp,n_obs,n_fp,n_fn,precision,recall,f2_score,...,min_wind_speed,max_track_distance_matching,max_track_distance_tracking,H,M,FA,POD,FAR,ibtracs_src,test_years
0,03_vgg_v3_relu_ks3_msl_vo_850,1000.0,349683,73732,82069,275951,8337,0.210854,0.898415,54.37792,...,17.0,300.0,300.0,1900,434,392,0.814053,0.17103,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
1,02_swin_msl_vo_850,1000.0,350174,75884,82069,274290,6185,0.216704,0.924637,55.924534,...,17.0,300.0,300.0,1985,349,458,0.850471,0.187474,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
2,04_vgg_v3_relu_ks5_msl_vo_850,1000.0,364985,73892,82069,291093,8177,0.202452,0.900364,53.29306,...,17.0,300.0,300.0,1900,434,439,0.814053,0.187687,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
3,05_vgg_v3_linear_ks3_msl_vo_850,1000.0,715817,76720,82069,639097,5349,0.107178,0.934823,36.740022,...,17.0,300.0,300.0,1894,440,1628,0.811482,0.462237,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
4,06_swin_msl_vo_850,1000.0,360628,75186,82069,285442,6883,0.208486,0.916132,54.569287,...,17.0,300.0,300.0,1940,394,505,0.831191,0.206544,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
5,07_vgg_v3_silu_ks3_msl_vo_850,1000.0,443769,75801,82069,367968,6268,0.170812,0.923625,49.09105,...,17.0,300.0,300.0,1938,396,461,0.830334,0.192163,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
6,08_vgg_v1_relu_ks3_msl_vo_850,1000.0,376808,75064,82069,301744,7005,0.19921,0.914645,53.230537,...,17.0,300.0,300.0,1977,357,504,0.847044,0.203144,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
7,09_vgg_v1_relu_ks3_msl_vo_850_lnc3,1000.0,396842,75738,82069,321104,6331,0.190852,0.922858,52.224603,...,17.0,300.0,300.0,1997,337,591,0.855613,0.228362,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
8,10_swin_lnc3_msl_vo_850,1000.0,320455,75844,82069,244611,6225,0.236676,0.924149,58.455662,...,17.0,300.0,300.0,1987,347,467,0.851328,0.190302,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
9,11_swin_fg10_t_500_msl_vo_850,1000.0,262675,76506,82044,186169,5538,0.291257,0.9325,64.742211,...,17.0,300.0,300.0,2006,328,392,0.859469,0.16347,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."


In [28]:
results = pd.concat([results, pd.DataFrame(data={
    'model': [selected_model],
    'max_distance_detection': [max_distance_detection], 
    'n_dets': [n_dets], 
    'n_tp': [n_tp], 
    'n_obs': [n_obs], 
    'n_fp': [n_fp], 
    'n_fn': [n_fn], 
    'precision': [precision], 
    'recall': [recall], 
    'f2_score': [f2_score], 
    'min_distance_localization': [min_distance_localization], 
    'max_distance_localization': [max_distance_localization], 
    'mean_distance_localization': [mean_distance_localization], 
    'median_distance_localization': [median_distance_localization], 
    'min_track_count': [min_track_count], 
    'max_distance_tracking': [max_distance_tracking], 
    'min_wind_speed': [min_wind_speed], 
    'max_track_distance_matching': [max_track_distance_matching], 
    'max_track_distance_tracking': [max_track_distance_tracking], 
    'H': [H], 
    'M': [M], 
    'FA': [FA], 
    'POD': [POD], 
    'FAR': [FAR], 
    'ibtracs_src': [ibtracs_src], 
    'test_years': [test_years], 
})])
results

Unnamed: 0,model,max_distance_detection,n_dets,n_tp,n_obs,n_fp,n_fn,precision,recall,f2_score,...,min_wind_speed,max_track_distance_matching,max_track_distance_tracking,H,M,FA,POD,FAR,ibtracs_src,test_years
0,03_vgg_v3_relu_ks3_msl_vo_850,1000.0,349683,73732,82069,275951,8337,0.210854,0.898415,54.37792,...,17.0,300.0,300.0,1900,434,392,0.814053,0.17103,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
1,02_swin_msl_vo_850,1000.0,350174,75884,82069,274290,6185,0.216704,0.924637,55.924534,...,17.0,300.0,300.0,1985,349,458,0.850471,0.187474,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
2,04_vgg_v3_relu_ks5_msl_vo_850,1000.0,364985,73892,82069,291093,8177,0.202452,0.900364,53.29306,...,17.0,300.0,300.0,1900,434,439,0.814053,0.187687,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
3,05_vgg_v3_linear_ks3_msl_vo_850,1000.0,715817,76720,82069,639097,5349,0.107178,0.934823,36.740022,...,17.0,300.0,300.0,1894,440,1628,0.811482,0.462237,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
4,06_swin_msl_vo_850,1000.0,360628,75186,82069,285442,6883,0.208486,0.916132,54.569287,...,17.0,300.0,300.0,1940,394,505,0.831191,0.206544,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
5,07_vgg_v3_silu_ks3_msl_vo_850,1000.0,443769,75801,82069,367968,6268,0.170812,0.923625,49.09105,...,17.0,300.0,300.0,1938,396,461,0.830334,0.192163,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
6,08_vgg_v1_relu_ks3_msl_vo_850,1000.0,376808,75064,82069,301744,7005,0.19921,0.914645,53.230537,...,17.0,300.0,300.0,1977,357,504,0.847044,0.203144,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
7,09_vgg_v1_relu_ks3_msl_vo_850_lnc3,1000.0,396842,75738,82069,321104,6331,0.190852,0.922858,52.224603,...,17.0,300.0,300.0,1997,337,591,0.855613,0.228362,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
8,10_swin_lnc3_msl_vo_850,1000.0,320455,75844,82069,244611,6225,0.236676,0.924149,58.455662,...,17.0,300.0,300.0,1987,347,467,0.851328,0.190302,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."
9,11_swin_fg10_t_500_msl_vo_850,1000.0,262675,76506,82044,186169,5538,0.291257,0.9325,64.742211,...,17.0,300.0,300.0,2006,328,392,0.859469,0.16347,../data/ibtracs/filtered/ibtracs_main-tracks_6...,"[1980, 1981, 1982, 1983, 1984, 1985, 1986, 198..."


In [29]:
results = results.reset_index(drop=True)
# results.to_csv(dst)

In [30]:
sys.path.append('/Users/davide/Developer/ml-tropical-cyclones-detection/resources/library/dynamicopy-0.6.1')
import dynamicopy

columnsd = {'ISO_TIME': 'time','LAT':'lat','LON':'lon','TRACK_ID':'track_id','WS':'ws'}
columns = list(columnsd.values())

bobs = dynamicopy.load_ibtracs()
bobs = bobs[bobs['basin'].isin(['WNP','ENP','NATL'])].reset_index(drop=True)

dets = detected_tracks.rename(columns={'ISO_TIME':'time','LAT':'lat','LON':'lon','TRACK_ID':'track_id'})
obss = observed_tracks.rename(columns={'ISO_TIME':'time','LAT':'lat','LON':'lon','TRACK_ID':'track_id'})

dets['time'] = pd.to_datetime(dets['time'])
obss['time'] = pd.to_datetime(obss['time'])

dets['lon'] = (dets['lon'] + 540) % 360 - 180
obss['lon'] = (obss['lon'] + 540) % 360 - 180
bobs['lon'] = (bobs['lon'] + 540) % 360 - 180
bobs = bobs[(bobs['lon']>=100) & (bobs['lon']<=320) & (bobs['lat']>=0) & (bobs['lat']<=70)]
bobs = bobs[bobs['time'].isin(dates)]

Failure in importing the cartopy library, the dynamicopy.cartoplot will not be loaded.     Please install cartopy if you wish to use it.
Failure in importing the cartopy library, the dynamicopy.cartoplot will not be loaded.     Please install cartopy if you wish to use it.


In [31]:
match_bourdin = dynamicopy.match_tracks(dets, bobs, "ours", 'bourdin', max_dist=max_track_distance_matching, min_overlap=0, ref=True)

n_match = len(match_bourdin[f'id_bourdin'].unique())
n_observations = len(bobs.track_id.unique())
n_detections = len(dets.track_id.unique())

POD = n_match / n_observations
FAR = 1 - (n_match / n_detections)
H, M, FA = n_match, (n_observations-n_match), n_detections - n_match

print(f"Hits : {H}")
print(f"Misses : {M}")
print(f"False Alarms : {FA}")
print(f"POD : {POD}")
print(f"FAR : {FAR}")

Hits : 173
Misses : 56
False Alarms : 362
POD : 0.7554585152838428
FAR : 0.6766355140186916


In [32]:
match_our_ibtracs = dynamicopy.match_tracks(dets, obss, "ours", 'ibtracs', max_dist=max_track_distance_matching, min_overlap=0, ref=True)

n_match = len(match_our_ibtracs[f'id_ibtracs'].unique())
n_observations = len(obss.track_id.unique())
n_detections = len(dets.track_id.unique())

POD = n_match / n_observations
FAR = 1 - (n_match / n_detections)
H, M, FA = n_match, (n_observations-n_match), n_detections - n_match

# POD, FAR, H, M, FA
print(f"Hits : {H}")
print(f"Misses : {M}")
print(f"False Alarms : {FA}")
print(f"POD : {POD}")
print(f"FAR : {FAR}")

Hits : 328
Misses : 131
False Alarms : 207
POD : 0.7145969498910676
FAR : 0.38691588785046727


In [33]:
match_obs_ibtracs = dynamicopy.match_tracks(bobs, obss, "bourdin", 'ibtracs', max_dist=max_track_distance_matching, min_overlap=0, ref=True)

n_match = min(len(match_obs_ibtracs[f'id_bourdin'].unique()), len(match_obs_ibtracs[f'id_ibtracs'].unique()))
n_observations = len(obss.track_id.unique())
n_detections = len(bobs.track_id.unique())

POD = n_match / n_observations
FAR = 1 - (n_match / n_detections)
H, M, FA = n_match, (n_observations-n_match), n_detections - n_match

# POD, FAR, H, M, FA
print(f"Hits : {H}")
print(f"Misses : {M}")
print(f"False Alarms : {FA}")
print(f"POD : {POD}")
print(f"FAR : {FAR}")

match_obs_ibtracs = dynamicopy.match_tracks(obss, bobs, "ibtracs", 'bourdin', max_dist=max_track_distance_matching, min_overlap=0, ref=True)

n_match = min(len(match_obs_ibtracs[f'id_bourdin'].unique()), len(match_obs_ibtracs[f'id_ibtracs'].unique()))
n_observations = len(bobs.track_id.unique())
n_detections = len(obss.track_id.unique())

POD = n_match / n_observations
FAR = 1 - (n_match / n_detections)
H, M, FA = n_match, (n_observations-n_match), n_detections - n_match

# POD, FAR, H, M, FA
print(f"\nHits : {H}")
print(f"Misses : {M}")
print(f"False Alarms : {FA}")
print(f"POD : {POD}")
print(f"FAR : {FAR}")

Hits : 223
Misses : 236
False Alarms : 6
POD : 0.485838779956427
FAR : 0.026200873362445365

Hits : 223
Misses : 6
False Alarms : 236
POD : 0.9737991266375546
FAR : 0.514161220043573


# Paper Results

In the paper, with the ML ensemble we have the following results:

- F2-score : 53 %
- Euclidean distance : 117.06 km
- Hit rate : 88.91 %
- POD : 71.49 %
- FAR : 23.00 %