In [1]:
# NHC OFCL (intensity) error analysis using RI
# Note on the sign of intensity errors (calculated as: forecast intensity - best track intensity):
#  Intensity error that is negative corresponds to a forecast underestimating the intensity (compared to best track)
#  Intensity error that is positive corresponds to a forecast overestimating the intensity (compared to best track)

# SKIP TO CHECKPOINTS (for each BASIN) and only run from there (to use the precomputed .parquet file)
#   (otherwise it will take minutes)

# The following is based on RI analysis from the following paper
# https://journals.ametsoc.org/view/journals/wefo/35/6/WAF-D-19-0253.1.xml#bib15
# (This notebook only covers Atlantic RI.)

# Note: as in the paper above, there is some overlap for rapid intensification events and forecasts
# an example would be: RI in 24h in one forecast, and then a subsequent forecast 12 hours later has the 12h with RI
# this results in some overlap (double counting)

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Input text files, one per basin (ATL / EPAC) and error category (TI / AC).
# These are relative paths, so the files must be in the notebook's working directory.
filepath_ti_errs_atl = '1989-present_OFCL_v_BCD5_ind_ATL_TI_errors.txt'
filepath_ti_errs_pac = '1989-present_OFCL_v_BCD5_ind_EPAC_TI_errors.txt'
filepath_ac_errs_atl = '1989-present_OFCL_v_BCD5_ind_ATL_AC_errors.txt'
filepath_ac_errs_pac = '1989-present_OFCL_v_BCD5_ind_EPAC_AC_errors.txt'

# only considering track intensity errors for the atlantic (filepath_ti_errs_atl) in this notebook

In [2]:
def read_intensity_err(filepath):
    """Parse a forecast error text file into header metadata and a DataFrame.

    Expected layout, as read by this function:
      * lines 1-3: 'label: value' lines stored as header['basin'],
        header['models'] (value split on single spaces) and header['kt_range']
      * lines 4-6: free-text lines stored verbatim as header['subtropical'],
        header['extratropical'] and header['dissipation']
      * line 7: separator line (ignored)
      * line 8: space-separated column names
      * remaining lines: one space-separated data row per line

    Only the columns of one model are kept: when the first listed model is
    'OFCL', columns whose name contains 'hI02' or 'hT02' are dropped; otherwise
    columns containing 'hI01' or 'hT01' are dropped.

    Args:
        filepath: path of the text file to read.

    Returns:
        [header, df]: header is a dict of the header fields above; df has the
        'Date/Time' column parsed to timestamps and renamed to 'ds', 'STMID'
        kept as strings, every other column converted to float, and the
        missing-value marker -9999 replaced by NaN.
    """
    header = {}
    with open(filepath, 'r') as f:
        # 'label: value' header lines
        header['basin'] = f.readline().strip().split(':')[1].strip()
        header['models'] = f.readline().strip().split(':')[1].strip().split(' ')
        header['kt_range'] = f.readline().strip().split(':')[1].strip()
        # free-text header lines
        header['subtropical'] = f.readline().strip()
        header['extratropical'] = f.readline().strip()
        header['dissipation'] = f.readline().strip()

        # line space between header and data
        f.readline()

        # line with column names
        column_names = [tok for tok in f.readline().strip().split(' ') if tok]

        # data rows; blank lines (e.g. a trailing newline) are skipped so they
        # do not turn into rows made only of missing values
        data = []
        for ln in f:
            row = [tok for tok in ln.strip().split(' ') if tok]
            if row:
                data.append(row)

    # only keep columns for official forecasts (drop baseline forecasts)
    if header['models'][0] == 'OFCL':
        drop_markers = ('hI02', 'hT02')
    else:
        drop_markers = ('hI01', 'hT01')
    drop_columns = [
        name for name in column_names
        if any(marker in name for marker in drop_markers)
    ]

    df = pd.DataFrame(data, columns=column_names)
    # convert the time column to pandas timestamps
    df['Date/Time'] = pd.to_datetime(df['Date/Time'], dayfirst=True)
    # rename it to 'ds' to be shorter, and drop the baseline-forecast columns
    df = df.rename(columns={'Date/Time': 'ds'}).drop(columns=drop_columns)
    # convert all but 'ds' and 'STMID' from string to float
    numeric_columns = [name for name in df.columns if name not in ('ds', 'STMID')]
    df = df.astype({name: float for name in numeric_columns})
    # replace missing values (-9999) with NaN
    # (np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0)
    df = df.replace(-9999.0, np.nan).reset_index(drop=True)
    return [header, df]

def is_rapid_intensification(valid_h, base_intensity, valid_intensity):
    """Return True when the intensity change over valid_h hours counts as RI.

    Thresholds follow
    https://journals.ametsoc.org/view/journals/wefo/35/6/WAF-D-19-0253.1.xml#bib15
    'RI is therefore defined as an increase of at least 20 kt in 12 h,
    30 kt in 24 h, 45 kt in 36 h, and 55 kt in 48 h'.

    Args:
        valid_h: forecast lead time in hours. Hour 0 (the base time) and lead
            times beyond 48 h are never classified as RI. A lead time that
            falls between two listed periods uses the threshold of the next
            listed period (e.g. 18 h uses the 24 h threshold).
        base_intensity: intensity at the base time.
        valid_intensity: intensity at the valid time.

    Returns:
        bool: True if valid_intensity - base_intensity meets the threshold for
        the period, otherwise False (also False when either intensity is NaN).
    """
    if np.isnan(base_intensity) or np.isnan(valid_intensity):
        return False
    if valid_h == 0:
        # don't do RI for base time (000h)
        return False

    intensity_change = valid_intensity - base_intensity
    # (upper bound of the period in hours, minimum increase that counts as RI)
    for max_valid_h, threshold in ((12, 20), (24, 30), (36, 45), (48, 55)):
        if valid_h <= max_valid_h:
            return bool(intensity_change >= threshold)

    # only consider rapid intensification for the above periods
    return False

In [3]:






#####################
# ATLANTIC
#####################








In [4]:
# parse the Atlantic track/intensity error file into header metadata and the error table
header, df = read_intensity_err(filepath_ti_errs_atl)
# 'TI' = track and intensity (used for naming files)
error_category = 'TI'

In [5]:
# create mappings and create new columns for calculations and analysis
#
# Source columns such as '048hI01' hold the intensity error at a valid hour (here 48).
# For every such hour h, seven derived columns named f'{h}h<suffix>' are added to df,
# and two lookups (column name -> hour, hour -> column name) are kept per quantity.

column_names = list(df.keys())
# examples of column names with 48 hours in parenthesis
# (48hI01) intensity valid forecast hours
intensity_err_column_name_to_hour = {}
intensity_err_hour_to_column_name = {}
# (48hFI) forecast intensity for valid hours of the forecast (missing so recalculate from data)
forecast_intensity_column_name_to_hour = {}
forecast_intensity_hour_to_column_name = {}
# (48hBI) best intensity for valid hours of the forecast
best_intensity_column_name_to_hour = {}
best_intensity_hour_to_column_name = {}
# (48hFRI) forecast rapid intensification
forecast_rapid_intensification_column_name_to_hour = {}
forecast_rapid_intensification_hour_to_column_name = {}
# (48hBRI) best intensity rapid intensification
best_intensity_rapid_intensification_column_name_to_hour = {}
best_intensity_rapid_intensification_hour_to_column_name = {}
# (48hDFI) changes in forecast intensity from base time to valid hour
forecast_intensity_change_column_name_to_hour = {}
forecast_intensity_change_hour_to_column_name = {}
# (48hDBI) changes in best intensity from base time to valid hour
best_intensity_change_column_name_to_hour = {}
best_intensity_change_hour_to_column_name = {}
# (48hFIE) forecast intensity error (forecast intensity - best track intensity)
forecast_intensity_error_column_name_to_hour = {}
forecast_intensity_error_hour_to_column_name = {}

# (suffix, initial value, name -> hour map, hour -> name map), listed in the order the
# derived columns are created for each hour.
# np.nan is used instead of np.NaN because that alias was removed in NumPy 2.0.
derived_column_specs = [
    ('FI', np.nan, forecast_intensity_column_name_to_hour, forecast_intensity_hour_to_column_name),
    ('BI', np.nan, best_intensity_column_name_to_hour, best_intensity_hour_to_column_name),
    ('FRI', False, forecast_rapid_intensification_column_name_to_hour, forecast_rapid_intensification_hour_to_column_name),
    ('BRI', False, best_intensity_rapid_intensification_column_name_to_hour, best_intensity_rapid_intensification_hour_to_column_name),
    ('DFI', np.nan, forecast_intensity_change_column_name_to_hour, forecast_intensity_change_hour_to_column_name),
    ('DBI', np.nan, best_intensity_change_column_name_to_hour, best_intensity_change_hour_to_column_name),
    ('FIE', np.nan, forecast_intensity_error_column_name_to_hour, forecast_intensity_error_hour_to_column_name),
]

# derived column name -> initial scalar value, in creation order
new_columns = {}
for column_name in column_names:
    if 'hI' not in column_name:
        continue
    # create mappings from intensity column names to hour
    h = int(column_name.split('hI')[0])
    intensity_err_column_name_to_hour[column_name] = h
    intensity_err_hour_to_column_name[h] = column_name

    for suffix, initial_value, name_to_hour, hour_to_name in derived_column_specs:
        derived_column_name = f'{h}h{suffix}'
        new_columns[derived_column_name] = initial_value
        name_to_hour[derived_column_name] = h
        hour_to_name[h] = derived_column_name

# Add all derived columns with a single concat instead of one insert per column, which
# fragments the frame. Existing columns of the same name are dropped first so that
# re-running this cell resets them rather than duplicating them.
df = pd.concat(
    [
        df.drop(columns=list(new_columns), errors='ignore'),
        pd.DataFrame(new_columns, index=df.index),
    ],
    axis=1,
)

  df[forecast_intensity_column_name] = np.NaN
  df[best_intensity_column_name] = np.NaN
  df[forecast_rapid_intensification_column_name] = False
  df[best_intensity_rapid_intensification_column_name] = False
  df[forecast_intensity_change_column_name] = np.NaN
  df[best_intensity_change_column_name] = np.NaN
  df[forecast_intensity_error_column_name] = np.NaN
  df[forecast_intensity_column_name] = np.NaN
  df[best_intensity_column_name] = np.NaN
  df[forecast_rapid_intensification_column_name] = False
  df[best_intensity_rapid_intensification_column_name] = False
  df[forecast_intensity_change_column_name] = np.NaN
  df[best_intensity_change_column_name] = np.NaN
  df[forecast_intensity_error_column_name] = np.NaN


In [6]:
# Per-forecast summary columns:
#   FRI / FRI_count - whether any forecast hour shows (forecast) rapid intensification,
#                     and a counter column starting at 0
#   BRI / BRI_count - the same for the (best intensity) valid hours of a forecast
for flag_column, count_column in (('FRI', 'FRI_count'), ('BRI', 'BRI_count')):
    df[flag_column] = False
    df[count_column] = 0

# defragment frame (for performance)
df = df.copy()

  df['FRI'] = False
  df['FRI_count'] = 0
  df['BRI'] = False
  df['BRI_count'] = 0


In [7]:
# calculate forecast intensities and rapid intensification
#
# For every forecast row (storm + base time) and every valid hour that has an intensity error:
#   1. look up the best track wind speed ('WS') of the same storm at base time + valid hour
#   2. forecast intensity = that best track intensity + intensity error
#   3. store forecast/best intensities, their change since the base time, the error,
#      and the rapid intensification flags for that hour
#
# NOTE: rows are addressed with df.at[idx, ...] for idx in range(len(df)), so this relies
# on df having the default 0..n-1 index (read_intensity_err resets the index).

# make sure to sort so that we always calculate the forecast_base_intensity first in the inner loop below
sorted_intensity_err_hour_to_column_name_items = sorted(intensity_err_hour_to_column_name.items(), key=lambda x: x[0])

# (STMID, time) -> best track wind speed, built once so each valid-time lookup is a dict
# access instead of a boolean scan over the whole frame. setdefault keeps the first row
# for a repeated key; rows with a missing storm id or time can never match and are skipped.
best_track_ws_by_storm_time = {}
for row_stmid, row_time, row_ws in zip(df['STMID'], df['ds'], df['WS']):
    if pd.isna(row_stmid) or pd.isna(row_time):
        continue
    best_track_ws_by_storm_time.setdefault((row_stmid, row_time), row_ws)

# sentinel to tell "no row for that storm/time" apart from "row exists but WS is NaN"
_no_row = object()

for idx in range(0, len(df)):
    base_time = df.at[idx, 'ds']
    stmid = df.at[idx, 'STMID']
    best_intensity = df.at[idx, 'WS']
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
    forecast_base_intensity = np.nan
    # counters start from 0 for every row so re-running the cell does not double count
    forecast_rapid_intensification_count = 0
    best_intensity_rapid_intensification_count = 0

    for valid_h, valid_column_name in sorted_intensity_err_hour_to_column_name_items:
        intensity_err = df.at[idx, valid_column_name]
        if np.isnan(intensity_err):
            continue

        valid_time = base_time + timedelta(hours=valid_h)
        valid_best_intensity = best_track_ws_by_storm_time.get((stmid, valid_time), _no_row)
        if valid_best_intensity is _no_row:
            # raise warning
            # miss best_intensity calculations if there is no best track 'ws' data here?
            print(f"Warning: no forecast row for {valid_h} starting from forecast index {idx}...")
            continue
        if np.isnan(valid_best_intensity):
            print(f"Warning: best_intensity data not available for {valid_h} from forecast index {idx}...")
            continue

        # calculate forecast intensity from error and best_intensity
        forecast_intensity = valid_best_intensity + intensity_err

        # set the base intensity for this forecast
        if valid_h == 0:
            forecast_base_intensity = forecast_intensity

        # set forecast intensity for valid hour
        df.at[idx, forecast_intensity_hour_to_column_name[valid_h]] = forecast_intensity

        # set best intensity for valid hour
        df.at[idx, best_intensity_hour_to_column_name[valid_h]] = valid_best_intensity

        # calculate forecast rapid intensification for (selected) forecast hours
        forecast_rapid_intensification = is_rapid_intensification(valid_h, forecast_base_intensity, forecast_intensity)
        df.at[idx, forecast_rapid_intensification_hour_to_column_name[valid_h]] = forecast_rapid_intensification
        if forecast_rapid_intensification:
            forecast_rapid_intensification_count += 1

        # calculate best intensity rapid intensification for (selected) forecast hours
        best_intensity_rapid_intensification = is_rapid_intensification(valid_h, best_intensity, valid_best_intensity)
        df.at[idx, best_intensity_rapid_intensification_hour_to_column_name[valid_h]] = best_intensity_rapid_intensification
        if best_intensity_rapid_intensification:
            best_intensity_rapid_intensification_count += 1

        # calculate the change in forecast intensity at valid hour
        df.at[idx, forecast_intensity_change_hour_to_column_name[valid_h]] = forecast_intensity - forecast_base_intensity

        # calculate the change in best intensity at valid hour
        df.at[idx, best_intensity_change_hour_to_column_name[valid_h]] = valid_best_intensity - best_intensity

        # calculate the intensity error
        df.at[idx, forecast_intensity_error_hour_to_column_name[valid_h]] = forecast_intensity - valid_best_intensity

    # per-forecast summary: number of RI valid hours and whether there was any
    df.at[idx, 'FRI_count'] = forecast_rapid_intensification_count
    df.at[idx, 'BRI_count'] = best_intensity_rapid_intensification_count
    df.at[idx, 'FRI'] = forecast_rapid_intensification_count > 0
    df.at[idx, 'BRI'] = best_intensity_rapid_intensification_count > 0


In [8]:
# show every column name now present in df (source columns plus the derived ones)
df.columns.tolist()

['ds',
 'STMID',
 'F012',
 'F024',
 'F036',
 'F048',
 'F060',
 'F072',
 'F096',
 'F120',
 'F144',
 'F168',
 'Lat',
 'Lon',
 'WS',
 '000hT01',
 '012hT01',
 '024hT01',
 '036hT01',
 '048hT01',
 '060hT01',
 '072hT01',
 '096hT01',
 '120hT01',
 '144hT01',
 '168hT01',
 '000hI01',
 '012hI01',
 '024hI01',
 '036hI01',
 '048hI01',
 '060hI01',
 '072hI01',
 '096hI01',
 '120hI01',
 '144hI01',
 '168hI01',
 '0hFI',
 '0hBI',
 '0hFRI',
 '0hBRI',
 '0hDFI',
 '0hDBI',
 '0hFIE',
 '12hFI',
 '12hBI',
 '12hFRI',
 '12hBRI',
 '12hDFI',
 '12hDBI',
 '12hFIE',
 '24hFI',
 '24hBI',
 '24hFRI',
 '24hBRI',
 '24hDFI',
 '24hDBI',
 '24hFIE',
 '36hFI',
 '36hBI',
 '36hFRI',
 '36hBRI',
 '36hDFI',
 '36hDBI',
 '36hFIE',
 '48hFI',
 '48hBI',
 '48hFRI',
 '48hBRI',
 '48hDFI',
 '48hDBI',
 '48hFIE',
 '60hFI',
 '60hBI',
 '60hFRI',
 '60hBRI',
 '60hDFI',
 '60hDBI',
 '60hFIE',
 '72hFI',
 '72hBI',
 '72hFRI',
 '72hBRI',
 '72hDFI',
 '72hDBI',
 '72hFIE',
 '96hFI',
 '96hBI',
 '96hFRI',
 '96hBRI',
 '96hDFI',
 '96hDBI',
 '96hFIE',
 '120hFI',
 '

In [9]:
# save computation
# file name: NHC_OFCL_<basin, spaces replaced by underscores>_<error category>.parquet
basin_label = header['basin'].replace(' ', '_')
file_name_parquet = 'NHC_OFCL_' + basin_label + '_' + error_category + '.parquet'
df.to_parquet(file_name_parquet)
print(file_name_parquet)

NHC_OFCL_ATLANTIC_1989-2022_TI.parquet


In [10]:
###### CHECKPOINT

In [11]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# precomputed file to load when starting from the checkpoint; this is the name
# printed by the save cell above for the Atlantic TI run
file_name_parquet = 'NHC_OFCL_ATLANTIC_1989-2022_TI.parquet'

In [12]:
# load the precomputed frame; all statistics cells below read from df_ri
df_ri = pd.read_parquet(file_name_parquet)

In [13]:
# repeated as above (without df modifications) so it works with checkpointing
# create mappings for calculations and analysis
#
# The column names are read from df_ri (the frame loaded from the parquet file) and no
# DataFrame is written to, so this cell also runs on a fresh kernel started at the
# checkpoint, where the pre-checkpoint frame `df` does not exist.

column_names = list(df_ri.keys())
# examples of column names with 48 hours in parenthesis
# (48hI01) intensity valid forecast hours
intensity_err_column_name_to_hour = {}
intensity_err_hour_to_column_name = {}
# (48hFI) forecast intensity for valid hours of the forecast (missing so recalculate from data)
forecast_intensity_column_name_to_hour = {}
forecast_intensity_hour_to_column_name = {}
# (48hBI) best intensity for valid hours of the forecast
best_intensity_column_name_to_hour = {}
best_intensity_hour_to_column_name = {}
# (48hFRI) forecast rapid intensification
forecast_rapid_intensification_column_name_to_hour = {}
forecast_rapid_intensification_hour_to_column_name = {}
# (48hBRI) best intensity rapid intensification
best_intensity_rapid_intensification_column_name_to_hour = {}
best_intensity_rapid_intensification_hour_to_column_name = {}
# (48hDFI) changes in forecast intensity from base time to valid hour
forecast_intensity_change_column_name_to_hour = {}
forecast_intensity_change_hour_to_column_name = {}
# (48hDBI) changes in best intensity from base time to valid hour
best_intensity_change_column_name_to_hour = {}
best_intensity_change_hour_to_column_name = {}
# (48hFIE) forecast intensity error (forecast intensity - best track intensity)
forecast_intensity_error_column_name_to_hour = {}
forecast_intensity_error_hour_to_column_name = {}

# (suffix, name -> hour map, hour -> name map) for each derived per-hour column
derived_column_maps = [
    ('FI', forecast_intensity_column_name_to_hour, forecast_intensity_hour_to_column_name),
    ('BI', best_intensity_column_name_to_hour, best_intensity_hour_to_column_name),
    ('FRI', forecast_rapid_intensification_column_name_to_hour, forecast_rapid_intensification_hour_to_column_name),
    ('BRI', best_intensity_rapid_intensification_column_name_to_hour, best_intensity_rapid_intensification_hour_to_column_name),
    ('DFI', forecast_intensity_change_column_name_to_hour, forecast_intensity_change_hour_to_column_name),
    ('DBI', best_intensity_change_column_name_to_hour, best_intensity_change_hour_to_column_name),
    ('FIE', forecast_intensity_error_column_name_to_hour, forecast_intensity_error_hour_to_column_name),
]

for column_name in column_names:
    # only the intensity error columns (e.g. '048hI01') contain 'hI'
    if 'hI' not in column_name:
        continue
    # create mappings from intensity column names to hour
    h = int(column_name.split('hI')[0])
    intensity_err_column_name_to_hour[column_name] = h
    intensity_err_hour_to_column_name[h] = column_name

    # mappings for the derived columns of this hour (the columns themselves already exist in df_ri)
    for suffix, name_to_hour, hour_to_name in derived_column_maps:
        derived_column_name = f'{h}h{suffix}'
        name_to_hour[derived_column_name] = h
        hour_to_name[h] = derived_column_name

In [14]:



######## STATS


# no row limit when displaying frames, so the long percentile tables below are shown in full
pd.set_option('display.max_rows', None)


In [15]:
### statistics for forecasted rapid intensification (FRI)
### (forecast RI)

print("(ATL) Statistics for storms with forecast intensity rapid intensification")
print("===================================================================")

# forecasts (rows) flagged with forecast RI on at least one valid hour
fri_forecasts = df_ri.loc[df_ri['FRI'] == True]
fri_num_events = fri_forecasts['FRI_count'].sum()
fri_forecast_count = len(fri_forecasts)
print(f"Number of forecast rapid intensification events on a valid hour: {fri_num_events}")
print(f"Number of forecasts with a forecast rapid intensification event on a valid hour: {fri_forecast_count}")
fri_storm_ids = set(fri_forecasts['STMID'])
print(f"{len(fri_storm_ids)} Storms with rapid intensification in forecast:")
print(fri_storm_ids)
print("")

# valid hour -> list of intensity errors of the forecasts with forecast RI at that hour
# (hours without any such forecast are left out)
fri_errors = {}
print("Number of forecast rapid intensification events for each forecast valid hour:")
for column_name, valid_h in forecast_rapid_intensification_column_name_to_hour.items():
    fri_rows = df_ri.loc[df_ri[column_name] == True]
    count = len(fri_rows)
    if count > 0:
        print(f"{column_name} : {count}")
        intensity_err_column_name = intensity_err_hour_to_column_name[valid_h]
        fri_errors[valid_h] = fri_rows[intensity_err_column_name].tolist()

print("")
print("Table 1.")
# convert to dataframe and compute some stats
print("(ATL) Forecast intensity error for forecast rapid intensification (columns are forecast valid hours):")
# the per-hour lists differ in length, so each becomes its own Series (padded with NaN)
df_fri_errors = pd.DataFrame({k: pd.Series(v) for k, v in fri_errors.items()})
# every percentile from 1% to 99%
percentiles = [p / 100 for p in range(1, 100)]
fri_error_summary = df_fri_errors.describe(percentiles=percentiles)
# save table
fri_error_summary.to_csv('table1.csv')
# last expression of the cell: display the table
fri_error_summary

(ATL) Statistics for storms with forecast intensity rapid intensification
Number of forecast rapid intensification events on a valid hour: 186
Number of forecasts with a forecast rapid intensification event on a valid hour: 118
65 Storms with rapid intensification in forecast:
{'AL072010', 'AL191995', 'AL051998', 'AL252005', 'AL092017', 'AL072000', 'AL091996', 'AL081996', 'AL112017', 'AL292020', 'AL041999', 'AL132022', 'AL051996', 'AL131999', 'AL172008', 'AL061996', 'AL131998', 'AL092022', 'AL312020', 'AL061991', 'AL041992', 'AL031996', 'AL192020', 'AL131996', 'AL062018', 'AL132010', 'AL071993', 'AL031998', 'AL152022', 'AL152001', 'AL202020', 'AL021998', 'AL112001', 'AL062006', 'AL132020', 'AL111996', 'AL042005', 'AL042009', 'AL112000', 'AL091993', 'AL051993', 'AL132000', 'AL112010', 'AL102000', 'AL182021', 'AL011994', 'AL142018', 'AL052000', 'AL262020', 'AL142017', 'AL072008', 'AL052001', 'AL122021', 'AL092021', 'AL032001', 'AL052019', 'AL041996', 'AL101996', 'AL101995', 'AL132002', '

Unnamed: 0,12,24,36,48
count,86.0,67.0,21.0,12.0
mean,-0.883721,-1.641791,-6.904762,-1.25
std,12.50357,18.553527,14.359334,14.000812
min,-60.0,-40.0,-35.0,-30.0
1%,-34.5,-40.0,-34.0,-29.45
2%,-26.5,-38.4,-33.0,-28.9
3%,-25.0,-35.1,-32.0,-28.35
4%,-23.0,-35.0,-31.0,-27.8
5%,-20.0,-33.5,-30.0,-27.25
6%,-20.0,-30.2,-30.0,-26.7


In [16]:
### statistics for best intensity rapid intensification (BRI)
## ("observed" RI)

print("(ATL) Statistics for storms with best intensity rapid intensification:")
print("================================================================")

# forecasts (rows) whose best intensity shows RI on at least one valid hour
bri_forecasts = df_ri.loc[df_ri['BRI'] == True]
bri_num_events = bri_forecasts['BRI_count'].sum()
bri_forecast_count = len(bri_forecasts)
print(f"Number of best intensity rapid intensification events on valid hours: {bri_num_events}")
print(f"Number of forecasts with a best intensity rapid intensification event on a valid hour: {bri_forecast_count}")
bri_storm_ids = set(bri_forecasts['STMID'])
print(f"{len(bri_storm_ids)} Storms with best intensity rapid intensification:")
print("")
print(bri_storm_ids)
print("")

# valid hour -> list of intensity errors of the forecasts with best intensity RI at that hour
# (hours without any such forecast are left out)
bri_errors = {}
print("Number of best intensity rapid intensification events for each forecast valid hour:")
for column_name, valid_h in best_intensity_rapid_intensification_column_name_to_hour.items():
    bri_rows = df_ri.loc[df_ri[column_name] == True]
    count = len(bri_rows)
    if count > 0:
        print(f"{column_name} : {count}")
        intensity_err_column_name = intensity_err_hour_to_column_name[valid_h]
        bri_errors[valid_h] = bri_rows[intensity_err_column_name].tolist()

print("")
print("Table 2.")
# convert to dataframe and compute some stats
print("(ATL) Forecast intensity error for best intensity rapid intensification (columns are forecast valid hours):")
# the per-hour lists differ in length, so each becomes its own Series (padded with NaN)
df_bri_errors = pd.DataFrame({k: pd.Series(v) for k, v in bri_errors.items()})
# every percentile from 1% to 99%
percentiles = [p / 100 for p in range(1, 100)]
bri_error_summary = df_bri_errors.describe(percentiles=percentiles)
# save table
bri_error_summary.to_csv('table2.csv')
# last expression of the cell: display the table
bri_error_summary

(ATL) Statistics for storms with best intensity rapid intensification:
Number of best intensity rapid intensification events on valid hours: 1679
Number of forecasts with a best intensity rapid intensification event on a valid hour: 945
181 Storms with best intensity rapid intensification:

{'AL012004', 'AL191995', 'AL051998', 'AL042008', 'AL072016', 'AL172010', 'AL162016', 'AL112017', 'AL081996', 'AL041999', 'AL051996', 'AL152008', 'AL131999', 'AL172008', 'AL132004', 'AL092022', 'AL182012', 'AL192020', 'AL042007', 'AL071993', 'AL082012', 'AL101990', 'AL112015', 'AL051992', 'AL032004', 'AL092020', 'AL042005', 'AL031991', 'AL082008', 'AL042009', 'AL162007', 'AL132000', 'AL122017', 'AL262020', 'AL142018', 'AL092021', 'AL052019', 'AL041996', 'AL122010', 'AL132012', 'AL082020', 'AL032012', 'AL182005', 'AL051997', 'AL282020', 'AL072010', 'AL112009', 'AL102002', 'AL122007', 'AL052002', 'AL092017', 'AL122005', 'AL061992', 'AL092013', 'AL162022', 'AL202005', 'AL182010', 'AL131998', 'AL142016',

Unnamed: 0,12,24,36,48
count,459.0,565.0,340.0,315.0
mean,-17.159041,-23.283186,-32.411765,-36.603175
std,9.422331,11.262085,14.097464,15.243814
min,-60.0,-70.0,-80.0,-95.0
1%,-47.1,-55.0,-75.0,-79.3
2%,-40.0,-50.0,-66.1,-70.0
3%,-36.3,-45.0,-60.0,-65.0
4%,-35.0,-45.0,-60.0,-65.0
5%,-35.0,-40.0,-55.0,-60.0
6%,-30.0,-40.0,-55.0,-60.0


In [17]:
### Basic statistics for number of storms and RI storms
print("Basic statistics for all Atlantic storms")
print("========================================")

# unique storm ids over all forecasts in the checkpointed frame
all_storm_ids = set(df_ri['STMID'])
num_storms = len(all_storm_ids)

# share of storms with / without RI, by best intensity and by forecast
pct_bri = 100.0 * len(bri_storm_ids) / num_storms
pct_no_bri = 100.0 - pct_bri
pct_fri = 100.0 * len(fri_storm_ids) / num_storms
pct_no_fri = 100.0 - pct_fri

report_lines = (
    f"Total storms: {num_storms}",
    "",
    f"% of all storms with best intensity rapid intensification: {pct_bri:2.2f} %",
    f"% of all storms with no best intensity rapid intensification: {pct_no_bri:2.2f} %",
    "",
    f"% of all storms with forecast rapid intensification: {pct_fri:2.2f} %",
    f"% of all storms with no forecast rapid intensification: {pct_no_fri:2.2f} %",
)
for report_line in report_lines:
    print(report_line)

Basic statistics for all Atlantic storms
Total storms: 545

% of all storms with best intensity rapid intensification: 33.21 %
% of all storms with no best intensity rapid intensification: 66.79 %

% of all storms with forecast rapid intensification: 11.93 %
% of all storms with no forecast rapid intensification: 88.07 %


In [18]:
### Statistics for storms with no best intensity rapid intensification
## ("observed" NO RI)

no_bri_storm_ids = all_storm_ids.difference(bri_storm_ids)
# the number of valid forecast hour columns (including base time)
num_valid_hour_columns = len(best_intensity_rapid_intensification_hour_to_column_name)

# forecasts (rows) whose 'BRI' flag is False
df_no_bri = df_ri.loc[df_ri['BRI'] == False]
# subtract one since the base time can never have rapid intensification
no_bri_num_events = (len(df_no_bri) * (num_valid_hour_columns - 1))

no_bri_forecast_count = len(df_no_bri)

print("(ATL) Statistics for storms with no best intensity rapid intensification:")
print("===================================================================")
print(f"Number of best intensity no rapid intensification events on valid hours: {no_bri_num_events}")
print(f"Number of forecasts with no best intensity rapid intensification event on a valid hour: {no_bri_forecast_count}")
print(f"{len(no_bri_storm_ids)} Storms with no best intensity rapid intensification:")
print("")
print(no_bri_storm_ids)
print("")

# intensity errors per forecast valid hour, taken column-wise instead of one cell at a time
no_bri_errors = {
    valid_h: df_no_bri[column_name].tolist()
    for valid_h, column_name in intensity_err_hour_to_column_name.items()
}

# remove empty keys
no_bri_errors = {k: v for k, v in no_bri_errors.items() if v}

print("")
print("Table 3.")
# convert to dataframe and compute some stats
print("(ATL) Forecast intensity error for best intensity no rapid intensification (columns are forecast valid hours):")
df_no_bri_errors = pd.DataFrame({k: pd.Series(v) for k, v in no_bri_errors.items()})

# 1st..99th percentiles (0.01, 0.02, ..., 0.99); i / 100 yields the same floats as the decimal literals
no_bri_percentiles = [pct / 100 for pct in range(1, 100)]
# compute the summary once, save it, then display it as the cell output
df_no_bri_error_stats = df_no_bri_errors.describe(percentiles=no_bri_percentiles)
# save table
df_no_bri_error_stats.to_csv('table3.csv')
df_no_bri_error_stats

(ATL) Statistics for storms with no best intensity rapid intensification:
Number of best intensity no rapid intensification events on valid hours: 113670
Number of forecasts with no best intensity rapid intensification event on a valid hour: 11367
364 Storms with no best intensity rapid intensification:

{'AL162000', 'AL182020', 'AL102010', 'AL032010', 'AL102011', 'AL041989', 'AL101993', 'AL172019', 'AL022017', 'AL012007', 'AL051989', 'AL041991', 'AL111995', 'AL051993', 'AL122019', 'AL071989', 'AL062022', 'AL012010', 'AL061999', 'AL172003', 'AL082003', 'AL082010', 'AL042021', 'AL012008', 'AL042000', 'AL282005', 'AL152013', 'AL142012', 'AL112016', 'AL052009', 'AL022013', 'AL091992', 'AL122015', 'AL032021', 'AL172021', 'AL121990', 'AL071994', 'AL111998', 'AL172005', 'AL072005', 'AL302005', 'AL052010', 'AL211995', 'AL062003', 'AL012013', 'AL112020', 'AL142001', 'AL141999', 'AL161995', 'AL122002', 'AL121998', 'AL202021', 'AL122013', 'AL061995', 'AL022019', 'AL082021', 'AL092019', 'AL021994

Unnamed: 0,0,12,24,36,48,60,72,96,120,144,168
count,9923.0,8940.0,7935.0,7000.0,6155.0,711.0,4792.0,2439.0,1880.0,0.0,0.0
mean,-0.746246,0.928971,1.975425,2.147857,2.086109,1.188467,1.38773,-0.194752,-1.348404,,
std,4.0407,7.709195,10.983638,13.524659,15.97098,13.401742,20.704668,21.711836,23.91228,,
min,-35.0,-30.0,-45.0,-50.0,-65.0,-50.0,-90.0,-90.0,-100.0,,
1%,-15.0,-20.0,-25.0,-30.0,-35.0,-35.0,-55.0,-60.0,-75.0,,
2%,-10.0,-15.0,-20.0,-25.0,-30.0,-30.0,-45.0,-50.0,-60.0,,
3%,-10.0,-15.0,-20.0,-25.0,-30.0,-25.0,-40.0,-45.0,-50.0,,
4%,-10.0,-10.0,-15.0,-20.0,-25.0,-25.0,-35.0,-45.0,-45.0,,
5%,-5.0,-10.0,-15.0,-20.0,-25.0,-25.0,-35.0,-40.0,-40.0,,
6%,-5.0,-10.0,-15.0,-20.0,-25.0,-20.0,-30.0,-35.0,-40.0,,


In [19]:
### Statistics for storms with no best intensity rapid intensification but with forecast rapid intensification
## ("observed" NO RI, YES forecast RI)

# storm ids in no_bri_storm_ids that also appear in fri_storm_ids
no_bri_yes_fri_storm_ids = no_bri_storm_ids.intersection(fri_storm_ids)

# forecasts (rows) flagged FRI but not BRI
df_no_bri_yes_fri = df_ri.loc[(df_ri['BRI'] == False) & (df_ri['FRI'] == True)]
no_bri_yes_fri_num_events = df_no_bri_yes_fri['FRI_count'].sum()

no_bri_yes_fri_forecast_count = len(df_no_bri_yes_fri)

print("(ATL) Statistics for storms with no best intensity rapid intensification but with forecast RI:")
print("========================================================================================")
print(f"Number of forecast rapid intensification events on valid hours: {no_bri_yes_fri_num_events}")
print(f"Number of forecasts with rapid intensification event on a valid hour: {no_bri_yes_fri_forecast_count}")
print(f"{len(no_bri_yes_fri_storm_ids)} Storms with forecast rapid intensification:")
print("")
print(no_bri_yes_fri_storm_ids)
print("")

no_bri_yes_fri_errors = {}
# the counts below come from the per-hour *forecast* RI flag columns, so label them as such
print("Number of forecast rapid intensification events for each forecast valid hour:")
for column_name, valid_h in forecast_rapid_intensification_column_name_to_hour.items():
    # rows where RI was forecast at this particular valid hour
    no_bri_yes_fri_rows = df_no_bri_yes_fri.loc[df_no_bri_yes_fri[column_name] == True]
    count = len(no_bri_yes_fri_rows)
    if count > 0:
        print(f"{column_name} : {count}")

    # intensity errors at the same valid hour for those rows
    intensity_err_column_name = intensity_err_hour_to_column_name[valid_h]
    no_bri_yes_fri_errors[valid_h] = no_bri_yes_fri_rows[intensity_err_column_name].tolist()

# remove empty keys
no_bri_yes_fri_errors = {k: v for k, v in no_bri_yes_fri_errors.items() if v}

print("")
print("Table 4.")
# convert to dataframe and compute some stats
print("(ATL) Forecast intensity error for best intensity no rapid intensification but with forecast RI\n(columns are forecast valid hours):")
df_no_bri_yes_fri_errors = pd.DataFrame({k: pd.Series(v) for k, v in no_bri_yes_fri_errors.items()})

# 1st..99th percentiles (0.01, 0.02, ..., 0.99); i / 100 yields the same floats as the decimal literals
no_bri_yes_fri_percentiles = [pct / 100 for pct in range(1, 100)]
# compute the summary once, save it, then display it as the cell output
df_no_bri_yes_fri_error_stats = df_no_bri_yes_fri_errors.describe(percentiles=no_bri_yes_fri_percentiles)
# save table
df_no_bri_yes_fri_error_stats.to_csv('table4.csv')
df_no_bri_yes_fri_error_stats

(ATL) Statistics for storms with no best intensity rapid intensification but with forecast RI:
Number of forecast rapid intensification events on valid hours: 62
Number of forecasts with rapid intensification event on a valid hour: 49
10 Storms with forecast rapid intensification:

{'AL052001', 'AL061996', 'AL111996', 'AL071992', 'AL132022', 'AL051993', 'AL152022', 'AL011994', 'AL052000', 'AL062002'}

Number of best intensity rapid intensification events for each forecast valid hour:
12hFRI : 42
24hFRI : 18
48hFRI : 2

Table 4.
(ATL) Forecast intensity error for best intensity no rapid intensification but with forecast RI
(columns are forecast valid hours):


Unnamed: 0,12,24,48
count,42.0,18.0,2.0
mean,6.428571,16.111111,15.0
std,7.265498,14.095844,0.0
min,-5.0,-5.0,15.0
1%,-5.0,-3.3,15.0
2%,-5.0,-1.6,15.0
3%,-5.0,0.1,15.0
4%,-5.0,1.8,15.0
5%,-5.0,3.5,15.0
6%,-5.0,5.0,15.0


In [20]:
# Note: storms forecast RI that also have best intensity RI (not necessarily at the correct valid time)

# complements of the two RI storm sets within all storms
no_bri_storm_ids = all_storm_ids.difference(bri_storm_ids)
no_fri_storm_ids = all_storm_ids.difference(fri_storm_ids)

num_bri_storms = len(bri_storm_ids)
num_no_bri_storms = len(no_bri_storm_ids)

# naive validation since forecast may have gotten RI for the wrong time/location/reason
# -- storms with best intensity RI: forecast with RI (tp) vs. not forecast with RI (fn)
tp = len(fri_storm_ids.intersection(bri_storm_ids))
pct_tp = 100.0 * tp / num_bri_storms
fn = len(no_fri_storm_ids.intersection(bri_storm_ids))
pct_fn = 100.0 * fn / num_bri_storms

for ri_line in (
    "(ATL) RI Storms:",
    f"% Storms with best intensity RI that were forecast with RI (TP): {pct_tp:2.2f} % ({tp})",
    f"% Storms with best intensity RI that were not forecast with RI (FN): {pct_fn:2.2f} % ({fn})",
    "",
    "(ATL) Non-RI Storms:",
):
    print(ri_line)

# -- storms without best intensity RI: forecast with RI (fp) vs. not forecast with RI (tn)
fp = len(fri_storm_ids.intersection(no_bri_storm_ids))
pct_fp = 100.0 * fp / num_no_bri_storms
tn = len(no_fri_storm_ids.intersection(no_bri_storm_ids))
pct_tn = 100.0 * tn / num_no_bri_storms

for non_ri_line in (
    f"% Storms with no best intensity RI that were forecast with RI (FP): {pct_fp:2.2f} % ({fp})",
    f"% Storms with no best intensity RI that were not forecast with RI (TN): {pct_tn:2.2f} % ({tn})",
):
    print(non_ri_line)


(ATL) RI Storms:
% Storms with best intensity RI that were forecast with RI (TP): 30.39 % (55)
% Storms with best intensity RI that were not forecast with RI (FN): 69.61 % (126)

(ATL) Non-RI Storms:
% Storms with no best intensity RI that were forecast with RI (FP): 2.75 % (10)
% Storms with no best intensity RI that were not forecast with RI (TN): 97.25 % (354)


In [21]:
### Statistics for all storms (all storms considered, RI and no RI)

all_storm_ids = set(df_ri['STMID'].to_list())
num_storms = len(all_storm_ids)
print(f"(ATL) Total storms: {num_storms}")
print("")

# forecast intensity errors per forecast valid hour, taken column-wise instead of one cell at a time
all_errors = {
    valid_h: df_ri[column_name].tolist()
    for valid_h, column_name in forecast_intensity_error_hour_to_column_name.items()
}

# remove empty keys
all_errors = {k: v for k, v in all_errors.items() if v}

print("")
print("Table 5.")
# convert to dataframe and compute some stats
print("(ATL) Forecast intensity error for all storms (columns are forecast valid hours):")
df_all_errors = pd.DataFrame({k: pd.Series(v) for k, v in all_errors.items()})

# 1st..99th percentiles (0.01, 0.02, ..., 0.99); i / 100 yields the same floats as the decimal literals
all_errors_percentiles = [pct / 100 for pct in range(1, 100)]
# compute the summary once, save it, then display it as the cell output
df_all_error_stats = df_all_errors.describe(percentiles=all_errors_percentiles)
# save table
df_all_error_stats.to_csv('table5.csv')
df_all_error_stats

(ATL) Total storms: 545


Table 5.
(ATL) Forecast intensity error for all storms (columns are forecast valid hours):


Unnamed: 0,0,12,24,36,48,60,72,96,120,144,168
count,10868.0,9885.0,8879.0,7926.0,7029.0,830.0,5568.0,2912.0,2284.0,0.0,0.0
mean,-0.798215,-0.148811,-0.00901,-0.132475,-0.24968,0.078313,-0.1787,-0.911745,-1.705342,,
std,4.078354,8.676956,12.686985,15.582935,18.102137,14.488459,21.81127,21.977193,24.004788,,
min,-35.0,-60.0,-70.0,-80.0,-95.0,-55.0,-90.0,-90.0,-100.0,,
1%,-15.0,-25.0,-35.0,-45.0,-50.0,-40.0,-60.0,-60.0,-70.0,,
2%,-10.0,-20.0,-30.0,-35.0,-40.0,-35.0,-50.0,-50.0,-60.0,,
3%,-10.0,-20.0,-25.0,-30.0,-35.0,-30.65,-45.0,-45.0,-50.0,,
4%,-10.0,-15.0,-25.0,-30.0,-35.0,-30.0,-40.0,-45.0,-45.0,,
5%,-5.0,-15.0,-20.0,-25.0,-30.0,-25.0,-40.0,-40.0,-40.0,,
6%,-5.0,-15.0,-20.0,-25.0,-30.0,-25.0,-35.0,-35.0,-40.0,,


In [22]:
#### ATLANTIC
# Initial thoughts... Table 1 and Table 4 have a small number of samples for tau >= 48h

# Hypothetical basis for considering probabilities using OFCL intensities alone from forecast advisories:
#  for tau 12 to 48h:
#   when calculating base rates using the above tables and statistics, check if there is RI in the forecast:
#   if there is RI in the forecast:
#      note that % RI false positives is only ~ 3%: so reference,
#         2.75 % to weight (pct_fp) on Table 4 calculations
#         97.25 % to weight (pct_tn) on Table 1 calculations
#
#   if there is no RI in the forecast:
#      note only ~33% of storms have RI historically (in the last ~30 years): so reference,
#         66.79 % to weight (pct_no_bri) on Table 3 calculations
#         33.21 % to weight (pct_bri) on Table 2 calculations
# for tau > 48h (not enough samples for RI from Table 1):
#   rely on Table 5

# This likely provides a lower bound since skill has increased since 1989

In [23]:
#### ATLANTIC EXAMPLES

In [24]:
# Example, Hurricane Nigel (2023)
# RI is explicitly forecast 24H in advance (30kt increase from 70kt to 100kt).
# Question: what is the probability it will be a category 3 storm (>95kt)?
#   The NHC only uses steps of 5kt, so 100kt must be forecast;
#     assuming the NHC rounds the intensity, that means the error <= 2.5kt,
#   so the percentiles of Table 1 and Table 4 where the error is <= 2.5kt are most relevant:
#     for Table 1 @24H this is at the 55th percentile
#     for Table 4 @24H this is at the 5th percentile
#   weights when RI is in the forecast:
#         97.25 % to weight (pct_tn) on Table 1 calculations
#          2.75 % to weight (pct_fp) on Table 4 calculations
nigel_weight_table1 = 0.9725
nigel_weight_table4 = 0.0275
nigel_table1_24h = 0.55
nigel_table4_24h = 0.05
nigel_24h_RI_forecast_for_cat3_at_11AM_Sept_18_2023 = (
    (nigel_weight_table1 * nigel_table1_24h) + (nigel_weight_table4 * nigel_table4_24h)
)
print(nigel_24h_RI_forecast_for_cat3_at_11AM_Sept_18_2023)

# 30 hours later...
# RI did not happen, and now RI is not forecast.
# VMAX is 85kt at 5PM, with forecast of 95kt in 12H; what is the probability it will be
# a category 3 storm in 12 hours?
#   increase in VMAX from forecast needs to be 2.5kt (intensity error <= -2.5kt) to round up to 100kt steps
#   for Table 3 @12H that happens 29% of time
#   for Table 2 @12H that happens 96% of time
#   weights when RI is not in the forecast:
#         66.79 % to weight (pct_no_bri) on Table 3 calculations
#         33.21 % to weight (pct_bri) on Table 2 calculations
nigel_weight_table3 = 0.6679
nigel_weight_table2 = 0.3321
nigel_table3_12h = 0.29
nigel_table2_12h = 0.96
nigel_12h_forecast_for_cat3_at_5PM_Sept_19_2023 = (
    (nigel_weight_table3 * nigel_table3_12h) + (nigel_weight_table2 * nigel_table2_12h)
)
print(nigel_12h_forecast_for_cat3_at_5PM_Sept_19_2023)

0.5362500000000001
0.512507


In [25]:
# Weights shared by the three Ophelia examples below (RI is not in the forecast):
#         66.79 % to weight (pct_no_bri) on Table 3 calculations
#         33.21 % to weight (pct_bri) on Table 2 calculations
ophelia_weight_table3 = 0.6679
ophelia_weight_table2 = 0.3321

# Example, TS Ophelia (2023), will it become a cat. 1 in 12 hours?
# RI is not forecast 12H in advance
#   OFCL 2PM Intermediate Advisory forecast is for 55kt in 12 hours
#   VMAX intensity error <= -10kt
#      for Table 3 @12 H this is 11%
#      for Table 2 @12 H this is 88%
ophelia_12h_forecast_for_cat1_at_2PM_Sept_22_2023 = (
    (ophelia_weight_table3 * 0.11) + (ophelia_weight_table2 * 0.88)
)
print(ophelia_12h_forecast_for_cat1_at_2PM_Sept_22_2023)


# Example, TS Ophelia (2023), will it make landfall as a cat. 1 in 24 hours?
# RI is not forecast 24H in advance
#   OFCL 2PM Advisory forecast is for 50kt in 24 hours
#   VMAX intensity error <= -15kt
#      for Table 3 @24 H this is 7%
#      for Table 2 @24 H this is 84%
ophelia_24h_forecast_for_cat1_landfall_at_2PM_Sept_22_2023 = (
    (ophelia_weight_table3 * 0.07) + (ophelia_weight_table2 * 0.84)
)
print(ophelia_24h_forecast_for_cat1_landfall_at_2PM_Sept_22_2023)

# Example, TS Ophelia (2023), will it become a cat. 1 in 12 hours?
# RI is not forecast 12H in advance
#   OFCL 5PM Advisory forecast is for 60kt in 12 hours
#   VMAX intensity error <= -5kt
#      for Table 3 @12 H this is 29%
#      for Table 2 @12 H this is 96%
ophelia_12h_forecast_for_cat1_at_5PM_Sept_22_2023 = (
    (ophelia_weight_table3 * 0.29) + (ophelia_weight_table2 * 0.96)
)
print(ophelia_12h_forecast_for_cat1_at_5PM_Sept_22_2023)

0.365717
0.325717
0.512507


In [26]:
# Example, TS Philippe (2023), will it become a cat. 1 in 120 hours?
# RI is not forecast 120H in advance
#   OFCL (2100 UTC MON OCT 02 2023) is for 65kt in 120 hours:
#     OUTLOOK VALID 07/1800Z 35.0N  60.0W
#     MAX WIND  65 KT...GUSTS  80 KT.
#   VMAX intensity error <= 0 kt
#      for Table 3 @120 H this is 45%
#      for Table 2, there is no @120 H
#   Looking at the path there is no reason to expect RI, so use only the Table 3 look up value of 45%


# Example, TS Philippe (2023), will it become a cat. 1 in 120 hours?
# RI is not forecast 120H in advance
#   OFCL (0900 UTC TUE OCT 03 2023) is for 55kt in 120 hours:
#      OUTLOOK VALID 08/0600Z 38.5N  61.5W
#      MAX WIND  55 KT...GUSTS  65 KT.
#   VMAX intensity error <= -10 kt
#      for Table 3 @120 H this is 31% (taking the middle value this time)
#      for Table 2, there is no @120 H
#   Looking at the path there is no reason to expect RI, so use only the Table 3 look up value of 31%

In [None]:
# example, TS Sean (2023), will it become a cat 1 in the next 120H?:
"""Time of Latest Forecast: 2023-10-11 18:00
                                                                      Table 5
Forecast Hour	Latitude	Longitude	Intensity err (for cat1)
0	11.4	325.1	35                    -30                        <1%
12	12.1	323.5	35                    -30                        <1%
24	12.7	321.9	40                    -25                        <5%
36	13.4	320.3	40                    -25                        <8%
48	14.3	318.9	40                    -25                        <11%
72	16.4	316.4	35                    -30                        <11%
96	18.1	313.9	30                    -35                        <8%
120	18.7	309.8	30                    -35                        <10%


What about RI?
https://tropic.ssec.wisc.edu/real-time/ai-ri/al192023_history.html
(Use SHIPS LRE for everything other than 20/12, 25/24, otherwise use AI/RI or what ever is highest probability for most conservative estimate)

Sean at 10/12/23 00 UTC. Intensity = 30 kts, MPI = 144 kts	
Highest is 9.7% at +65 kt in 72h (SHIPS-RII)

Sean at 10/11/23 18 UTC. Intensity = 35 kts, MPI = 144 kts	   
Highest is 13% at +65 kt in 72 h (AI-RI)

The highest percent is then 10% (NHC) to 13% (RI) for an average of 11.5%

"""


In [27]:
# todo:
# double check what is the base intensity time (000h) used for each forecast advisory... since it seems to mention 
#synoptic time vs advisory time
# -2 hours synoptic: earliest (dynamical) guidance starts to arrive (incomplete products); start work on tropical outlook
# -1 hour synoptic: work on intermediate advisory
# -0.5 synoptic: earliest intermediate advisory
# 0 synoptic: intermediate advisory valid time, TC warnings issued
# +0.75 synoptic: data arrives for initial state of storm system (fixes)
# +1 synoptic: dynamical models initialized (GFS, HMON, HWRF), statistical models run; a deck file becomes available

# parse multiple forecast advisories and create a df from them
# calculate and update base time intensity from previous forecasts for future forecasts (since it is missing from the advisory)
# for each valid hour in the advisory, determine if RI is in the forecast (use estimate intensity as base intensity)
# for each table's data columns calculate the probability
# source https://www.nhc.noaa.gov/archive/text/TCMAT{1-5}/2023/

# storm_bins = list(range(1, 5))


In [28]:






















#####################
# EAST PACIFIC
#####################


























In [29]:
# extend to 72h for EPAC which has more samples
def is_rapid_intensification(valid_h, base_intensity, valid_intensity):
    """Decide whether an intensity change over ``valid_h`` hours counts as rapid intensification.

    The change ``valid_intensity - base_intensity`` is compared with a threshold chosen by
    the first bracket that ``valid_h`` falls into (``valid_h <= 12``, ``<= 24``, ... ``<= 72``).

    Thresholds up to 48 h follow
    https://journals.ametsoc.org/view/journals/wefo/35/6/WAF-D-19-0253.1.xml#bib15 :
    'RI is therefore defined as an increase of at least 20 kt in 12 h, 30 kt in 24 h,
    45 kt in 36 h, and 55 kt in 48 h'. The 60 h (60 kt) and 72 h (65 kt) brackets are
    this notebook's extension for EPAC, which has more samples.

    Parameters:
        valid_h: forecast valid hour (hours after the base time).
        base_intensity: intensity at the base time (000h).
        valid_intensity: intensity at the valid hour.

    Returns:
        bool: True when the change meets or exceeds the bracket's threshold; False when
        either intensity is NaN, when ``valid_h`` is 0 (no RI at the base time), or when
        ``valid_h`` is beyond the last bracket (72 h).
    """
    if np.isnan(base_intensity) or np.isnan(valid_intensity):
        return False
    if valid_h == 0:
        # don't do RI for base time (000h)
        return False

    # (upper bound of the bracket in hours, minimum increase that counts as RI)
    ri_thresholds = (
        (12, 20),
        (24, 30),
        (36, 45),
        (48, 55),
        (60, 60),
        (72, 65),
    )
    intensity_change = valid_intensity - base_intensity
    for max_valid_h, rapid_intensification_threshold in ri_thresholds:
        if valid_h <= max_valid_h:
            return bool(intensity_change >= rapid_intensification_threshold)

    # only consider rapid intensification for the above periods
    return False

In [30]:
# track and intensity (used for naming files)
error_category = 'TI'
# parse the file at filepath_ti_errs_pac into its header and data frame
header, df = read_intensity_err(filepath_ti_errs_pac)

In [31]:
# create mappings and create new columns for calculations and analysis

column_names = list(df.keys())
# examples of column names with 48 hours in parenthesis
# (48hI01) intensity valid forecast hours
intensity_err_column_name_to_hour = {}
intensity_err_hour_to_column_name = {}
# (48hFI) forecast intensity for valid hours of the forecast (missing so recalculate from data)
forecast_intensity_column_name_to_hour = {}
forecast_intensity_hour_to_column_name = {}
# (48hBI) best intensity for valid hours of the forecast
best_intensity_column_name_to_hour = {}
best_intensity_hour_to_column_name = {}
# (48hFRI) forecast rapid intensification
forecast_rapid_intensification_column_name_to_hour = {}
forecast_rapid_intensification_hour_to_column_name = {}
# (48hBRI) best intensity rapid intensification
best_intensity_rapid_intensification_column_name_to_hour = {}
best_intensity_rapid_intensification_hour_to_column_name = {}
# (48hDFI) changes in forecast intensity from base time to valid hour
forecast_intensity_change_column_name_to_hour = {}
forecast_intensity_change_hour_to_column_name = {}
# (48hDBI) changes in best intensity from base time to valid hour
best_intensity_change_column_name_to_hour = {}
best_intensity_change_hour_to_column_name = {}
# (48hFIE) forecast intensity error (forecast intensity - best track intensity)
forecast_intensity_error_column_name_to_hour = {}
forecast_intensity_error_hour_to_column_name = {}

# One entry per derived column created for every forecast hour:
# (column name suffix, initial value, column name -> hour mapping, hour -> column name mapping)
# np.nan is used rather than the np.NaN alias, which no longer exists in NumPy 2.0.
derived_column_specs = [
    # forecast intensity
    ('hFI', np.nan, forecast_intensity_column_name_to_hour, forecast_intensity_hour_to_column_name),
    # best intensity
    ('hBI', np.nan, best_intensity_column_name_to_hour, best_intensity_hour_to_column_name),
    # forecast RI categorization
    ('hFRI', False, forecast_rapid_intensification_column_name_to_hour,
     forecast_rapid_intensification_hour_to_column_name),
    # best intensity RI categorization
    ('hBRI', False, best_intensity_rapid_intensification_column_name_to_hour,
     best_intensity_rapid_intensification_hour_to_column_name),
    # change in forecast intensity (forecast intensity at valid hour - forecast intensity at base hour)
    ('hDFI', np.nan, forecast_intensity_change_column_name_to_hour,
     forecast_intensity_change_hour_to_column_name),
    # change in best intensity (best intensity at valid hour (from other row) - best intensity at base hour)
    ('hDBI', np.nan, best_intensity_change_column_name_to_hour,
     best_intensity_change_hour_to_column_name),
    # forecast intensity error (forecast intensity at valid hour - best intensity at valid hour)
    ('hFIE', np.nan, forecast_intensity_error_column_name_to_hour,
     forecast_intensity_error_hour_to_column_name),
]

# derived column name -> initial value, in creation order
derived_column_defaults = {}
for column_name in column_names:
    if 'hI' in column_name:
        # create mappings from intensity column names to hour
        h = int(column_name.split('hI')[0])
        intensity_err_column_name_to_hour[column_name] = h
        intensity_err_hour_to_column_name[h] = column_name

        # register the extra columns for this forecast hour
        for suffix, initial_value, name_to_hour, hour_to_name in derived_column_specs:
            derived_column_name = f'{h}{suffix}'
            derived_column_defaults[derived_column_name] = initial_value
            name_to_hour[derived_column_name] = h
            hour_to_name[h] = derived_column_name

# Attach all derived columns in a single concat; inserting them one by one is what
# triggers pandas' fragmentation PerformanceWarning. Any same-named columns left from a
# previous run of this cell are dropped first so re-running resets them instead of
# duplicating them.
derived_columns = pd.DataFrame(derived_column_defaults, index=df.index)
df = pd.concat(
    [df.drop(columns=list(derived_columns.columns), errors='ignore'), derived_columns],
    axis=1,
)

  df[forecast_intensity_column_name] = np.NaN
  df[best_intensity_column_name] = np.NaN
  df[forecast_rapid_intensification_column_name] = False
  df[best_intensity_rapid_intensification_column_name] = False
  df[forecast_intensity_change_column_name] = np.NaN
  df[best_intensity_change_column_name] = np.NaN
  df[forecast_intensity_error_column_name] = np.NaN
  df[forecast_intensity_column_name] = np.NaN
  df[best_intensity_column_name] = np.NaN
  df[forecast_rapid_intensification_column_name] = False
  df[best_intensity_rapid_intensification_column_name] = False
  df[forecast_intensity_change_column_name] = np.NaN
  df[best_intensity_change_column_name] = np.NaN
  df[forecast_intensity_error_column_name] = np.NaN


In [32]:
# Per-forecast summary columns, created as (flag, count) pairs:
#   'FRI' / 'FRI_count': (forecast) rapid intensification at any forecast hour, and how many hours
#   'BRI' / 'BRI_count': (best intensity) rapid intensification at any valid hour, and how many hours
for any_flag_column, count_column in (('FRI', 'FRI_count'), ('BRI', 'BRI_count')):
    df[any_flag_column] = False
    df[count_column] = 0

# defragment frame (for performance)
df = df.copy()

  df['FRI'] = False
  df['FRI_count'] = 0
  df['BRI'] = False
  df['BRI_count'] = 0


In [33]:
# calculate forecast intensities and rapid intensification

# make sure to sort so that we always calculate the forecast_base_intensity first in the inner loop below
sorted_intensity_err_hour_to_column_name_items = sorted(intensity_err_hour_to_column_name.items(), key=lambda x:x[0])

# Best track intensity ('WS') keyed by (storm id, time), built once so that each valid-time
# lookup below is a dict access instead of a boolean scan over the whole frame.
# The first row seen for a key wins (same choice as taking `.iloc[0]` of the matching rows);
# rows with a missing storm id or time are left out since they can never equal a lookup key.
# 'STMID', 'ds' and 'WS' are not written to by the loop below, so the lookup stays valid.
best_intensity_by_storm_and_time = {}
for stmid_key, ds_key, ws_value in zip(df['STMID'], df['ds'], df['WS']):
    if pd.isna(stmid_key) or pd.isna(ds_key):
        continue
    best_intensity_by_storm_and_time.setdefault((stmid_key, ds_key), ws_value)

for idx in range(0, len(df)):
    base_time = df['ds'].iloc[idx]
    stmid = df['STMID'].iloc[idx]
    # best intensity at the base time of this forecast (same for every valid hour)
    best_intensity = df.at[idx, 'WS']
    # np.nan rather than the np.NaN alias, which no longer exists in NumPy 2.0
    forecast_base_intensity = np.nan
    forecast_rapid_intensification_any = False
    best_intensity_rapid_intensification_any = False
    for valid_h, valid_column_name in sorted_intensity_err_hour_to_column_name_items:
        intensity_err = df.at[idx, valid_column_name]
        if np.isnan(intensity_err):
            # no error value for this valid hour
            continue

        valid_time = base_time + timedelta(hours=valid_h)
        valid_key = (stmid, valid_time)
        if valid_key not in best_intensity_by_storm_and_time:
            # raise warning
            # miss best_intensity calculations if there is no best track 'ws' data here?
            print(f"Warning: no forecast row for {valid_h} starting from forecast index {idx}...")
            continue

        valid_best_intensity = best_intensity_by_storm_and_time[valid_key]
        if np.isnan(valid_best_intensity):
            print(f"Warning: best_intensity data not available for {valid_h} from forecast index {idx}...")
            continue

        # calculate forecast intensity from error and best_intensity
        forecast_intensity = valid_best_intensity + intensity_err

        # set the base intensity for this forecast
        if valid_h == 0:
            forecast_base_intensity = forecast_intensity

        # set forecast intensity for valid hour
        forecast_intensity_column_name = forecast_intensity_hour_to_column_name[valid_h]
        df.at[idx, forecast_intensity_column_name] = forecast_intensity

        # set best intensity for valid hour
        best_intensity_column_name = best_intensity_hour_to_column_name[valid_h]
        df.at[idx, best_intensity_column_name] = valid_best_intensity

        # calculate forecast rapid intensification for (selected) forecast hours
        forecast_rapid_intensification_column_name = forecast_rapid_intensification_hour_to_column_name[valid_h]
        forecast_rapid_intensification = is_rapid_intensification(valid_h, forecast_base_intensity, forecast_intensity)
        df.at[idx, forecast_rapid_intensification_column_name] = forecast_rapid_intensification
        if forecast_rapid_intensification:
            df.at[idx, 'FRI_count'] = df.at[idx, 'FRI_count'] + 1
            forecast_rapid_intensification_any = True

        # calculate best intensity rapid intensification for (selected) forecast hours
        best_intensity_rapid_intensification_column_name = best_intensity_rapid_intensification_hour_to_column_name[valid_h]
        best_intensity_rapid_intensification = is_rapid_intensification(valid_h, best_intensity, valid_best_intensity)
        df.at[idx, best_intensity_rapid_intensification_column_name] = best_intensity_rapid_intensification
        if best_intensity_rapid_intensification:
            df.at[idx, 'BRI_count'] = df.at[idx, 'BRI_count'] + 1
            best_intensity_rapid_intensification_any = True

        # calculate the change in forecast intensity at valid hour
        forecast_intensity_change = forecast_intensity - forecast_base_intensity
        forecast_intensity_change_column_name = forecast_intensity_change_hour_to_column_name[valid_h]
        df.at[idx, forecast_intensity_change_column_name] = forecast_intensity_change

        # calculate the change in best intensity at valid hour
        best_intensity_change = valid_best_intensity - best_intensity
        best_intensity_change_column_name = best_intensity_change_hour_to_column_name[valid_h]
        df.at[idx, best_intensity_change_column_name] = best_intensity_change

        # calculate the intensity error
        forecast_intensity_error = forecast_intensity - valid_best_intensity
        forecast_intensity_error_column_name = forecast_intensity_error_hour_to_column_name[valid_h]
        df.at[idx, forecast_intensity_error_column_name] = forecast_intensity_error

    # per-forecast summary flags: RI at any valid hour
    df.at[idx, 'FRI'] = forecast_rapid_intensification_any
    df.at[idx, 'BRI'] = best_intensity_rapid_intensification_any


















































In [34]:
# show every column name of the computed frame (raw error columns plus the derived per-hour columns)
df.columns.tolist()

['ds',
 'STMID',
 'F012',
 'F024',
 'F036',
 'F048',
 'F060',
 'F072',
 'F096',
 'F120',
 'F144',
 'F168',
 'Lat',
 'Lon',
 'WS',
 '000hT01',
 '012hT01',
 '024hT01',
 '036hT01',
 '048hT01',
 '060hT01',
 '072hT01',
 '096hT01',
 '120hT01',
 '144hT01',
 '168hT01',
 '000hI01',
 '012hI01',
 '024hI01',
 '036hI01',
 '048hI01',
 '060hI01',
 '072hI01',
 '096hI01',
 '120hI01',
 '144hI01',
 '168hI01',
 '0hFI',
 '0hBI',
 '0hFRI',
 '0hBRI',
 '0hDFI',
 '0hDBI',
 '0hFIE',
 '12hFI',
 '12hBI',
 '12hFRI',
 '12hBRI',
 '12hDFI',
 '12hDBI',
 '12hFIE',
 '24hFI',
 '24hBI',
 '24hFRI',
 '24hBRI',
 '24hDFI',
 '24hDBI',
 '24hFIE',
 '36hFI',
 '36hBI',
 '36hFRI',
 '36hBRI',
 '36hDFI',
 '36hDBI',
 '36hFIE',
 '48hFI',
 '48hBI',
 '48hFRI',
 '48hBRI',
 '48hDFI',
 '48hDBI',
 '48hFIE',
 '60hFI',
 '60hBI',
 '60hFRI',
 '60hBRI',
 '60hDFI',
 '60hDBI',
 '60hFIE',
 '72hFI',
 '72hBI',
 '72hFRI',
 '72hBRI',
 '72hDFI',
 '72hDBI',
 '72hFIE',
 '96hFI',
 '96hBI',
 '96hFRI',
 '96hBRI',
 '96hDFI',
 '96hDBI',
 '96hFIE',
 '120hFI',
 '

In [35]:
# Persist the computed frame so the analysis can later be resumed from the CHECKPOINT cell.
# The file name is NHC_OFCL_<basin with spaces replaced by underscores>_<error_category>.parquet
basin_tag = header['basin'].replace(' ', '_')
file_name_parquet = '_'.join(['NHC_OFCL', basin_tag, error_category]) + '.parquet'
df.to_parquet(file_name_parquet)
print(file_name_parquet)

NHC_OFCL_EAST_PAC_1989-2022_TI.parquet


In [36]:
###### CHECKPOINT

In [37]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Checkpoint input: hard-coded name of the parquet file to resume from, so this cell
# does not need `header` / `error_category` from the full computation.
# NOTE(review): must match the name printed by the "save computation" cell -- confirm after re-running it.
file_name_parquet = 'NHC_OFCL_EAST_PAC_1989-2022_TI.parquet'

In [38]:
# Load the precomputed table from the checkpoint file; the statistics cells below read from df_ri.
df_ri = pd.read_parquet(file_name_parquet)

In [39]:
# Repeated from the computation section (without the DataFrame modifications) so that the
# statistics cells work when the notebook is resumed from the CHECKPOINT on a fresh kernel.
#
# Builds two-way mappings between forecast valid hours and the per-hour column names.
# Every mapping is derived from the intensity error columns (e.g. '048hI01' -> hour 48).

# Read the column names from the checkpointed frame: `df` only exists when the full
# (slow) computation has been run in this kernel, whereas `df_ri` is always loaded above.
column_names = list(df_ri.keys())
# examples of column names with 48 hours in parenthesis
# (48hI01) intensity valid forecast hours
intensity_err_column_name_to_hour = {}
intensity_err_hour_to_column_name = {}
# (48hFI) forecast intensity for valid hours of the forecast (missing so recalculate from data)
forecast_intensity_column_name_to_hour = {}
forecast_intensity_hour_to_column_name = {}
# (48hBI) best intensity for valid hours of the forecast
best_intensity_column_name_to_hour = {}
best_intensity_hour_to_column_name = {}
# (48hFRI) forecast rapid intensification
forecast_rapid_intensification_column_name_to_hour = {}
forecast_rapid_intensification_hour_to_column_name = {}
# (48hBRI) best intensity rapid intensification
best_intensity_rapid_intensification_column_name_to_hour = {}
best_intensity_rapid_intensification_hour_to_column_name = {}
# (48hDFI) changes in forecast intensity from base time to valid hour
forecast_intensity_change_column_name_to_hour = {}
forecast_intensity_change_hour_to_column_name = {}
# (48hDBI) changes in best intensity from base time to valid hour
best_intensity_change_column_name_to_hour = {}
best_intensity_change_hour_to_column_name = {}
# (48hFIE) forecast intensity error (forecast intensity - best intensity) at valid hour
# These two mappings must be initialised here as well; otherwise the loop below raises a
# NameError when the notebook is started from the checkpoint.
forecast_intensity_error_column_name_to_hour = {}
forecast_intensity_error_hour_to_column_name = {}

for column_name in column_names:
    # only the intensity error columns ('<hour>hI01') contain the substring 'hI'
    if 'hI' in column_name:
        # create mappings from intensity column names to hour
        h = int(column_name.split('hI')[0])
        intensity_err_column_name_to_hour[column_name] = h
        intensity_err_hour_to_column_name[h] = column_name

        # forecast intensity for each forecast hour
        forecast_intensity_column_name = f'{h}hFI'
        forecast_intensity_column_name_to_hour[forecast_intensity_column_name] = h
        forecast_intensity_hour_to_column_name[h] = forecast_intensity_column_name

        # best intensity for each forecast hour
        best_intensity_column_name = f'{h}hBI'
        best_intensity_column_name_to_hour[best_intensity_column_name] = h
        best_intensity_hour_to_column_name[h] = best_intensity_column_name

        # forecast RI categorization for each forecast hour
        forecast_rapid_intensification_column_name = f'{h}hFRI'
        forecast_rapid_intensification_column_name_to_hour[forecast_rapid_intensification_column_name] = h
        forecast_rapid_intensification_hour_to_column_name[h] = forecast_rapid_intensification_column_name

        # best intensity RI categorization for each forecast hour
        best_intensity_rapid_intensification_column_name = f'{h}hBRI'
        best_intensity_rapid_intensification_column_name_to_hour[best_intensity_rapid_intensification_column_name] = h
        best_intensity_rapid_intensification_hour_to_column_name[h] = best_intensity_rapid_intensification_column_name

        # changes in forecast intensity for each forecast hour
        # (forecast intensity at valid hour - forecast intensity at base hour)
        forecast_intensity_change_column_name = f'{h}hDFI'
        forecast_intensity_change_column_name_to_hour[forecast_intensity_change_column_name] = h
        forecast_intensity_change_hour_to_column_name[h] = forecast_intensity_change_column_name

        # changes in (best) intensity for each forecast hour
        # (best intensity at valid hour (from other row) - best intensity at base hour)
        best_intensity_change_column_name = f'{h}hDBI'
        best_intensity_change_column_name_to_hour[best_intensity_change_column_name] = h
        best_intensity_change_hour_to_column_name[h] = best_intensity_change_column_name

        # forecast intensity error
        # (forecast intensity at valid hour - best intensity at valid hour)
        forecast_intensity_error_column_name = f'{h}hFIE'
        forecast_intensity_error_column_name_to_hour[forecast_intensity_error_column_name] = h
        forecast_intensity_error_hour_to_column_name[h] = forecast_intensity_error_column_name

In [40]:



######## STATS


# Remove the row limit for displayed DataFrames: the describe() tables in the cells below
# request 99 percentiles each, so they have over 100 rows and should be shown in full.
pd.set_option('display.max_rows', None)


In [41]:
### statistics for forecasted rapid intensification (FRI)
### (forecast RI)
# Prints the counts of forecast RI events / forecasts / storms, then summarises the forecast
# intensity error at each valid hour for the forecasts flagged with forecast RI at that hour.
# Table 6 is written to table6.csv and displayed as the cell output.

print("(EPAC) Statistics for storms with forecast intensity rapid intensification")
print("==========================================================================")

# forecasts (rows) flagged with forecast RI on at least one valid hour
fri_forecasts = df_ri.loc[df_ri['FRI'] == True]
fri_num_events = fri_forecasts['FRI_count'].sum()
fri_forecast_count = len(fri_forecasts)
print(f"Number of forecast rapid intensification events on a valid hour: {fri_num_events}")
print(f"Number of forecasts with a forecast rapid intensification event on a valid hour: {fri_forecast_count}")
fri_storm_ids = set(fri_forecasts['STMID'])
print(f"{len(fri_storm_ids)} Storms with rapid intensification in forecast:")
print(fri_storm_ids)
print("")

# valid hour -> list of intensity errors of the forecasts flagged with forecast RI at that hour
fri_errors = {}
print("Number of forecast rapid intensification events for each forecast valid hour:")
for fri_column, valid_h in forecast_rapid_intensification_column_name_to_hour.items():
    flagged_rows = df_ri.loc[df_ri[fri_column] == True]
    if len(flagged_rows) > 0:
        print(f"{fri_column} : {len(flagged_rows)}")
    fri_errors[valid_h] = flagged_rows[intensity_err_hour_to_column_name[valid_h]].to_list()

# remove empty keys
fri_errors = {hour: errs for hour, errs in fri_errors.items() if errs}

print("")
print("Table 6.")
# convert to dataframe and compute some stats
print("(EPAC) Forecast intensity error for forecast rapid intensification (columns are forecast valid hours):")
# columns have different lengths, so wrap each list in a Series (shorter ones are NaN padded)
df_fri_errors = pd.DataFrame({hour: pd.Series(errs) for hour, errs in fri_errors.items()})
# every whole percentile from 1% to 99%
fri_percentiles = [pct / 100 for pct in range(1, 100)]
fri_error_stats = df_fri_errors.describe(percentiles=fri_percentiles)
# save table
fri_error_stats.to_csv('table6.csv')
fri_error_stats

(EPAC) Statistics for storms with forecast intensity rapid intensification
Number of forecast rapid intensification events on a valid hour: 445
Number of forecasts with a forecast rapid intensification event on a valid hour: 239
96 Storms with rapid intensification in forecast:
{'EP032016', 'EP012011', 'EP081998', 'EP031996', 'EP151991', 'EP061992', 'EP152015', 'EP202018', 'EP062022', 'EP242018', 'EP132009', 'EP212018', 'EP112001', 'EP062012', 'EP042010', 'EP182020', 'EP151998', 'EP052002', 'EP032000', 'EP141997', 'EP171992', 'EP062021', 'EP162016', 'EP072019', 'EP082021', 'EP102006', 'EP062017', 'EP032012', 'EP051992', 'EP172021', 'EP061997', 'EP082002', 'EP091995', 'EP121997', 'EP112004', 'EP062019', 'EP092020', 'EP122002', 'EP122018', 'EP072018', 'EP132015', 'EP022013', 'EP202009', 'EP061998', 'EP162018', 'EP111995', 'EP071992', 'EP022019', 'EP132012', 'EP082009', 'EP132014', 'EP032018', 'EP041993', 'EP172013', 'EP162022', 'EP202016', 'EP042011', 'EP052012', 'EP152016', 'EP132011', 

Unnamed: 0,12,24,36,48,60,72
count,113.0,178.0,63.0,49.0,9.0,33.0
mean,-2.477876,-5.758427,-4.365079,-2.755102,1.666667,3.333333
std,13.009679,17.348637,23.970615,20.893112,21.213203,21.311186
min,-50.0,-75.0,-85.0,-55.0,-25.0,-40.0
1%,-45.0,-66.15,-57.1,-47.8,-24.6,-36.8
2%,-41.4,-42.3,-40.0,-40.6,-24.2,-33.6
3%,-30.0,-40.0,-40.0,-40.0,-23.8,-30.4
4%,-27.6,-35.0,-40.0,-40.0,-23.4,-28.6
5%,-25.0,-35.0,-39.5,-36.0,-23.0,-27.0
6%,-25.0,-30.0,-36.4,-31.2,-22.6,-25.4


In [42]:
### statistics for best intensity rapid intensification (BRI)
## ("observed" RI)
# Prints the counts of best intensity RI events / forecasts / storms, then summarises the forecast
# intensity error at each valid hour for the forecasts whose best intensity shows RI at that hour.
# Table 7 is written to table7.csv and displayed as the cell output.

print("(EPAC) Statistics for storms with best intensity rapid intensification:")
print("=======================================================================")

# forecasts (rows) whose best intensity shows RI on at least one valid hour
bri_forecasts = df_ri.loc[df_ri['BRI'] == True]
bri_num_events = bri_forecasts['BRI_count'].sum()
bri_forecast_count = len(bri_forecasts)
print(f"Number of best intensity rapid intensification events on valid hours: {bri_num_events}")
print(f"Number of forecasts with a best intensity rapid intensification event on a valid hour: {bri_forecast_count}")
bri_storm_ids = set(bri_forecasts['STMID'])
print(f"{len(bri_storm_ids)} Storms with best intensity rapid intensification:")
print("")
print(bri_storm_ids)
print("")

# valid hour -> list of intensity errors of the forecasts with best intensity RI at that hour
bri_errors = {}
print("Number of best intensity rapid intensification events for each forecast valid hour:")
for bri_column, valid_h in best_intensity_rapid_intensification_column_name_to_hour.items():
    flagged_rows = df_ri.loc[df_ri[bri_column] == True]
    if len(flagged_rows) > 0:
        print(f"{bri_column} : {len(flagged_rows)}")
    bri_errors[valid_h] = flagged_rows[intensity_err_hour_to_column_name[valid_h]].to_list()

# remove empty keys
bri_errors = {hour: errs for hour, errs in bri_errors.items() if errs}

print("")
print("Table 7.")
# convert to dataframe and compute some stats
print("(EPAC) Forecast intensity error for best intensity rapid intensification (columns are forecast valid hours):")
# columns have different lengths, so wrap each list in a Series (shorter ones are NaN padded)
df_bri_errors = pd.DataFrame({hour: pd.Series(errs) for hour, errs in bri_errors.items()})
# every whole percentile from 1% to 99%
bri_percentiles = [pct / 100 for pct in range(1, 100)]
bri_error_stats = df_bri_errors.describe(percentiles=bri_percentiles)
# save table
bri_error_stats.to_csv('table7.csv')
bri_error_stats

(EPAC) Statistics for storms with best intensity rapid intensification:
Number of best intensity rapid intensification events on valid hours: 2933
Number of forecasts with a best intensity rapid intensification event on a valid hour: 1391
231 Storms with best intensity rapid intensification:

{'EP092015', 'EP032016', 'EP012011', 'EP092002', 'EP122022', 'EP091996', 'EP071993', 'EP081998', 'EP042012', 'EP031996', 'EP082016', 'EP012002', 'EP102002', 'EP061992', 'EP022002', 'EP012005', 'EP052010', 'EP152015', 'EP202018', 'EP071994', 'EP101990', 'EP071999', 'EP062022', 'EP191990', 'EP162001', 'EP231990', 'EP132003', 'EP061993', 'EP152005', 'EP242018', 'EP062005', 'EP132009', 'EP212018', 'EP072012', 'EP032003', 'EP112001', 'EP191994', 'EP062007', 'EP102015', 'EP172000', 'EP042010', 'EP032011', 'EP092022', 'EP051998', 'EP092014', 'EP211992', 'EP182020', 'EP131993', 'EP151998', 'EP032000', 'EP052002', 'EP141997', 'EP171992', 'EP182014', 'EP062021', 'EP162016', 'EP052016', 'EP082021', 'EP071990

Unnamed: 0,12,24,36,48,60,72
count,652.0,830.0,585.0,490.0,41.0,335.0
mean,-15.41411,-22.879518,-32.333333,-37.72449,-28.170732,-40.447761
std,8.906253,12.426767,14.97658,17.349281,15.95917,18.117451
min,-55.0,-75.0,-100.0,-105.0,-60.0,-110.0
1%,-45.0,-60.0,-70.0,-81.65,-56.0,-88.3
2%,-35.0,-50.0,-70.0,-75.0,-52.0,-80.0
3%,-35.0,-45.65,-62.4,-70.0,-50.0,-75.0
4%,-30.0,-45.0,-60.0,-70.0,-50.0,-75.0
5%,-30.0,-45.0,-55.0,-67.75,-50.0,-70.0
6%,-30.0,-45.0,-55.0,-65.0,-50.0,-70.0


In [43]:
### Basic statistics for number of storms and RI storms
# Share of all storm ids that appear in the best intensity RI set and in the forecast RI set.
print("Basic statistics for all Eastern Pacific storms")
print("===============================================")

# unique storm ids over all forecasts
all_storm_ids = set(df_ri['STMID'])
num_storms = len(all_storm_ids)

num_bri_storms = len(bri_storm_ids)
num_fri_storms = len(fri_storm_ids)

# the "no RI" share is the complement of the RI share
pct_bri = 100.0 * num_bri_storms / num_storms
pct_fri = 100.0 * num_fri_storms / num_storms
pct_no_bri = 100.0 - pct_bri
pct_no_fri = 100.0 - pct_fri

print(f"Total storms: {num_storms}")
print("")
print(f"% of all storms with best intensity rapid intensification: {pct_bri:2.2f} %")
print(f"% of all storms with no best intensity rapid intensification: {pct_no_bri:2.2f} %")
print("")
print(f"% of all storms with forecast rapid intensification: {pct_fri:2.2f} %")
print(f"% of all storms with no forecast rapid intensification: {pct_no_fri:2.2f} %")

Basic statistics for all Eastern Pacific storms
Total storms: 612

% of all storms with best intensity rapid intensification: 37.75 %
% of all storms with no best intensity rapid intensification: 62.25 %

% of all storms with forecast rapid intensification: 15.69 %
% of all storms with no forecast rapid intensification: 84.31 %


In [44]:
### Statistics for storms with no best intensity rapid intensification
## ("observed" NO RI)
# Note: the storm id set is storm level (storms that never appear in bri_storm_ids), while the
# error table is forecast level (every forecast row whose BRI flag is False).
# Table 8 is written to table8.csv and displayed as the cell output.

no_bri_storm_ids = all_storm_ids - bri_storm_ids
# the number of valid forecast hour columns (including base time)
num_valid_hour_columns = len(best_intensity_rapid_intensification_hour_to_column_name)

df_no_bri = df_ri.loc[df_ri['BRI'] == False]
no_bri_forecast_count = len(df_no_bri)
# subtract one since the base time can never have rapid intensification
no_bri_num_events = no_bri_forecast_count * (num_valid_hour_columns - 1)

print("(EPAC) Statistics for storms with no best intensity rapid intensification:")
print("==========================================================================")
print(f"Number of best intensity no rapid intensification events on valid hours: {no_bri_num_events}")
print(f"Number of forecasts with no best intensity rapid intensification event on a valid hour: {no_bri_forecast_count}")
print(f"{len(no_bri_storm_ids)} Storms with no best intensity rapid intensification:")
print("")
print(no_bri_storm_ids)
print("")

# valid hour -> intensity errors of all forecasts without best intensity RI
no_bri_errors = {
    valid_h: df_no_bri[err_column].to_list()
    for valid_h, err_column in intensity_err_hour_to_column_name.items()
}

# remove empty keys
no_bri_errors = {hour: errs for hour, errs in no_bri_errors.items() if errs}

print("")
print("Table 8.")
# convert to dataframe and compute some stats
print("(EPAC) Forecast intensity error for best intensity no rapid intensification (columns are forecast valid hours):")
df_no_bri_errors = pd.DataFrame({hour: pd.Series(errs) for hour, errs in no_bri_errors.items()})
# every whole percentile from 1% to 99%
no_bri_percentiles = [pct / 100 for pct in range(1, 100)]
no_bri_error_stats = df_no_bri_errors.describe(percentiles=no_bri_percentiles)
# save table
no_bri_error_stats.to_csv('table8.csv')
no_bri_error_stats

(EPAC) Statistics for storms with no best intensity rapid intensification:
Number of best intensity no rapid intensification events on valid hours: 106380
Number of forecasts with no best intensity rapid intensification event on a valid hour: 10638
381 Storms with no best intensity rapid intensification:

{'EP112017', 'EP042000', 'EP102008', 'EP182015', 'EP072016', 'EP172017', 'EP042001', 'EP111990', 'EP162015', 'EP061989', 'EP202014', 'EP111991', 'EP102001', 'EP182019', 'EP052009', 'EP032019', 'EP062012', 'EP191992', 'EP172014', 'EP081995', 'EP122011', 'EP072019', 'EP032015', 'EP011997', 'EP141990', 'EP092000', 'EP082006', 'EP082008', 'EP052020', 'EP131999', 'EP122013', 'EP032005', 'EP041998', 'EP032009', 'EP151989', 'EP181992', 'EP122017', 'EP062010', 'EP062019', 'EP091994', 'EP142013', 'EP061996', 'EP101997', 'EP022003', 'EP162000', 'EP062006', 'EP122019', 'EP122012', 'EP062016', 'EP212015', 'EP222016', 'EP082018', 'EP012020', 'EP142020', 'EP072014', 'EP192017', 'EP102021', 'EP04199

Unnamed: 0,0,12,24,36,48,60,72,96,120,144,168
count,9496.0,8576.0,7525.0,6543.0,5639.0,435.0,4194.0,1567.0,1053.0,0.0,0.0
mean,-0.342249,1.426073,3.049568,3.699373,3.459833,6.781609,3.749404,1.393108,-0.396011,,
std,3.667136,7.510608,11.64626,14.705567,16.867326,15.22067,19.790313,20.126481,20.538008,,
min,-25.0,-30.0,-50.0,-55.0,-60.0,-40.0,-85.0,-80.0,-75.0,,
1%,-10.0,-15.0,-25.0,-30.0,-40.0,-30.0,-45.0,-45.0,-55.0,,
2%,-10.0,-15.0,-20.0,-25.0,-35.0,-30.0,-40.0,-40.0,-45.0,,
3%,-10.0,-10.0,-20.0,-25.0,-30.0,-25.0,-35.0,-40.0,-40.0,,
4%,-5.0,-10.0,-15.0,-20.0,-25.0,-25.0,-35.0,-35.0,-40.0,,
5%,-5.0,-10.0,-15.0,-20.0,-25.0,-20.0,-30.0,-35.0,-35.0,,
6%,-5.0,-10.0,-15.0,-20.0,-25.0,-20.0,-30.0,-35.0,-35.0,,


In [45]:
### Statistics for storms with no best intensity rapid intensification but with forecast rapid intensification
## ("observed" NO RI, YES forecast RI)
# Note: the storm id set is storm level (storms never in bri_storm_ids that are in fri_storm_ids),
# while the counts and the error table are forecast level (rows with BRI False and FRI True), so
# the table can include forecasts from storms that show best intensity RI in other forecasts.
# Table 9 is written to table9.csv and displayed as the cell output.

no_bri_yes_fri_storm_ids = no_bri_storm_ids.intersection(fri_storm_ids)
df_no_bri_yes_fri = df_ri.loc[(df_ri['BRI'] == False) & (df_ri['FRI'] == True)]
no_bri_yes_fri_num_events = df_no_bri_yes_fri['FRI_count'].sum()
no_bri_yes_fri_forecast_count = len(df_no_bri_yes_fri)

print("(EPAC) Statistics for storms with no best intensity rapid intensification but with forecast RI:")
print("========================================================================================")
print(f"Number of forecast rapid intensification events on valid hours: {no_bri_yes_fri_num_events}")
print(f"Number of forecasts with rapid intensification event on a valid hour: {no_bri_yes_fri_forecast_count}")
print(f"{len(no_bri_yes_fri_storm_ids)} Storms with forecast rapid intensification:")
print("")
print(no_bri_yes_fri_storm_ids)
print("")

# valid hour -> intensity errors of the selected forecasts flagged with forecast RI at that hour
no_bri_yes_fri_errors = {}
# the per-hour counts below come from the forecast RI ('<hour>hFRI') columns, so label them as such
print("Number of forecast rapid intensification events for each forecast valid hour:")
for column_name, valid_h in forecast_rapid_intensification_column_name_to_hour.items():
    no_bri_yes_fri_rows = df_no_bri_yes_fri.loc[df_no_bri_yes_fri[column_name] == True]
    count = len(no_bri_yes_fri_rows)
    if count > 0:
        print(f"{column_name} : {count}")

    intensity_err_column_name = intensity_err_hour_to_column_name[valid_h]
    no_bri_yes_fri_errors[valid_h] = no_bri_yes_fri_rows[intensity_err_column_name].to_list()

# remove empty keys
no_bri_yes_fri_errors = {k: v for k, v in no_bri_yes_fri_errors.items() if v}

print("")
print("Table 9.")
# convert to dataframe and compute some stats
print("(EPAC) Forecast intensity error for best intensity no rapid intensification but with forecast RI\n(columns are forecast valid hours):")
# columns have different lengths, so wrap each list in a Series (shorter ones are NaN padded)
df_no_bri_yes_fri_errors = pd.DataFrame({k: pd.Series(v) for k, v in no_bri_yes_fri_errors.items()})
# every whole percentile from 1% to 99%; describe is computed once and reused for file and display
no_bri_yes_fri_percentiles = [pct / 100 for pct in range(1, 100)]
no_bri_yes_fri_error_stats = df_no_bri_yes_fri_errors.describe(percentiles=no_bri_yes_fri_percentiles)
# save table
no_bri_yes_fri_error_stats.to_csv('table9.csv')
no_bri_yes_fri_error_stats

(EPAC) Statistics for storms with no best intensity rapid intensification but with forecast RI:
Number of forecast rapid intensification events on valid hours: 88
Number of forecasts with rapid intensification event on a valid hour: 60
7 Storms with forecast rapid intensification:

{'EP062019', 'EP061997', 'EP162021', 'EP062012', 'EP122002', 'EP151991', 'EP072019'}

Number of best intensity rapid intensification events for each forecast valid hour:
12hFRI : 32
24hFRI : 36
36hFRI : 10
48hFRI : 6
72hFRI : 4

Table 9.
(EPAC) Forecast intensity error for best intensity no rapid intensification but with forecast RI
(columns are forecast valid hours):


Unnamed: 0,12,24,36,48,72
count,32.0,36.0,10.0,6.0,4.0
mean,6.875,12.083333,23.5,21.666667,27.5
std,9.223917,7.007649,9.44281,7.527727,12.583057
min,-15.0,0.0,10.0,15.0,10.0
1%,-11.9,1.75,10.0,15.0,10.6
2%,-8.8,3.5,10.0,15.0,11.2
3%,-5.7,5.0,10.0,15.0,11.8
4%,-5.0,5.0,10.0,15.0,12.4
5%,-5.0,5.0,10.0,15.0,13.0
6%,-5.0,5.0,10.0,15.0,13.6


In [46]:
# Note: storms forecast RI that also have best intensity RI (not necessarily at the correct valid time)
# Storm level 2x2 comparison of "forecast RI at any time" against "best intensity RI at any time".
# Percentages are relative to the number of RI storms (TP, FN) or non-RI storms (FP, TN).

no_bri_storm_ids = all_storm_ids - bri_storm_ids
no_fri_storm_ids = all_storm_ids - fri_storm_ids

# naive validation since forecast may have gotten RI for the wrong time/location/reason
num_ri_storms = len(bri_storm_ids)
tp = len(fri_storm_ids & bri_storm_ids)
fn = len(no_fri_storm_ids & bri_storm_ids)
pct_tp = 100.0 * tp / num_ri_storms
pct_fn = 100.0 * fn / num_ri_storms

print("(EPAC) RI Storms:")
print(f"% Storms with best intensity RI that were forecast with RI (TP): {pct_tp:2.2f} % ({tp})")
print(f"% Storms with best intensity RI that were not forecast with RI (FN): {pct_fn:2.2f} % ({fn})")
print("")
print("(EPAC) Non-RI Storms:")
num_non_ri_storms = len(no_bri_storm_ids)
fp = len(fri_storm_ids & no_bri_storm_ids)
pct_fp = 100.0 * fp / num_non_ri_storms
tn = len(no_fri_storm_ids & no_bri_storm_ids)
pct_tn = 100.0 * tn / num_non_ri_storms
print(f"% Storms with no best intensity RI that were forecast with RI (FP): {pct_fp:2.2f} % ({fp})")
print(f"% Storms with no best intensity RI that were not forecast with RI (TN): {pct_tn:2.2f} % ({tn})")


(EPAC) RI Storms:
% Storms with best intensity RI that were forecast with RI (TP): 38.53 % (89)
% Storms with best intensity RI that were not forecast with RI (FN): 61.47 % (142)

(EPAC) Non-RI Storms:
% Storms with no best intensity RI that were forecast with RI (FP): 1.84 % (7)
% Storms with no best intensity RI that were not forecast with RI (TN): 98.16 % (374)


In [47]:
### Statistics for all storms (all storms considered, RI and no RI)
# Builds Table 10: descriptive statistics (count/mean/std/min/1%..99%/max) of the forecast
# intensity error for every row of df_ri, one column per forecast valid hour.
# The summary is written to 'table10.csv' and displayed as the cell output.

all_storm_ids = set(df_ri['STMID'].to_list())
num_storms = len(all_storm_ids)
print(f"(EPAC) Total storms: {num_storms}")
print("")

# One list of intensity errors per valid hour, in df_ri row order.
# Pulling the whole column at once replaces a per-row `.at` lookup loop and
# yields the same values in the same order (assumes df_ri has a unique index,
# which the per-row `.at` lookups already required).
all_errors = {
    valid_h: df_ri[column_name].to_list()
    for valid_h, column_name in forecast_intensity_error_hour_to_column_name.items()
}

# remove empty keys
# NOTE: missing errors stay in the lists as missing values, so a valid hour is only
# dropped here when df_ri has no rows at all; hours without any verifying data are
# therefore still present in the table (with count 0).
all_errors = {k: v for k, v in all_errors.items() if v}

print("")
print("Table 10.")
# convert to dataframe and compute some stats
print("(EPAC) Forecast intensity error for all storms (columns are forecast valid hours):")
df_all_errors = pd.DataFrame({k: pd.Series(v) for k, v in all_errors.items()})

# 1st through 99th percentiles; i / 100 is exactly the float written as 0.01 ... 0.99
table10_percentiles = [i / 100 for i in range(1, 100)]
# compute the summary once and reuse it for both the saved file and the displayed output
table10_summary = df_all_errors.describe(percentiles=table10_percentiles)
# save table
table10_summary.to_csv('table10.csv')
table10_summary

(EPAC) Total storms: 612


Table 10.
(EPAC) Forecast intensity error for all storms (columns are forecast valid hours):


Unnamed: 0,0,12,24,36,48,60,72,96,120,144,168
count,10887.0,9804.0,8617.0,7508.0,6477.0,516.0,4756.0,1758.0,1081.0,0.0,0.0
mean,-0.406724,-0.04131,0.094929,-0.105887,-0.65694,3.565891,0.361648,2.063709,2.06568,,
std,3.679506,8.622084,13.961141,17.904364,20.470567,18.247956,23.145986,21.041154,20.647983,,
min,-25.0,-55.0,-75.0,-100.0,-105.0,-60.0,-110.0,-65.0,-60.0,,
1%,-10.0,-25.0,-40.0,-50.0,-55.0,-45.0,-60.0,-50.0,-50.0,,
2%,-10.0,-20.0,-30.0,-40.0,-50.0,-40.0,-55.0,-45.0,-45.0,,
3%,-10.0,-20.0,-30.0,-35.0,-45.0,-35.0,-50.0,-40.0,-40.0,,
4%,-5.0,-15.0,-25.0,-35.0,-40.0,-30.0,-45.0,-35.0,-35.0,,
5%,-5.0,-15.0,-25.0,-30.0,-40.0,-30.0,-40.0,-35.0,-35.0,,
6%,-5.0,-15.0,-20.0,-30.0,-35.0,-30.0,-40.0,-35.0,-35.0,,


In [48]:
#### EASTERN PACIFIC
# Initial thoughts...

# Hypothetical basis for considering probabilities using OFCL intensities alone from forecast advisories:
#  for valid forecast hours: 12h <= tau <= 72h:
#   when calculating base rates using the above tables and statistics, check if there is RI in the forecast:
#   if there is RI in the forecast:
#      note that only ~2% of storms with no best intensity RI were forecast with RI (false positives), so reference:
#         1.84 % to weight (pct_fp) on Table 9 calculations
#         98.16 % to weight (pct_tn) on Table 6 calculations
#
#   if there is no RI in the forecast:
#      note that only ~38% of storms have best intensity RI historically (in the last ~30 years), so reference:
#         62.25 % to weight (pct_no_bri) on Table 8 calculations
#         37.75 % to weight (pct_bri) on Table 7 calculations
# for tau > 72h (use base rates):
#   rely on Table 10

# This likely provides a lower bound since skill has increased since 1989