In [1]:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
This code is for comparing two datasets of located weather events that should
in theory be identical. This code computes how many weather events are recorded by
both datasets, and how many events are recorded in one dataset but not the other. 
"""
import numpy as np
#import pandas as pd

In [2]:
# Output switch: the comparison results are written to disk with np.savetxt
# only when this equals 1.
save_results = 0

# Largest absolute difference allowed in the latitude column and in the
# longitude column (tested separately, inclusive) for two lows to count as the
# same system. Units are those of the input files - presumably degrees; TODO confirm.
spatial_tolerance = 5
# (first_year, last_year) of the comparison period; both ends are inclusive.
years = (1979, 2017)

# Output directory. File names are appended by plain string concatenation, so
# the trailing slash is required.
save_path = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Output_Data/PGvsMU/'


In [3]:
#==============================================================================
# Load the datasets as numpy arrays
# Both files are read as comma-delimited numeric text, one low per row.
# The matching code below reads column 1 as the year, column 5 as the time
# step, column 6 as the latitude and column 7 as the longitude (0-based).
# NOTE(review): np.loadtxt returns a 1-D array for a single-row file, which the
# 2-D indexing used later would not accept - confirm inputs have several rows.
dataset0_filename = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/Nick_Lows/closed_lows_pg_era5_2017.txt'
dataset0 = np.loadtxt(dataset0_filename, delimiter = ',')

dataset1_filename = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/Acacia_Lows/closed_lows_mu_era5_2017.txt'
dataset1 = np.loadtxt(dataset1_filename, delimiter = ',')


In [None]:
# Greedy one-to-one matching of lows between the two datasets.
#
# For every year, each low in dataset0 (taken in file order) is paired with the
# FIRST not-yet-matched low in dataset1 that has the same time step (column 5)
# and whose latitude (column 6) and longitude (column 7) are both within
# spatial_tolerance. Matched rows are copied into the *_matches lists and then
# overwritten with NaN so that no system can be matched twice.
#
# NOTE: this cell overwrites rows of dataset0 and dataset1 in place, so it is
# not idempotent - re-run the loading cell before running it again.
# NOTE(review): the longitude test is a plain absolute difference, so two lows
# on either side of a longitude wrap line would not be matched - confirm
# whether the domain crosses one.

# A scalar number of the total number of matching cyclones
matches = 0

# Lists that will hold the dataset rows of each of the matching cyclones
dataset0_matches = []
dataset1_matches = []

# Loop through each year (both end years inclusive)
for year in range(years[0], years[1] + 1):

    print(year)

    # Extract the cyclone entries for that year from both datasets. Boolean-mask
    # indexing returns copies, so the per-year arrays can be edited freely and
    # are written back into the full arrays at the end of the iteration.
    in_year0 = dataset0[:, 1] == year
    in_year1 = dataset1[:, 1] == year
    dataset0_y = dataset0[in_year0, :]
    dataset1_y = dataset1[in_year1, :]

    # Loop through the cyclone entries in dataset0
    for i0 in range(dataset0_y.shape[0]):

        cyclone0 = dataset0_y[i0, :]

        # Test every dataset1 entry of this year at once instead of looping over
        # them one by one. Rows already matched are NaN and therefore compare
        # False; errstate silences the NaN-comparison warning that some NumPy
        # versions emit.
        with np.errstate(invalid='ignore'):
            tstepmatch = dataset1_y[:, 5] == cyclone0[5]
            latmatch = np.abs(dataset1_y[:, 6] - cyclone0[6]) <= spatial_tolerance
            lonmatch = np.abs(dataset1_y[:, 7] - cyclone0[7]) <= spatial_tolerance

        candidates = np.flatnonzero(tstepmatch & latmatch & lonmatch)
        if candidates.size == 0:
            # No counterpart in dataset1: the row stays non-NaN.
            continue

        # Keep only the first candidate, i.e. the same entry the row-by-row
        # search would have reached first.
        i1 = candidates[0]
        cyclone1 = dataset1_y[i1, :]

        matches = matches + 1

        # np.copy is needed here because integer indexing returns views, and the
        # underlying rows are overwritten with NaN just below.
        dataset0_matches.append(np.copy(cyclone0))
        dataset1_matches.append(np.copy(cyclone1))

        # The matched rows are overwritten with np.nan to prevent a system
        # being matched twice.
        dataset0_y[i0, :] = np.nan
        dataset1_y[i1, :] = np.nan

    # The cyclone entries for each year are written back into the original arrays
    # (both the matched cyclones that are indicated by NaNs, and the extra cyclone
    # entries that are left over) so the lists of extra systems can be obtained.
    dataset0[in_year0, :] = dataset0_y
    dataset1[in_year1, :] = dataset1_y

print('Number of matches: ', matches)
matches_per_year = matches / (years[1] - years[0] + 1)
print('Matches per year: ', matches_per_year)

# Turn the lists of matched rows into arrays (one row per matched low)
dataset0_matches_array = np.asarray(dataset0_matches)
dataset1_matches_array = np.asarray(dataset1_matches)

# Matched rows were overwritten with NaN in the full arrays, so every row whose
# first column is still a number is a low found in that dataset only.
ir0 = np.isnan(dataset0[:, 0])
dataset0_only = dataset0[~ir0, :]

ir1 = np.isnan(dataset1[:, 0])
dataset1_only = dataset1[~ir1, :]

# Optionally write the matched and unmatched lows to comma-delimited text files
# under save_path (only when save_results equals 1).
if save_results == 1:

    # (file name, rows to write) - written in this order
    outputs = (
        ('pg_matches.txt', dataset0_matches_array),
        ('mu_matches.txt', dataset1_matches_array),
        ('pg_only.txt', dataset0_only),
        ('mu_only.txt', dataset1_only),
    )

    for suffix, rows in outputs:
        output_filename = str(save_path) + suffix
        np.savetxt(output_filename, rows, delimiter = ',')
 
# Compute and display the Critical Success Index:
#   CSI = matched / (matched + dataset0-only + dataset1-only)
n_matched = len(dataset0_matches_array)
csi_denominator = n_matched + len(dataset0_only) + len(dataset1_only)

# With no lows at all in either dataset the index is undefined; report NaN
# instead of raising ZeroDivisionError. float() forces true division even if
# the notebook is run with the Python 2 interpreter named in its shebang.
if csi_denominator > 0:
    csi = float(n_matched) / csi_denominator
else:
    csi = np.nan

print('CSI: ', csi)

1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002


In [24]:
# Number of dataset0 lows that found no counterpart in dataset1, in total and
# averaged over the (inclusive) year range.
# NOTE(review): dataset0 is labelled 'pg' in the saved file names and 'PG' in
# the percentage printout but 'G21' here - confirm these name the same scheme.
g21_only = dataset0_only.shape[0]
print('G21 lows only')
print(g21_only)

year_count = years[1] - years[0] + 1
g21_only_year = g21_only / year_count
print('G21 lows only per year:')
print(g21_only_year)

G21 lows only
5314
G21 lows only per year:
136.25641025641025


In [25]:
# Number of dataset1 lows that found no counterpart in dataset0, in total and
# averaged over the (inclusive) year range.
mu_only = dataset1_only.shape[0]
print('MU lows only')
print(mu_only)

year_count = years[1] - years[0] + 1
mu_only_year = mu_only / year_count
print('MU lows only per year:')
print(mu_only_year)

MU lows only
5859
MU lows only per year:
150.23076923076923


In [26]:
# Share of each dataset's lows that were matched in the other dataset:
# 100 * matches / (matches + lows found only in that dataset)
for label, unmatched in (('PG Percent: ', g21_only), ('MU Percent: ', mu_only)):
    print(label, 100 * matches / (matches + unmatched))

PG Percent:  87.71102169187364
MU Percent:  86.61931623541234
