In [1]:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Mar  2 16:52:50 2018

This script is for comparing two 'lists' of dates of a cut-off low, where the 
number of the events that match is the measure of success. The hits, misses and 
extra entries can be written out to a .txt file. 

INPUT:  -file of cut-off lows identified with tracking scheme (ncep1 only, save_path + 'cutoff_lows_ncep1.txt')
        -file of Mike Pook's cutoff lows (pooklows_for_synoptic_comparison.txt)

OUTPUT: -prints numbers of hit, missed and extra events to the screen (standard output)
        - OPTIONAL OUTPUT:.txt file containing year, month, day, and rain of hit, 
        miss and extra low events

OUTPUT FILE NAME FORMAT: save_path + 'pookhitlist.txt'

@author: Nick Grosfeld
"""
#==============================================================================
import numpy as np
#import pandas as pd

years = (1979,2009) # dataset overlap period 
months = (4,10) # dataset overlap period 
save_results = 1

save_path = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Output_Data/R13vsMU/' 

#==============================================================================
# Load Mike Pook's dataset, and convert it to a 'set' (as in set theory from maths)

training_dataset_filename = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/R13/pook2009lows.txt'
training_dataset = np.loadtxt(training_dataset_filename, delimiter = ',')

# This is where a subset of the training dataset could be taken.

overlap_rows = (training_dataset[:,0] >= years[0]) & (training_dataset[:,0] <= years[1])

training_dataset_dates = training_dataset[overlap_rows,0:3]
print(training_dataset_dates[0,:])

training_dataset_dates = map(tuple,training_dataset_dates)
training_dataset_dates = set(training_dataset_dates)

#==============================================================================
# Load the new dataset to be tested, and convert it also to a 'set'

test_dataset_filename = '/home/561/nxg561/00_Tracking_Scheme_Comparison/Input_Data/Acacia_Lows/closed_lows_mu_era5_2009_sea.txt'
test_dataset = np.loadtxt(test_dataset_filename, delimiter = ',')

overlap_rows = (test_dataset[:,1] >= years[0]) & (test_dataset[:,1] <= years[1]) & (test_dataset[:,2] >= months[0]) & (test_dataset[:,2] <= months[1])

test_dataset_dates = test_dataset[overlap_rows,1:4]
print(test_dataset_dates[0,:])

test_dataset_dates = map(tuple,test_dataset_dates)
test_dataset_dates = set(test_dataset_dates)

#==============================================================================
#Now do the intersection and difference of the sets. 

hit = training_dataset_dates.intersection(test_dataset_dates)

miss = training_dataset_dates.difference(test_dataset_dates)

extra = test_dataset_dates.difference(training_dataset_dates)

#convert each of the sets of results to a list and write it to a .txt file. 

hitlist = list(hit)
misslist = list(miss)
extralist = list(extra)

hits = len(hitlist)
misses = len(misslist)
extras = len(extralist) 

hits_per_year = hits/ (years[1] - years[0] + 1)
misses_per_year = misses / (years[1] - years[0] + 1)
extras_per_year = extras / (years[1] - years[0] + 1)
    
print('Hits: ', hits)
print('Hits per year: ', hits_per_year)
print('Misses: ', misses)
print('Misses per year: ', misses_per_year)
print('Extras: ', extras)
print('Extras per year: ', extras_per_year)

#==============================================================================

# Create an array of the dates and rain (from Mike Pook's dataset) of the 
# cut-off lows that were missed
lm = len(misslist)
missarray = np.zeros((lm, 4), float)

looprange = np.arange(0,lm)

for r in looprange:
    
    date = misslist[r]    
    y = date[0]
    m = date[1]
    d = date[2]
    
    im = (training_dataset[:,0] == y) & (training_dataset[:,1] == m) & (training_dataset[:,2] == d)
    rain = training_dataset[im,3]
    
    missarray[r,0] = y
    missarray[r,1] = m
    missarray[r,2] = d
    missarray[r,3] = rain

# Create an array of the dates and rain of (from Mike Pook's dataset) the 
# cut-off lows that were hit    
lh = len(hitlist)
hitarray = np.zeros((lh, 4), float)

looprange = np.arange(0,lh)

for r in looprange:
    
    date = hitlist[r]    
    y = date[0]
    m = date[1]
    d = date[2]
    
    ih = (training_dataset[:,0] == y) & (training_dataset[:,1] == m) & (training_dataset[:,2] == d)
    rain = training_dataset[ih,3]
    
    hitarray[r,0] = y
    hitarray[r,1] = m
    hitarray[r,2] = d
    hitarray[r,3] = rain

# Create an array of the dates of the extra cut-off lows that were found 
le = len(extralist)
extraarray = np.zeros((le, 3), float)

looprange = np.arange(0,le)

for r in looprange:
    
    date = extralist[r]    
    y = date[0]
    m = date[1]
    d = date[2]
    
    extraarray[r,0] = y
    extraarray[r,1] = m
    extraarray[r,2] = d

#==============================================================================
    
# Save the three result arrays as .txt files
if save_results == 1:

    output_filename = save_path + 'r13_matches.txt'
    np.savetxt(output_filename, hitarray, delimiter=',') 
    
    output_filename = save_path + 'r13_only.txt'
    np.savetxt(output_filename, missarray, delimiter=',') 
    
    output_filename = save_path + 'mu_only.txt'
    np.savetxt(output_filename, extraarray, delimiter=',') 

[1979.    4.    4.]
[1979.    4.    4.]
Hits:  1201
Hits per year:  38.74193548387097
Misses:  241
Misses per year:  7.774193548387097
Extras:  692
Extras per year:  22.322580645161292


In [2]:
print('Hits: ', len(hitlist))
print('Misses: ', len(misslist))
print('Extras: ', len(extralist))
print(len(hitlist) / (len(hitlist) + len(misslist) + len(extralist)))

Hits:  1201
Misses:  241
Extras:  692
0.5627928772258669


In [3]:
print('CSI:')
print(hits / (hits + misses + extras))

CSI:
0.5627928772258669


In [4]:
print('R13 percent:')
print(len(hitlist) / (len(hitlist) + len(misslist)))

P06 percent:
0.8328710124826629


In [None]:
print('MU percent:')
print(len(hitlist) / (len(hitlist) + len(extralist)))