all_stats.py
'''
How to obtain all the statistical metrics within the skill metrics
library.

This is a simple program that provides examples of how to calculate the
various skill metrics used or available in the library. All the
calculated skill metrics are written to an Excel file for easy viewing
and manipulation. The Python code is kept to a minimum.

All functions in the Skill Metrics library are designed to only work
with one-dimensional arrays, e.g. time series of observations at a
selected location. The one-dimensional data are read in as dictionaries
via a pickle file: ref['data'], pred1['data'], pred2['data'], and
pred3['data']. The statistics are displayed on the screen as well as
written to an Excel file named all_stats.xlsx.

The reference data used in this example are cell concentrations of
phytoplankton collected from cruise surveys at selected locations and
times. The model predictions are from three different simulations that
have been space-time interpolated to the location and time of the sample
collection. Details on the contents of the dictionary (once loaded) can
be obtained by simply executing the following two statements

>> key_to_value_lengths = {k: len(v) for k, v in ref.items()}
>> print(key_to_value_lengths)
{'units': 6, 'longitude': 57, 'jday': 57, 'date': 57, 'depth': 57,
 'station': 57, 'time': 57, 'latitude': 57, 'data': 57}

Author: Peter A. Rochford
        Symplectic, LLC
        www.thesymplectic.com

Created on Nov 23, 2016

@author: prochford@thesymplectic.com
'''

import numpy as np
import pickle
import skill_metrics as sm
from collections import OrderedDict
from sys import version_info
def load_obj(name):
    # Load an object from a file in pickle format. The suffix selects the
    # pickle written for the running major version of Python (2 or 3).
    if version_info[0] == 2:
        suffix = 'pkl'
    else:
        suffix = 'pkl3'

    with open(name + '.' + suffix, 'rb') as f:
        return pickle.load(f)
class Container(object):

    def __init__(self, pred1, pred2, pred3, ref):
        self.pred1 = pred1
        self.pred2 = pred2
        self.pred3 = pred3
        self.ref = ref
if __name__ == '__main__':

    # Calculate various skill metrics, writing results to screen
    # and Excel file. Use an ordered dictionary so skill metrics are
    # saved in the Excel file in the same order as written to screen.
    stats = OrderedDict()

    # Read data from pickle file
    data = load_obj('target_data')
    pred = data.pred1['data']
    ref = data.ref['data']
    # Get bias
    stats['bias'] = sm.bias(pred, ref)
    print('Bias = ' + str(stats['bias']))

    # Get Root-Mean-Square-Deviation (RMSD)
    stats['rmsd'] = sm.rmsd(pred, ref)
    print('RMSD = ' + str(stats['rmsd']))

    # Get Centered Root-Mean-Square-Deviation (CRMSD)
    stats['crmsd'] = sm.centered_rms_dev(pred, ref)
    print('CRMSD = ' + str(stats['crmsd']))
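    # Illustrative cross-check of the three difference statistics above
    # against their textbook definitions, computed directly with NumPy.
    # This is a sketch only: it assumes sm.bias, sm.rmsd and
    # sm.centered_rms_dev follow these conventions, which may differ in
    # detail from the library implementation.
    pred_arr = np.asarray(pred, dtype=float)
    ref_arr = np.asarray(ref, dtype=float)
    bias_check = np.mean(pred_arr) - np.mean(ref_arr)
    rmsd_check = np.sqrt(np.mean((pred_arr - ref_arr) ** 2))
    crmsd_check = np.sqrt(np.mean(((pred_arr - np.mean(pred_arr))
                                   - (ref_arr - np.mean(ref_arr))) ** 2))
    print('Check: bias = ' + str(bias_check) + ', rmsd = ' + str(rmsd_check)
          + ', crmsd = ' + str(crmsd_check))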
    # Get Standard Deviation (SDEV)
    stats['sdev'] = np.std(pred)
    print('SDEV = ' + str(stats['sdev']))

    # Get correlation coefficient (r)
    ccoef = np.corrcoef(pred, ref)
    stats['ccoef'] = ccoef[0, 1]
    print('r = ' + str(stats['ccoef']))
    # Get Non-Dimensional Skill Score (SS)
    stats['ss'] = sm.skill_score_murphy(pred, ref)
    print('SS (Murphy Skill Score) = ' + str(stats['ss']))
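    # For reference, Murphy (1988) defines this skill score as
    #   SS = 1 - MSE / var(ref)
    # where MSE = mean((pred - ref)^2) and var(ref) is the variance of the
    # reference series. Whether the variance uses an N or N-1 normalization
    # is an implementation detail of the library, so no direct NumPy
    # cross-check is attempted here.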
    # Get Brier Score (BS)
    forecast = np.array([0.7, 0.9, 0.8, 0.4, 0.2, 0, 0, 0, 0, 0.1])
    reference = np.array([0.9, 0.7, 0.6, 0.4, 0.2, 0, 0, 0, 0, 0.1])
    observed = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 1])
    stats['bs'] = sm.brier_score(forecast, observed)
    print('BS (Brier Score) = ' + str(stats['bs']))
    # Get Brier Skill Score (BSS)
    stats['bss'] = sm.skill_score_brier(forecast, reference, observed)
    print('BSS (Brier Skill Score) = ' + str(stats['bss']))
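    # Sketch of the usual definitions behind the two Brier quantities above
    # (assuming sm.brier_score and sm.skill_score_brier follow them; the
    # library's conventions may differ in detail):
    #   BS  = mean((forecast - observed)^2)
    #   BSS = 1 - BS(forecast) / BS(reference)
    bs_check = np.mean((forecast - observed) ** 2)
    bss_check = 1.0 - bs_check / np.mean((reference - observed) ** 2)
    print('Check: BS = ' + str(bs_check) + ', BSS = ' + str(bss_check))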
    # Get Kling-Gupta efficiency 2009 (KGE09)
    stats['kge09'] = sm.kling_gupta_eff09(pred, ref)
    print('KGE09 (Kling-Gupta efficiency 2009) = ' + str(stats['kge09']))

    # Get Kling-Gupta efficiency 2012 (KGE12)
    stats['kge12'] = sm.kling_gupta_eff12(pred, ref)
    print('KGE12 (Kling-Gupta efficiency 2012) = ' + str(stats['kge12']))
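    # For reference (a sketch of the standard definitions, which the
    # library functions above are assumed to follow):
    #   KGE09 = 1 - sqrt((r - 1)^2 + (alpha - 1)^2 + (beta - 1)^2)
    #     with alpha = std(pred)/std(ref) and beta = mean(pred)/mean(ref)
    #   KGE12 replaces alpha with gamma, the ratio of the coefficients of
    #     variation: gamma = (std(pred)/mean(pred)) / (std(ref)/mean(ref))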
    # Get Nash-Sutcliffe efficiency (NSE)
    stats['nse'] = sm.nash_sutcliffe_eff(pred, ref)
    print('NSE (Nash-Sutcliffe efficiency) = ' + str(stats['nse']))
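    # Cross-check against the standard Nash-Sutcliffe definition (assumed,
    # not guaranteed, to match the library implementation), reusing
    # pred_arr/ref_arr from the NumPy cross-check above:
    #   NSE = 1 - sum((pred - ref)^2) / sum((ref - mean(ref))^2)
    nse_check = 1.0 - (np.sum((pred_arr - ref_arr) ** 2)
                       / np.sum((ref_arr - np.mean(ref_arr)) ** 2))
    print('Check: NSE = ' + str(nse_check))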
    # Write statistics to Excel file
    filename = 'all_stats.xlsx'
    sm.write_stats(filename, stats, overwrite=True)

    # Calculate statistics for target diagram
    target_stats1 = sm.target_statistics(data.pred1, data.ref, 'data')

    # Write statistics to Excel file for a single data set
    filename = 'target_stats.xlsx'
    sm.write_target_stats(filename, target_stats1, overwrite='on')
    # Calculate statistics for Taylor diagram.
    # The first array element of each statistic corresponds to the
    # reference series, while the second is for the predicted series.
    taylor_stats1 = sm.taylor_statistics(data.pred1, data.ref, 'data')
    taylor_stats2 = sm.taylor_statistics(data.pred2, data.ref, 'data')
    taylor_stats3 = sm.taylor_statistics(data.pred3, data.ref, 'data')

    # Write statistics to Excel file
    filename = 'taylor_stats.xlsx'
    data = [taylor_stats1, taylor_stats2, taylor_stats3]
    title = ['Expt. 1', 'Expt. 2', 'Expt. 3']
    label = ['Observed', 'M1', 'M2', 'M3']
    sm.write_taylor_stats(filename, data, title=title, label=label,
                          overwrite=True)
    # Check statistics for Taylor diagram
    diff = sm.check_taylor_stats(taylor_stats1['sdev'],
                                 taylor_stats1['crmsd'],
                                 taylor_stats1['ccoef'])
    print('Difference in Taylor statistics = ' + str(diff))
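    # The check above verifies the geometric relation underlying the Taylor
    # diagram (a sketch of the expected identity; the library's tolerance
    # and exact form may differ):
    #   crmsd^2 = sdev_pred^2 + sdev_ref^2 - 2*sdev_pred*sdev_ref*ccoef
    # A small 'diff' indicates the three statistics are mutually consistent.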