In [1]:
import os

import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import seaborn as sns
import xarray as xr

In [2]:
# Directory holding the NetCDF inputs opened in the next cell. File paths are
# built by plain string concatenation, so the trailing '/' is required.
filedir = '/glade/work/abaker/jian/puma-cam6/'
# Presumably the local PyCECT checkout; not referenced by the cells visible
# here -- TODO confirm it is still needed.
pydir = '/glade/u/home/abaker/repos/PyCECT/'

In [3]:
# Build both input paths first, then open them.
# `ds` is the savefile (test-run results); `sum_ds` is the ensemble summary file.
save_filename = filedir + 'savefile_3runs.nc'
sum_filename = filedir + 'uf.ens.f19_f19_mg17.F2000dev.pumas.summary_c090823.nc'

ds = xr.open_dataset(save_filename)
sum_ds = xr.open_dataset(sum_filename)

In [9]:
# from savefile
# Dimension lengths. `Dataset.sizes` is the name -> length mapping; indexing
# `Dataset.dims` like a dict is deprecated in recent xarray releases.
test_size = ds.sizes['test_size']
ens_size = ds.sizes['ens_size']
nvars = ds.sizes['nvars']

# get var names
# NOTE: `vars` shadows the Python builtin of the same name; the name is kept
# because other cells read it.
vars = ds['vars'].values

# get test scores and means
t_scores = ds['scores'].values
t_std_gm = ds['std_gm'].values
t_gm = ds['gm'].values

# get ens pc scores distribution (nvars = npcs)
# (these are mean zero, so the array just contains sigma)
ens_score_dist = ds['ens_sigma_scores'].values
# these are the standardized global means (nvars by ens_size) => so dist mean = 0 for each var
ens_std_gm = ds['ens_std_gm'].values

In [10]:
# from sumfile
# Ensemble global means; later cells index the first axis by variable id.
ens_gm = sum_ds['global_mean'].values
# PCA loadings; later cells read one PC's variable weights as a column, P[:, pc_id].
P = sum_ds['loadings_gm'].values

In [11]:
# Decode each raw (bytes) variable name into a Python str, preserving order.
str_vars = [raw_name.decode('UTF-8') for raw_name in vars]

In [12]:
# Variable to inspect. var_id is its 0-based position in str_vars and is used
# by the following cells as the row index into the per-variable arrays.
name = 'dst_a2'
var_id = str_vars.index(name)  # list.index raises ValueError if the name is absent
var_id

98

In [16]:
# Summary statistics of the ensemble's standardized global means for the
# selected variable (row var_id).
print('Ensemble standardized global mean distribution')
ens_std_gm_var = ens_std_gm[var_id, :]
for stat_label, stat_value in (
    ('99.5 percentile = ', np.percentile(ens_std_gm_var, 99.5)),
    ('mean = ', ens_std_gm_var.mean()),
    ('min = ', ens_std_gm_var.min()),
    ('max = ', ens_std_gm_var.max()),
):
    print(stat_label, stat_value)

Ensemble standardized global mean distribution
99.5 percentile =  1.994886847102695
mean =  -4.056691490664629e-14
min =  -4.507574812044302
max =  2.4474573240987207


In [17]:
# Standardized global means of the test runs for the selected variable,
# to be compared with the ensemble distribution printed in the previous cell.
print('Test standardized gm')
t_std_gm[var_id, :]

Test standardized gm


array([  1.1576871 , -25.00288693, -25.07065588])

In [18]:
# Same summary as for the standardized values, but on the original
# (unstandardized) ensemble global means of the selected variable.
print('Ensemble ORIG global mean distribution')
ens_gm_var = ens_gm[var_id, :]
for stat_label, stat_value in (
    ('mean = ', ens_gm_var.mean()),
    ('min = ', ens_gm_var.min()),
    ('max = ', ens_gm_var.max()),
):
    print(stat_label, stat_value)

Ensemble ORIG global mean distribution
mean =  6.178443962526598e-15
min =  6.11234271347235e-15
max =  6.214334656053772e-15


In [19]:
# Original (unstandardized) global means of the test runs for the selected
# variable, to be compared with the ensemble min/max printed in the previous cell.
print('Test ORIG gm')
t_gm[var_id, :]

Test ORIG gm


array([6.19542084e-15, 5.81178958e-15, 5.81079578e-15])

In [98]:
# remember 0-index here compared to the pyCECT output
# (the PC numbers listed below are as reported by pyCECT; subtract 1 to index the arrays)
# PC 5: failed 2 runs  [2, 3]
# PC 6: failed 3 runs  [1, 2, 3]
# PC 7: failed 2 runs  [2, 3]
# PC 8: failed 2 runs  [2, 3]

pc_id = 4  # 0-based index of "PC 5" in the list above

In [99]:
# Spread of the ensemble scores on the selected PC, and twice that spread.
pc_sigma = ens_score_dist[pc_id]
print('ensemble: sigma = ', pc_sigma)
print('ensemble: 2*sigma = ', 2 * pc_sigma)

ensemble: sigma =  2.9578030763011744
ensemble: 2*sigma =  5.915606152602349


In [100]:
# Scores of the test runs on the selected PC, to be compared with the
# ensemble sigma printed in the previous cell.
t_scores[pc_id, :]

array([-1.07940943, 24.39905882, 27.25332384])

In [101]:
# which variables contribute most to this pc?
# each column of P corresponds to the variable weights of each PC, so PC1 weights are in the first column (remember 0-indexing)
# wts: magnitude of each variable's weight in the selected PC (the sign is dropped).
wts = np.abs(P[:, pc_id])
# np.argsort sorts ascending; reverse it so the largest weights come first.
sort_wts = np.argsort(wts)[::-1]

In [102]:
# look at the 20 biggest weights
# (sort_wts holds variable indices ordered by decreasing |weight|, so this
# displays indices into str_vars / wts, not the weights themselves)
sort_wts[:20]

array([215, 209, 212,  99, 240, 213, 238,  98, 210, 207, 102, 111,  97,
       218, 217, 178, 264, 263, 177, 274])

In [103]:
# The 20 largest absolute weights, in the same order as the indices in sort_wts[:20].
wts[sort_wts][:20]

array([0.26960287, 0.26810269, 0.26627399, 0.23106317, 0.22440001,
       0.20459294, 0.20453547, 0.19505496, 0.19475654, 0.18880373,
       0.17643708, 0.13342117, 0.12903841, 0.11046072, 0.10163009,
       0.09753223, 0.09752476, 0.09752093, 0.09750298, 0.0891191 ])

In [104]:
# which vars are these?
# Print name and absolute weight for the 20 heaviest variables, largest first.
for var_idx in sort_wts[:20]:
    var_name, weight = str_vars[var_idx], wts[var_idx]
    print(var_name, ": ", weight)

dst_a3_SRF :  0.2696028695522857
dst_a1_SRF :  0.2681026914133036
dst_a2_SRF :  0.2662739908701279
dst_a3 :  0.23106317443207447
num_a3_SRF :  0.22440001202523988
dst_a3DDF :  0.2045929363567556
num_a3DDF :  0.2045354707161037
dst_a2 :  0.19505495851615695
dst_a2DDF :  0.194756539971718
dst_a1DDF :  0.18880372643904972
dst_c3 :  0.17643707717616158
num_a3 :  0.13342117237349407
dst_a1 :  0.1290384126146087
dst_c3SFWET :  0.11046071705935837
dst_c2SFWET :  0.10163008633074497
SO2_CMXF :  0.09753223394296479
so4_a2_CMXF :  0.09752476462307504
so4_a2_CLXF :  0.09752093359392637
SO2_CLXF :  0.09750297514458574
soa_a1_SRF :  0.08911909539703942
