diff --git a/scripts/exgdas_global_marine_analysis_vrfy.py b/scripts/exgdas_global_marine_analysis_vrfy.py
index f26cdd631..33699f1d3 100755
--- a/scripts/exgdas_global_marine_analysis_vrfy.py
+++ b/scripts/exgdas_global_marine_analysis_vrfy.py
@@ -25,6 +25,7 @@ import cartopy.crs as ccrs
 import gen_eva_obs_yaml
 import marine_eva_post
+import diag_statistics
 import subprocess
 from datetime import datetime, timedelta
@@ -324,3 +325,5 @@ def plot_zonal_slice(config):
     infile = os.path.join('evayamls', file)
     print('running eva on', infile)
     subprocess.run(['eva', infile], check=True)
+
+diag_statistics.get_diag_stats()
diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py
new file mode 100644
index 000000000..8068a8c81
--- /dev/null
+++ b/ush/diag_statistics.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# diag_statistics.py
+# in ocnanalvrfy task, untar and unzip ocean-relevant diag files from
+# atmospheric analysis, calculate means, and write them as ASCII to arcdir
+import os
+import numpy as np
+import pandas as pd
+from netCDF4 import Dataset
+import tarfile
+import gzip
+import shutil
+
+variables = ['sst', 't']
+
+data = os.getenv('DATA')
+pdy = os.getenv('PDY')
+cyc = os.getenv('cyc')
+comout = os.getenv('COM_ATMOS_ANALYSIS')
+arcdir = os.getenv('ARCDIR')
+
+
+def get_diag_stats():
+
+    tarfilename = 'gdas.t' + cyc + 'z.cnvstat'
+
+    os.chdir(data)
+
+    for var in variables:
+
+        diagfilename = 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4'
+        zipfilename = diagfilename + '.gz'
+        outfilename = 'cnvstat.' + var + '.gdas.' + pdy + cyc + '.csv'
+
+        with tarfile.open(os.path.join(comout, tarfilename), "r") as tf:
+            tf.extract(member=zipfilename)
+        with gzip.open(zipfilename, 'rb') as f_in:
+            with open(diagfilename, 'wb') as f_out:
+                shutil.copyfileobj(f_in, f_out)
+
+        # The following is lifted from PyGSI/src/pyGSI/diags.py
+
+        df_dict = {}
+
+        # Open netCDF file, store data into dictionary
+        with Dataset(diagfilename, mode='r') as f:
+            for ncvar in f.variables:
+                # Station_ID and Observation_Class variables need
+                # to be converted from byte string to string
+                if ncvar in ['Station_ID', 'Observation_Class']:
+                    diagdata = f.variables[ncvar][:]
+                    diagdata = [i.tobytes(fill_value='/////', order='C')
+                                for i in diagdata]
+                    diagdata = np.array(
+                        [''.join(i.decode('UTF-8', 'ignore').split())
+                         for i in diagdata])
+                    df_dict[ncvar] = diagdata
+
+                # Grab variables with only 'nobs' dimension
+                elif len(f.variables[ncvar].shape) == 1:
+                    df_dict[ncvar] = f.variables[ncvar][:]
+
+        # Create pandas dataframe from dict
+        df = pd.DataFrame(df_dict)
+
+        # looking at the used observations, maybe variations on this later
+        df = df[df.Analysis_Use_Flag == 1.0]
+
+        # this is a crude filter to obtain surface t observations, hopefully
+        # catching at least most of the airborne observations and passing the
+        # sea observations (with much else)
+        # TODO: This needs to be refined, possibly using obtype
+        df = df[df.Station_Elevation <= 10.0]
+
+        meaned_cols = ['Obs_Minus_Forecast_unadjusted',
+                       'Obs_Minus_Forecast_adjusted']
+
+        df[meaned_cols] = np.abs(df[meaned_cols])
+
+        means = df.groupby(['Observation_Type'])[meaned_cols].mean()
+        mean_all = df[meaned_cols].mean()
+        mean_all.name = 'All'
+        means = pd.concat([means, mean_all.to_frame().T])
+        means.index.name = 'obtype'
+
+        outfilenamepath = os.path.join(arcdir, outfilename)
+        print('writing ', outfilenamepath)
+        means.to_csv(outfilenamepath)
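
Not part of the patch: a minimal sketch of how one of the CSV files written by get_diag_stats() could be read back for a quick sanity check, assuming the same ARCDIR, PDY and cyc environment variables and the cnvstat.<var>.gdas.<PDY><cyc>.csv naming used in the diff above.

```python
# Minimal sketch (not part of the patch): read back the 'sst' means written by
# get_diag_stats(), assuming ARCDIR, PDY and cyc are set as in diag_statistics.py.
import os
import pandas as pd

arcdir = os.environ['ARCDIR']
pdy = os.environ['PDY']
cyc = os.environ['cyc']

csvpath = os.path.join(arcdir, 'cnvstat.sst.gdas.' + pdy + cyc + '.csv')

# One row per Observation_Type plus an 'All' row; the columns hold the mean
# absolute Obs_Minus_Forecast_unadjusted and Obs_Minus_Forecast_adjusted values.
means = pd.read_csv(csvpath, index_col='obtype')
print(means)
```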