From 1553a647d7d4eb23c05969da18ef5817e7e1e31e Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Thu, 18 May 2023 17:06:21 +0000 Subject: [PATCH 1/9] addition of diag statistics to marine vrfy --- scripts/exgdas_global_marine_analysis_vrfy.py | 7 +- ush/diag_statistics.py | 84 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 ush/diag_statistics.py diff --git a/scripts/exgdas_global_marine_analysis_vrfy.py b/scripts/exgdas_global_marine_analysis_vrfy.py index f26cdd631..9f6fdcf04 100755 --- a/scripts/exgdas_global_marine_analysis_vrfy.py +++ b/scripts/exgdas_global_marine_analysis_vrfy.py @@ -25,6 +25,7 @@ import cartopy.crs as ccrs import gen_eva_obs_yaml import marine_eva_post +import diag_statistics import subprocess from datetime import datetime, timedelta @@ -121,8 +122,8 @@ def plot_zonal_slice(config): comout = os.getenv('COM_OCEAN_ANALYSIS') -com_ice_history = os.getenv('COM_ICE_HISTORY_PREV') -com_ocean_history = os.getenv('COM_OCEAN_HISTORY_PREV') +com_ice_history = os.getenv('COM_ICE_HISTORY') +com_ocean_history = os.getenv('COM_OCEAN_HISTORY') data = os.getenv('DATA') pdy = os.getenv('PDY') cyc = os.getenv('cyc') @@ -324,3 +325,5 @@ def plot_zonal_slice(config): infile = os.path.join('evayamls', file) print('running eva on', infile) subprocess.run(['eva', infile], check=True) + +diag_statistics.get_diag_stats() diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py new file mode 100644 index 000000000..f078fca5f --- /dev/null +++ b/ush/diag_statistics.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +import os +import numpy as np +import pandas as pd +from netCDF4 import Dataset + +data = os.getenv('DATA') +pdy = os.getenv('PDY') +cyc = os.getenv('cyc') +comout=os.getenv('COM_ATMOS_ANALYSIS_TMPL') +arcdir = os.getenv('arcdir') + +#pdy = '20210715' +#cyc = '00' +#comout='/Users/mossland/Desktop/work/WCDA/no-ocn-da/gdas.20210715/00/atmos' +#data='/Users/mossland/Desktop/work/WCDA/no-ocn-da/rundir' +#arcdir='/Users/mossland/Desktop/work/WCDA/no-ocn-da/arcdir' + +def get_diag_stats(): + + variables = ['sst','t'] + + tarfilename='gdas.t' + cyc + 'z.cnvstat' + tarfile=os.path.join(comout, tarfilename) + + os.chdir(data) + + for var in variables: + + diagfilename= 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4' + diagfile=os.path.join(data, diagfilename) + zipfilename = diagfilename + '.gz' + zipfile=os.path.join(data, zipfilename) + outfile='cnvstat.' + var +'.gdas.' + pdy + cyc + + command = 'tar -xvf ' + tarfile + ' ' + zipfilename + print(command) + os.system(command) + command = 'gunzip ' + os.path.join(data, zipfile) + print(command) + os.system(command) + + # The following is lifted from PyGSI/src/pyGSI/diags.py + + df_dict = {} + + # Open netCDF file, store data into dictionary + with Dataset(diagfile, mode='r') as f: + for var in f.variables: + # Station_ID and Observation_Class variables need + # to be converted from byte string to string + if var in ['Station_ID', 'Observation_Class']: + diagdata = f.variables[var][:] + diagdata = [i.tobytes(fill_value='/////', order='C') + for i in diagdata] + diagdata = np.array( + [''.join(i.decode('UTF-8', 'ignore').split()) + for i in diagdata]) + df_dict[var] = diagdata + + # Grab variables with only 'nobs' dimension + elif len(f.variables[var].shape) == 1: + df_dict[var] = f.variables[var][:] + + # Create pandas dataframe from dict + df = pd.DataFrame(df_dict) + + # looking at the used observations, maybe variations on this later + df = df[df.Analysis_Use_Flag == 1.0] + + meaned_cols = ['Obs_Minus_Forecast_unadjusted', + 'Obs_Minus_Forecast_adjusted'] + + df[meaned_cols] = np.abs(df[meaned_cols]) + + means = df.groupby(['Observation_Type'])[meaned_cols].mean() + mean_all = df[meaned_cols].mean() + mean_all.name='All' + means = pd.concat([means, mean_all.to_frame().T]) + means.index.name='obtype' + + print('writing ', outfile) + means.to_csv(os.path.join(arcdir, outfile)) + From c8c7deeffcb8f74d91c24df85e6de0cb46021e4c Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Tue, 23 May 2023 18:40:02 +0000 Subject: [PATCH 2/9] return fetched COM directories to *_PREV --- scripts/exgdas_global_marine_analysis_vrfy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/exgdas_global_marine_analysis_vrfy.py b/scripts/exgdas_global_marine_analysis_vrfy.py index 9f6fdcf04..33699f1d3 100755 --- a/scripts/exgdas_global_marine_analysis_vrfy.py +++ b/scripts/exgdas_global_marine_analysis_vrfy.py @@ -122,8 +122,8 @@ def plot_zonal_slice(config): comout = os.getenv('COM_OCEAN_ANALYSIS') -com_ice_history = os.getenv('COM_ICE_HISTORY') -com_ocean_history = os.getenv('COM_OCEAN_HISTORY') +com_ice_history = os.getenv('COM_ICE_HISTORY_PREV') +com_ocean_history = os.getenv('COM_OCEAN_HISTORY_PREV') data = os.getenv('DATA') pdy = os.getenv('PDY') cyc = os.getenv('cyc') From 29e10509a94f159be074cc643b3a39ece907bb72 Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Tue, 23 May 2023 19:34:05 +0000 Subject: [PATCH 3/9] attempt to use subprocess --- ush/diag_statistics.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index f078fca5f..a0c357909 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -7,14 +7,8 @@ data = os.getenv('DATA') pdy = os.getenv('PDY') cyc = os.getenv('cyc') -comout=os.getenv('COM_ATMOS_ANALYSIS_TMPL') -arcdir = os.getenv('arcdir') - -#pdy = '20210715' -#cyc = '00' -#comout='/Users/mossland/Desktop/work/WCDA/no-ocn-da/gdas.20210715/00/atmos' -#data='/Users/mossland/Desktop/work/WCDA/no-ocn-da/rundir' -#arcdir='/Users/mossland/Desktop/work/WCDA/no-ocn-da/arcdir' +comout=os.getenv('COM_ATMOS_ANALYSIS') +arcdir = os.getenv('ARCDIR') def get_diag_stats(): @@ -35,11 +29,13 @@ def get_diag_stats(): command = 'tar -xvf ' + tarfile + ' ' + zipfilename print(command) - os.system(command) + #os.system(command) + subprocess.run(['tar', '-xfv', tarfile, zipfilename], check=True) command = 'gunzip ' + os.path.join(data, zipfile) print(command) - os.system(command) - + #os.system(command) + subprocess.run(['gunzip', os.path.join(data, zipfile)], check=True) + # The following is lifted from PyGSI/src/pyGSI/diags.py df_dict = {} @@ -67,6 +63,12 @@ def get_diag_stats(): # looking at the used observations, maybe variations on this later df = df[df.Analysis_Use_Flag == 1.0] + + # this is a crude filter to obtain surface t observations, hopefully + # catching at least most of the airborn observations and passing the + # sea observations + # TODO: This needs to be refined, possibly using obtype + df = df[df.Station_Elevation <= 10.0] meaned_cols = ['Obs_Minus_Forecast_unadjusted', 'Obs_Minus_Forecast_adjusted'] @@ -81,4 +83,3 @@ def get_diag_stats(): print('writing ', outfile) means.to_csv(os.path.join(arcdir, outfile)) - From 1eaf956080924a179f1b5a9e9f10bbd161c18300 Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Wed, 24 May 2023 13:31:38 +0000 Subject: [PATCH 4/9] going back to os.system --- ush/diag_statistics.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index a0c357909..8a34610a7 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -27,14 +27,14 @@ def get_diag_stats(): zipfile=os.path.join(data, zipfilename) outfile='cnvstat.' + var +'.gdas.' + pdy + cyc + #TODO: these probably should be in the jjob. Will try to get them there + # once this is up and running command = 'tar -xvf ' + tarfile + ' ' + zipfilename - print(command) - #os.system(command) - subprocess.run(['tar', '-xfv', tarfile, zipfilename], check=True) + print('running', command) + os.system(command) command = 'gunzip ' + os.path.join(data, zipfile) - print(command) - #os.system(command) - subprocess.run(['gunzip', os.path.join(data, zipfile)], check=True) + print('running', command) + os.system(command) # The following is lifted from PyGSI/src/pyGSI/diags.py @@ -66,7 +66,7 @@ def get_diag_stats(): # this is a crude filter to obtain surface t observations, hopefully # catching at least most of the airborn observations and passing the - # sea observations + # sea observations (with much else) # TODO: This needs to be refined, possibly using obtype df = df[df.Station_Elevation <= 10.0] From c977e627eefcfc6c13ea487f6a9c83e1bbb9026b Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Wed, 24 May 2023 13:41:18 +0000 Subject: [PATCH 5/9] run through autopep8 --- ush/diag_statistics.py | 56 ++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index 8a34610a7..a530ecbe7 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -7,27 +7,28 @@ data = os.getenv('DATA') pdy = os.getenv('PDY') cyc = os.getenv('cyc') -comout=os.getenv('COM_ATMOS_ANALYSIS') +comout = os.getenv('COM_ATMOS_ANALYSIS') arcdir = os.getenv('ARCDIR') + def get_diag_stats(): - variables = ['sst','t'] - - tarfilename='gdas.t' + cyc + 'z.cnvstat' - tarfile=os.path.join(comout, tarfilename) - + variables = ['sst', 't'] + + tarfilename = 'gdas.t' + cyc + 'z.cnvstat' + tarfile = os.path.join(comout, tarfilename) + os.chdir(data) - + for var in variables: - - diagfilename= 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4' - diagfile=os.path.join(data, diagfilename) + + diagfilename = 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4' + diagfile = os.path.join(data, diagfilename) zipfilename = diagfilename + '.gz' - zipfile=os.path.join(data, zipfilename) - outfile='cnvstat.' + var +'.gdas.' + pdy + cyc - - #TODO: these probably should be in the jjob. Will try to get them there + zipfile = os.path.join(data, zipfilename) + outfile = 'cnvstat.' + var + '.gdas.' + pdy + cyc + + # TODO: these probably should be in the jjob. Will try to get them there # once this is up and running command = 'tar -xvf ' + tarfile + ' ' + zipfilename print('running', command) @@ -37,9 +38,9 @@ def get_diag_stats(): os.system(command) # The following is lifted from PyGSI/src/pyGSI/diags.py - + df_dict = {} - + # Open netCDF file, store data into dictionary with Dataset(diagfile, mode='r') as f: for var in f.variables: @@ -48,19 +49,19 @@ def get_diag_stats(): if var in ['Station_ID', 'Observation_Class']: diagdata = f.variables[var][:] diagdata = [i.tobytes(fill_value='/////', order='C') - for i in diagdata] + for i in diagdata] diagdata = np.array( [''.join(i.decode('UTF-8', 'ignore').split()) for i in diagdata]) df_dict[var] = diagdata - + # Grab variables with only 'nobs' dimension elif len(f.variables[var].shape) == 1: df_dict[var] = f.variables[var][:] - + # Create pandas dataframe from dict df = pd.DataFrame(df_dict) - + # looking at the used observations, maybe variations on this later df = df[df.Analysis_Use_Flag == 1.0] @@ -68,18 +69,19 @@ def get_diag_stats(): # catching at least most of the airborn observations and passing the # sea observations (with much else) # TODO: This needs to be refined, possibly using obtype - df = df[df.Station_Elevation <= 10.0] - + df = df[df.Station_Elevation <= 10.0] + meaned_cols = ['Obs_Minus_Forecast_unadjusted', 'Obs_Minus_Forecast_adjusted'] - + df[meaned_cols] = np.abs(df[meaned_cols]) - + means = df.groupby(['Observation_Type'])[meaned_cols].mean() mean_all = df[meaned_cols].mean() - mean_all.name='All' + mean_all.name = 'All' means = pd.concat([means, mean_all.to_frame().T]) - means.index.name='obtype' - + means.index.name = 'obtype' + print('writing ', outfile) means.to_csv(os.path.join(arcdir, outfile)) + From 2034e20ed1d3014533548fbb8acbf8d2668e9d73 Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Wed, 24 May 2023 13:45:08 +0000 Subject: [PATCH 6/9] more coding norms --- ush/diag_statistics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index a530ecbe7..731569a2c 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -84,4 +84,3 @@ def get_diag_stats(): print('writing ', outfile) means.to_csv(os.path.join(arcdir, outfile)) - From 17bc3c99a5db40e1e55d3d03d434f6db3db026b0 Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Thu, 25 May 2023 14:34:40 +0000 Subject: [PATCH 7/9] tweaks and improvements --- ush/diag_statistics.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index 731569a2c..335d091e1 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -1,8 +1,16 @@ #!/usr/bin/env python3 +# diag_statistics.py +# in ocnanalvrfy task, untar and unzip ocean-relevant diag files from +# atmospheric analysis, calculate means, and write them as ascii to arcdir import os import numpy as np import pandas as pd from netCDF4 import Dataset +import tarfile +import gzip +import shutil + +variables = ['sst', 't'] data = os.getenv('DATA') pdy = os.getenv('PDY') @@ -10,39 +18,30 @@ comout = os.getenv('COM_ATMOS_ANALYSIS') arcdir = os.getenv('ARCDIR') - def get_diag_stats(): - variables = ['sst', 't'] - tarfilename = 'gdas.t' + cyc + 'z.cnvstat' - tarfile = os.path.join(comout, tarfilename) os.chdir(data) for var in variables: diagfilename = 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4' - diagfile = os.path.join(data, diagfilename) zipfilename = diagfilename + '.gz' - zipfile = os.path.join(data, zipfilename) - outfile = 'cnvstat.' + var + '.gdas.' + pdy + cyc - - # TODO: these probably should be in the jjob. Will try to get them there - # once this is up and running - command = 'tar -xvf ' + tarfile + ' ' + zipfilename - print('running', command) - os.system(command) - command = 'gunzip ' + os.path.join(data, zipfile) - print('running', command) - os.system(command) + outfilename = 'cnvstat.' + var + '.gdas.' + pdy + cyc + + with tarfile.open(os.path.join(comout, tarfilename), "r") as tf: + tf.extract(member=zipfilename) + with gzip.open(zipfilename, 'rb') as f_in: + with open(diagfilename, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) # The following is lifted from PyGSI/src/pyGSI/diags.py df_dict = {} # Open netCDF file, store data into dictionary - with Dataset(diagfile, mode='r') as f: + with Dataset(diagfilename, mode='r') as f: for var in f.variables: # Station_ID and Observation_Class variables need # to be converted from byte string to string @@ -82,5 +81,6 @@ def get_diag_stats(): means = pd.concat([means, mean_all.to_frame().T]) means.index.name = 'obtype' - print('writing ', outfile) - means.to_csv(os.path.join(arcdir, outfile)) + outfilenamepath = os.path.join(arcdir, outfilename) + print('writing ', outfilenamepath) + means.to_csv(outfilenamepath) From c795fb46f43f96546f8afa22cbc5faef938b7b10 Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Thu, 25 May 2023 14:42:16 +0000 Subject: [PATCH 8/9] pep 8 correction --- ush/diag_statistics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index 335d091e1..7e501fe96 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -18,6 +18,7 @@ comout = os.getenv('COM_ATMOS_ANALYSIS') arcdir = os.getenv('ARCDIR') + def get_diag_stats(): tarfilename = 'gdas.t' + cyc + 'z.cnvstat' From e79111acb2b56b81ea063b266a56b2a88e350393 Mon Sep 17 00:00:00 2001 From: AndrewEichmann-NOAA Date: Thu, 25 May 2023 17:39:17 +0000 Subject: [PATCH 9/9] added csv suffix to csv file --- ush/diag_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/diag_statistics.py b/ush/diag_statistics.py index 7e501fe96..8068a8c81 100644 --- a/ush/diag_statistics.py +++ b/ush/diag_statistics.py @@ -29,7 +29,7 @@ def get_diag_stats(): diagfilename = 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4' zipfilename = diagfilename + '.gz' - outfilename = 'cnvstat.' + var + '.gdas.' + pdy + cyc + outfilename = 'cnvstat.' + var + '.gdas.' + pdy + cyc + '.csv' with tarfile.open(os.path.join(comout, tarfilename), "r") as tf: tf.extract(member=zipfilename)