Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add distillation of diag file statistics to ocnanalvrfy task #491

Merged
merged 13 commits into from
May 25, 2023
Merged
3 changes: 3 additions & 0 deletions scripts/exgdas_global_marine_analysis_vrfy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import cartopy.crs as ccrs
import gen_eva_obs_yaml
import marine_eva_post
import diag_statistics
import subprocess
from datetime import datetime, timedelta

Expand Down Expand Up @@ -324,3 +325,5 @@ def plot_zonal_slice(config):
infile = os.path.join('evayamls', file)
print('running eva on', infile)
subprocess.run(['eva', infile], check=True)

diag_statistics.get_diag_stats()
87 changes: 87 additions & 0 deletions ush/diag_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# diag_statistics.py
# in ocnanalvrfy task, untar and unzip ocean-relevant diag files from
# atmospheric analysis, calculate means, and write them as ascii to arcdir
import os
import numpy as np
import pandas as pd
from netCDF4 import Dataset
import tarfile
import gzip
import shutil

variables = ['sst', 't']

data = os.getenv('DATA')
pdy = os.getenv('PDY')
cyc = os.getenv('cyc')
comout = os.getenv('COM_ATMOS_ANALYSIS')
arcdir = os.getenv('ARCDIR')


def get_diag_stats():

tarfilename = 'gdas.t' + cyc + 'z.cnvstat'

os.chdir(data)

for var in variables:

diagfilename = 'diag_conv_' + var + '_ges.' + pdy + cyc + '.nc4'
zipfilename = diagfilename + '.gz'
outfilename = 'cnvstat.' + var + '.gdas.' + pdy + cyc
CoryMartin-NOAA marked this conversation as resolved.
Show resolved Hide resolved

with tarfile.open(os.path.join(comout, tarfilename), "r") as tf:
tf.extract(member=zipfilename)
with gzip.open(zipfilename, 'rb') as f_in:
with open(diagfilename, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)

# The following is lifted from PyGSI/src/pyGSI/diags.py

df_dict = {}

# Open netCDF file, store data into dictionary
with Dataset(diagfilename, mode='r') as f:
for var in f.variables:
# Station_ID and Observation_Class variables need
# to be converted from byte string to string
if var in ['Station_ID', 'Observation_Class']:
diagdata = f.variables[var][:]
diagdata = [i.tobytes(fill_value='/////', order='C')
for i in diagdata]
diagdata = np.array(
[''.join(i.decode('UTF-8', 'ignore').split())
for i in diagdata])
df_dict[var] = diagdata

# Grab variables with only 'nobs' dimension
elif len(f.variables[var].shape) == 1:
df_dict[var] = f.variables[var][:]

# Create pandas dataframe from dict
df = pd.DataFrame(df_dict)

# looking at the used observations, maybe variations on this later
df = df[df.Analysis_Use_Flag == 1.0]

# this is a crude filter to obtain surface t observations, hopefully
# catching at least most of the airborn observations and passing the
# sea observations (with much else)
# TODO: This needs to be refined, possibly using obtype
df = df[df.Station_Elevation <= 10.0]

meaned_cols = ['Obs_Minus_Forecast_unadjusted',
'Obs_Minus_Forecast_adjusted']

df[meaned_cols] = np.abs(df[meaned_cols])

means = df.groupby(['Observation_Type'])[meaned_cols].mean()
mean_all = df[meaned_cols].mean()
mean_all.name = 'All'
means = pd.concat([means, mean_all.to_frame().T])
means.index.name = 'obtype'

outfilenamepath = os.path.join(arcdir, outfilename)
print('writing ', outfilenamepath)
means.to_csv(outfilenamepath)
Loading