Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save basic stats in csv at each cycle #1040

Merged
merged 7 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions parm/soca/obs/obs_stats.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
time window:
begin: '1900-01-01T00:00:00Z'
end: '2035-01-01T00:00:00Z'
bound to include: begin

obs spaces:
{{ obs_spaces }}
77 changes: 74 additions & 3 deletions scripts/exgdas_global_marine_analysis_post.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
import glob
import shutil
from datetime import datetime, timedelta
from wxflow import FileHandler, Logger
from wxflow import AttrDict, FileHandler, Logger, parse_j2yaml
from multiprocessing import Process
import subprocess
import netCDF4
import re

logger = Logger()

Expand All @@ -36,8 +40,6 @@ def list_all_files(dir_in, dir_out, wc='*', fh_list=[]):
return fh_list


logger.info(f"---------------- Copy from RUNDIR to COMOUT")

com_ocean_analysis = os.getenv('COM_OCEAN_ANALYSIS')
com_ice_restart = os.getenv('COM_ICE_RESTART')
anl_dir = os.getenv('DATA')
Expand All @@ -52,6 +54,8 @@ def list_all_files(dir_in, dir_out, wc='*', fh_list=[]):
bdate = datetime.strftime(bdatedt, '%Y-%m-%dT%H:00:00Z')
mdate = datetime.strftime(datetime.strptime(cdate, '%Y%m%d%H'), '%Y-%m-%dT%H:00:00Z')

logger.info(f"---------------- Copy from RUNDIR to COMOUT")

post_file_list = []

# Make a copy the IAU increment
Expand Down Expand Up @@ -106,3 +110,70 @@ def list_all_files(dir_in, dir_out, wc='*', fh_list=[]):
os.path.join(com_ocean_analysis, 'yaml'), wc='*.yaml', fh_list=fh_list)

FileHandler({'copy': fh_list}).sync()

# obs space statistics
logger.info(f"---------------- Compute basic stats")
diags_list = glob.glob(os.path.join(os.path.join(com_ocean_analysis, 'diags', '*.nc4')))
obsstats_j2yaml = str(os.path.join(os.getenv('HOMEgfs'), 'sorc', 'gdas.cd',
'parm', 'soca', 'obs', 'obs_stats.yaml.j2'))


# function to create a minimalist ioda obs sapce
def create_obs_space(data):
os_dict = {"obs space": {
"name": data["obs_space"],
"obsdatain": {
"engine": {"type": "H5File", "obsfile": data["obsfile"]}
},
"simulated variables": [data["variable"]]
},
"variable": data["variable"],
"experiment identifier": data["pslot"],
"csv output": data["csv_output"]
}
return os_dict


# attempt to extract the experiment id from the path
pslot = os.path.normpath(com_ocean_analysis).split(os.sep)[-5]

# iterate through the obs spaces and generate the yaml for gdassoca_obsstats.x
obs_spaces = []
for obsfile in diags_list:

# define an obs space name
obs_space = re.sub(r'\.\d{10}\.nc4$', '', os.path.basename(obsfile))

# get the variable name, assume 1 variable per file
nc = netCDF4.Dataset(obsfile, 'r')
variable = next(iter(nc.groups["ObsValue"].variables))
nc.close()

# filling values for the templated yaml
data = {'obs_space': os.path.basename(obsfile),
'obsfile': obsfile,
'pslot': pslot,
'variable': variable,
'csv_output': os.path.join(com_ocean_analysis,
f"{RUN}.t{cyc}z.ocn.{obs_space}.stats.csv")}
obs_spaces.append(create_obs_space(data))

# create the yaml
data = {'obs_spaces': obs_spaces}
conf = parse_j2yaml(path=obsstats_j2yaml, data=data)
stats_yaml = 'diag_stats.yaml'
conf.save(stats_yaml)

# run the application
# TODO(GorA): this should be setup properly in the g-w once gdassoca_obsstats is in develop
gdassoca_obsstats_exec = os.path.join(os.getenv('HOMEgfs'),
'sorc', 'gdas.cd', 'build', 'bin', 'gdassoca_obsstats.x')
command = f"{os.getenv('launcher')} {gdassoca_obsstats_exec} {stats_yaml}"
logger.info(f"{command}")
result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# issue a warning if the process has failed
if result.returncode != 0:
logger.warning(f"{command} has failed")
if result.stderr:
print("STDERR:", result.stderr.decode())
97 changes: 97 additions & 0 deletions ush/soca/gdassoca_obsstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env python3


# creates figures of timeseries from the csv outputs computed by gdassoca_obsstats.x
import argparse
from itertools import product
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


class ObsStats:
def __init__(self):
self.data = pd.DataFrame()

def read_csv(self, filepaths):
# Iterate through the list of file paths and append their data
for filepath in filepaths:
new_data = pd.read_csv(filepath)
# Convert date to datetime for easier plotting
new_data['date'] = pd.to_datetime(new_data['date'], format='%Y%m%d%H')
self.data = pd.concat([self.data, new_data], ignore_index=True)

def plot_timeseries(self, ocean, variable):
# Filter data for the given ocean and variable
filtered_data = self.data[(self.data['Ocean'] == ocean) & (self.data['Variable'] == variable)]

if filtered_data.empty:
print("No data available for the given ocean and variable combination.")
return

# Get unique experiments
experiments = filtered_data['Exp'].unique()

# Plot settings
fig, axs = plt.subplots(3, 1, figsize=(10, 15), sharex=True)
fig.suptitle(f'{variable} statistics, {ocean} ocean', fontsize=16, fontweight='bold')

for exp in experiments:
exp_data = filtered_data[filtered_data['Exp'] == exp]

# Plot RMSE
axs[0].plot(exp_data['date'], exp_data['RMSE'], marker='o', linestyle='-', label=exp)
axs[0].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
axs[0].xaxis.set_major_locator(mdates.DayLocator())
axs[0].tick_params(labelbottom=False)
axs[0].set_ylabel('RMSE')
axs[0].legend()
axs[0].grid(True)

# Plot Bias
axs[1].plot(exp_data['date'], exp_data['Bias'], marker='o', linestyle='-', label=exp)
axs[1].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
axs[1].xaxis.set_major_locator(mdates.DayLocator())
axs[1].tick_params(labelbottom=False)
axs[1].set_ylabel('Bias')
axs[1].legend()
axs[1].grid(True)

# Plot Count
axs[2].plot(exp_data['date'], exp_data['Count'], marker='o', linestyle='-', label=exp)
axs[2].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
axs[2].xaxis.set_major_locator(mdates.DayLocator())
axs[2].set_ylabel('Count')
axs[2].legend()
axs[2].grid(True)

# Improve layout and show plot
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.savefig(f'{ocean}_test.png')


if __name__ == "__main__":
epilog = ["Usage examples: ./gdassoca_obsstats.py --exp gdas.*.csv"]
parser = argparse.ArgumentParser(description="Observation space RMSE's and BIAS's",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=os.linesep.join(epilog))
parser.add_argument("--exps", nargs='+', required=True,
help="Path to the experiment's COMROOT")
parser.add_argument("--variable", required=True, help="ombg_qc or ombg_noqc")
parser.add_argument("--figname", required=True, help="The name of the output figure")
args = parser.parse_args()

flist = []
for exp in args.exps:
print(exp)
wc = exp + '/*.*/??/analysis/ocean/*.t??z.stats.csv'
flist.append(glob.glob(wc))

flist = sum(flist, [])
obsStats = ObsStats()
obsStats.read_csv(flist)
for var, ocean in product(['ombg_noqc', 'ombg_qc'],
['Atlantic', 'Pacific', 'Indian', 'Arctic', 'Southern']):
obsStats.plot_timeseries(ocean, var)
6 changes: 6 additions & 0 deletions utils/soca/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,9 @@ ecbuild_add_executable( TARGET gdas_socahybridweights.x
SOURCES gdas_socahybridweights.cc )
target_compile_features( gdas_socahybridweights.x PUBLIC cxx_std_17)
target_link_libraries( gdas_socahybridweights.x PUBLIC NetCDF::NetCDF_CXX oops atlas soca)

# Obs Stats
ecbuild_add_executable( TARGET gdassoca_obsstats.x
SOURCES gdassoca_obsstats.cc )
target_compile_features( gdassoca_obsstats.x PUBLIC cxx_std_17)
target_link_libraries( gdassoca_obsstats.x PUBLIC NetCDF::NetCDF_CXX oops ioda)
10 changes: 10 additions & 0 deletions utils/soca/gdassoca_obsstats.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include "gdassoca_obsstats.h"
#include "oops/runs/Run.h"

// this application computes a few basic statistics in obs space

int main(int argc, char ** argv) {
oops::Run run(argc, argv);
gdasapp::ObsStats obsStats;
return run.execute(obsStats);
}
Loading
Loading