From 005adcdd450542b73c5e34fd4296e0d57e420081 Mon Sep 17 00:00:00 2001
From: DavidNew-NOAA <134300700+DavidNew-NOAA@users.noreply.github.com>
Date: Thu, 11 Apr 2024 09:26:44 -0400
Subject: [PATCH 1/8] Add ability for JEDI-to-FV3 increment converter to
 process ensembles (#1022)

Major changes to the JEDI-to-FV3 increment converter:

1. Allow processing of ensembles using the same YAML template format that the
FV3-JEDI LGETKF app uses. Alternatively, a list of backgrounds/increments can
be specified.

2. Fix the incorrect calculation of the FV3 increment, for variables other
than delp and hydrostatic_delz, related to two issues:

a. When adding the JEDI increment to the background to get the analysis, some
mixing-ratio-type variables become negative and are set to zero by FV3-JEDI.
However, when we then subtract the background from the analysis after the
variable change to get delp and hydrostatic_delz, we don't get back the same
increment that we started with for those mixing-ratio fields.

b. For ensemble DA, the deterministic background and the JEDI increment are
not the same resolution, so the background and analysis discussed above must
be interpolated to half resolution before being subtracted, but only after the
JEDI increment is interpolated to full resolution and added to the background.
Because of these two interpolations, similar to above, we don't get back the
same increment we started with.

I now use Atlas to manually set all variables other than delp and
hydrostatic_delz back to their original values, which solved both issues.

Additionally, I make the following other changes:

1. Rename fv3jedi_fv3inc.yaml to fv3jedi_fv3inc_variational.yaml and rewrite
it according to the new YAML format.

2. Rewrite the gdasapp_test_fv3jedi_fv3inc CTest input YAML according to the
new YAML format.

3. Create a new fv3jedi_fv3inc_lgetkf.yaml for eventual use in converting the
ensemble job increment in Global Workflow.

Testing these new features yields the following results:

1. For delp and hydrostatic_delz in ensemble DA, the FV3 increment produced by
the deterministic variational analysis in Global Workflow (branch
feature/jediinc2fv3) is zero diff with develop, which has the previous version
of the OOPS-based increment converter. I tested the other increment variables
by writing the FV3 increment out to the native cubed sphere and comparing it
with the JEDI increment. They are zero diff.

2. The gdasapp_test_fv3jedi_fv3inc test passes with the same test reference
used in develop.

3. Using a special Global Workflow branch to test the LGETKF analysis, the
YAML templating works, and the resulting FV3 increment variables, delp and
hydrostatic_delz, differ from the values produced by develop (using
jediinc2fv3.py) within a reasonable relative error, similar to what I saw when
I first tested the increment converter against jediinc2fv3.py with the
variational analysis.
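To make issue 2a concrete, here is a minimal standalone sketch (hypothetical values; this is not FV3-JEDI code) of how clipping a negative mixing ratio in the analysis breaks the round trip from increment to analysis and back:

```cpp
// Issue 2a in miniature: the analysis is clipped at zero, so
// analysis - background no longer equals the original increment.
#include <algorithm>
#include <iostream>

int main() {
  const double bkg = 1.0e-4;   // background mixing ratio (kg/kg), hypothetical value
  const double inc = -3.0e-4;  // JEDI increment, larger in magnitude than the background

  // FV3-JEDI sets negative mixing ratios to zero when forming the analysis
  const double anl = std::max(0.0, bkg + inc);

  std::cout << "original increment:  " << inc << "\n";        // -3e-04
  std::cout << "recovered increment: " << anl - bkg << "\n";  // -1e-04
  return 0;
}
```

Copying the JEDI increment fields back into the FV3 increment, as described above, sidesteps both this clipping and the interpolation round trip of issue 2b.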
---
 parm/atm/utils/fv3jedi_fv3inc.yaml.j2         |  63 -------
 parm/atm/utils/fv3jedi_fv3inc_lgetkf.yaml.j2  |  62 +++++++
 .../utils/fv3jedi_fv3inc_variational.yaml.j2  |  62 +++++++
 .../testinput/gdasapp_fv3jedi_fv3inc.yaml     |  91 +++++-----
 utils/fv3jedi/fv3jedi_fv3inc.h                | 171 +++++++++++++-----
 5 files changed, 298 insertions(+), 151 deletions(-)
 delete mode 100644 parm/atm/utils/fv3jedi_fv3inc.yaml.j2
 create mode 100644 parm/atm/utils/fv3jedi_fv3inc_lgetkf.yaml.j2
 create mode 100644 parm/atm/utils/fv3jedi_fv3inc_variational.yaml.j2

diff --git a/parm/atm/utils/fv3jedi_fv3inc.yaml.j2 b/parm/atm/utils/fv3jedi_fv3inc.yaml.j2
deleted file mode 100644
index cb27bb7fa..000000000
--- a/parm/atm/utils/fv3jedi_fv3inc.yaml.j2
+++ /dev/null
@@ -1,63 +0,0 @@
-variable change:
-  variable change name: Model2GeoVaLs
-  input variables: &inputvars [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr,phis]
-  output variables: [ua,va,t,sphum,ice_wat,liq_wat,o3mr,delp,hydrostatic_delz]
-background:
-  geometry:
-    fms initialization:
-      namelist filename: ./fv3jedi/fmsmpp.nml
-      field table filename: ./fv3jedi/field_table
-    akbk: ./fv3jedi/akbk.nc4
-    layout:
-    - {{ layout_x }}
-    - {{ layout_y }}
-    npx: {{ npx_ges }}
-    npy: {{ npy_ges }}
-    npz: {{ npz_ges }}
-    field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_restart.yaml
-  input:
-    datapath: ./bkg
-    filetype: fms restart
-    datetime: '{{ current_cycle | to_isotime }}'
-    filename_core: '{{ current_cycle | to_fv3time }}.fv_core.res.nc'
-    filename_trcr: '{{ current_cycle | to_fv3time }}.fv_tracer.res.nc'
-    filename_sfcd: '{{ current_cycle | to_fv3time }}.sfc_data.nc'
-    filename_sfcw: '{{ current_cycle | to_fv3time }}.fv_srf_wnd.res.nc'
-    filename_cplr: '{{ current_cycle | to_fv3time }}.coupler.res'
-    state variables: *inputvars
-jedi increment:
-  input variables: [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr]
-  geometry:
-    fms initialization:
-      namelist filename: ./fv3jedi/fmsmpp.nml
-      field table filename: ./fv3jedi/field_table
-    akbk: ./fv3jedi/akbk.nc4
-    layout:
-    - {{ layout_x }}
-    - {{ layout_y }}
-    npx: {{ npx_anl }}
-    npy: {{ npy_anl }}
-    npz: {{ npz_anl }}
-    field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
-  input:
-    filetype: cube sphere history
-    filename: ./anl/atminc.{{ current_cycle | to_fv3time }}.nc4
-    provider: ufs
-fv3 increment:
-  geometry:
-    fms initialization:
-      namelist filename: ./fv3jedi/fmsmpp.nml
-      field table filename: ./fv3jedi/field_table
-    akbk: ./fv3jedi/akbk.nc4
-    layout:
-    - {{ layout_x }}
-    - {{ layout_y }}
-    npx: {{ npx_anl }}
-    npy: {{ npy_anl }}
-    npz: {{ npz_anl }}
-    field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_fv3inc.yaml
-  output:
-    filetype: auxgrid
-    gridtype: gaussian
-    filename: ./anl/atminc.
-
diff --git a/parm/atm/utils/fv3jedi_fv3inc_lgetkf.yaml.j2 b/parm/atm/utils/fv3jedi_fv3inc_lgetkf.yaml.j2
new file mode 100644
index 000000000..786dfd171
--- /dev/null
+++ b/parm/atm/utils/fv3jedi_fv3inc_lgetkf.yaml.j2
@@ -0,0 +1,62 @@
+variable change:
+  variable change name: Model2GeoVaLs
+  input variables: &bkgvars [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr,surface_geopotential_height]
+  output variables: &fv3incrvars [ua,va,t,sphum,ice_wat,liq_wat,o3mr,delp,hydrostatic_delz]
+jedi increment variables: [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr]
+fv3 increment variables: *fv3incrvars
+background geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table
+  akbk: ./fv3jedi/akbk.nc4
+  layout:
+  - {{ layout_x }}
+  - {{ layout_y }}
+  npx: {{ npx_ges }}
+  npy: {{ npy_ges }}
+  npz: {{ npz_ges }}
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
+jedi increment geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table
+  akbk: ./fv3jedi/akbk.nc4
+  layout:
+  - {{ layout_x }}
+  - {{ layout_y }}
+  npx: {{ npx_ges }}
+  npy: {{ npy_ges }}
+  npz: {{ npz_ges }}
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
+fv3 increment geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table
+  akbk: ./fv3jedi/akbk.nc4
+  layout:
+  - {{ layout_x }}
+  - {{ layout_y }}
+  npx: {{ npx_ges }}
+  npy: {{ npy_ges }}
+  npz: {{ npz_ges }}
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_fv3inc.yaml
+members from template:
+  template:
+    background input:
+      datetime: '{{ current_cycle | to_isotime }}'
+      filetype: cube sphere history
+      provider: ufs
+      datapath: ./bkg/mem%mem%
+      filename: {{ EPREFIX }}atmf006.nc
+      state variables: *bkgvars
+    jedi increment input:
+      filetype: cube sphere history
+      filename: ./anl/mem%mem%/atminc.{{ current_cycle | to_fv3time }}.nc4
+      provider: ufs
+    fv3 increment output:
+      filetype: auxgrid
+      gridtype: gaussian
+      filename: ./anl/mem%mem%/atminc.
+  pattern: '%mem%'
+  nmembers: {{ NMEM_ENS }}
+  zero padding: 3
diff --git a/parm/atm/utils/fv3jedi_fv3inc_variational.yaml.j2 b/parm/atm/utils/fv3jedi_fv3inc_variational.yaml.j2
new file mode 100644
index 000000000..9467b1526
--- /dev/null
+++ b/parm/atm/utils/fv3jedi_fv3inc_variational.yaml.j2
@@ -0,0 +1,62 @@
+variable change:
+  variable change name: Model2GeoVaLs
+  input variables: &bkgvars [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr,phis]
+  output variables: &fv3incrvars [ua,va,t,sphum,ice_wat,liq_wat,o3mr,delp,hydrostatic_delz]
+jedi increment variables: [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr]
+fv3 increment variables: *fv3incrvars
+background geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table
+  akbk: ./fv3jedi/akbk.nc4
+  layout:
+  - {{ layout_x }}
+  - {{ layout_y }}
+  npx: {{ npx_ges }}
+  npy: {{ npy_ges }}
+  npz: {{ npz_ges }}
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_restart.yaml
+jedi increment geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table
+  akbk: ./fv3jedi/akbk.nc4
+  layout:
+  - {{ layout_x }}
+  - {{ layout_y }}
+  npx: {{ npx_anl }}
+  npy: {{ npy_anl }}
+  npz: {{ npz_anl }}
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
+fv3 increment geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table
+  akbk: ./fv3jedi/akbk.nc4
+  layout:
+  - {{ layout_x }}
+  - {{ layout_y }}
+  npx: {{ npx_anl }}
+  npy: {{ npy_anl }}
+  npz: {{ npz_anl }}
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_fv3inc.yaml
+members:
+- background input:
+    datapath: ./bkg
+    filetype: fms restart
+    datetime: '{{ current_cycle | to_isotime }}'
+    filename_core: '{{ current_cycle | to_fv3time }}.fv_core.res.nc'
+    filename_trcr: '{{ current_cycle | to_fv3time }}.fv_tracer.res.nc'
+    filename_sfcd: '{{ current_cycle | to_fv3time }}.sfc_data.nc'
+    filename_sfcw: '{{ current_cycle | to_fv3time }}.fv_srf_wnd.res.nc'
+    filename_cplr: '{{ current_cycle | to_fv3time }}.coupler.res'
+    state variables: *bkgvars
+  jedi increment input:
+    filetype: cube sphere history
+    filename: ./anl/atminc.{{ current_cycle | to_fv3time }}.nc4
+    provider: ufs
+  fv3 increment output:
+    filetype: auxgrid
+    gridtype: gaussian
+    filename: ./anl/atminc.
+
diff --git a/test/fv3jedi/testinput/gdasapp_fv3jedi_fv3inc.yaml b/test/fv3jedi/testinput/gdasapp_fv3jedi_fv3inc.yaml
index b0c08350c..185f8789c 100644
--- a/test/fv3jedi/testinput/gdasapp_fv3jedi_fv3inc.yaml
+++ b/test/fv3jedi/testinput/gdasapp_fv3jedi_fv3inc.yaml
@@ -1,60 +1,59 @@
 variable change:
   variable change name: Model2GeoVaLs
-  input variables: &inputvars [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr,phis]
-  output variables: [ua,va,t,sphum,ice_wat,liq_wat,o3mr,delp,hydrostatic_delz]
-background:
-  geometry:
-    fms initialization:
-      namelist filename: ./fv3jedi/fmsmpp.nml
-      field table filename: ./fv3jedi/field_table_gfdl
-    akbk: ./fv3jedi/akbk127.nc4
-    layout:
-    - '1'
-    - '1'
-    npx: '13'
-    npy: '13'
-    npz: '127'
-    field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
-  input:
+  input variables: &bkgvars [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr,phis]
+  output variables: &fv3incrvars [ua,va,t,sphum,ice_wat,liq_wat,o3mr,delp,hydrostatic_delz]
+jedi increment variables: [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr]
+fv3 increment variables: *fv3incrvars
+background geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table_gfdl
+  akbk: ./fv3jedi/akbk127.nc4
+  layout:
+  - '1'
+  - '1'
+  npx: '13'
+  npy: '13'
+  npz: '127'
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
+jedi increment geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table_gfdl
+  akbk: ./fv3jedi/akbk127.nc4
+  layout:
+  - '1'
+  - '1'
+  npx: '13'
+  npy: '13'
+  npz: '127'
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
+fv3 increment geometry:
+  fms initialization:
+    namelist filename: ./fv3jedi/fmsmpp.nml
+    field table filename: ./fv3jedi/field_table_gfdl
+  akbk: ./fv3jedi/akbk127.nc4
+  layout:
+  - '1'
+  - '1'
+  npx: '13'
+  npy: '13'
+  npz: '127'
+  field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_fv3inc.yaml
+members:
+- background input:
     filetype: cube sphere history
     datapath: ../testdata/
     provider: ufs
     datetime: '2021-07-31T12:00:00Z'
     filename: gdas.t06z.atmf006.nc
-    state variables: [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr,phis]
-jedi increment:
-  input variables: [ua,va,t,ps,sphum,ice_wat,liq_wat,o3mr]
-  geometry:
-    fms initialization:
-      namelist filename: ./fv3jedi/fmsmpp.nml
-      field table filename: ./fv3jedi/field_table_gfdl
-    akbk: ./fv3jedi/akbk127.nc4
-    layout:
-    - '1'
-    - '1'
-    npx: '13'
-    npy: '13'
-    npz: '127'
-    field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_history.yaml
-  input:
+    state variables: *bkgvars
+  jedi increment input:
     filetype: cube sphere history
     datapath: ../testdata/
     filename: atminc.20210731.120000.nc4
     provider: ufs
-fv3 increment:
-  geometry:
-    fms initialization:
-      namelist filename: ./fv3jedi/fmsmpp.nml
-      field table filename: ./fv3jedi/field_table_gfdl
-    akbk: ./fv3jedi/akbk127.nc4
-    layout:
-    - '1'
-    - '1'
-    npx: '13'
-    npy: '13'
-    npz: '127'
-    field metadata override: ./fv3jedi/fv3jedi_fieldmetadata_fv3inc.yaml
-  output:
+  fv3 increment output:
     filetype: cube sphere history
     filename: atminc.20210731_120000.nc4
     provider: ufs

diff --git a/utils/fv3jedi/fv3jedi_fv3inc.h b/utils/fv3jedi/fv3jedi_fv3inc.h
index 9e732c337..d0052fe87 100644
--- a/utils/fv3jedi/fv3jedi_fv3inc.h
+++ b/utils/fv3jedi/fv3jedi_fv3inc.h
@@ -3,6 +3,9 @@
 #include <iostream>
 #include <memory>
 #include <string>
+#include <vector>
+
+#include "atlas/field/FieldSet.h"
 
 #include "eckit/config/LocalConfiguration.h"
 
@@ -13,6 +16,7 @@
 #include "oops/mpi/mpi.h"
"oops/runs/Application.h" +#include "oops/util/ConfigFunctions.h" #include "oops/util/DateTime.h" #include "oops/util/Logger.h" @@ -26,56 +30,139 @@ namespace gdasapp { static const std::string classname() {return "gdasapp::fv3inc";} int execute(const eckit::Configuration & fullConfig, bool validate) const { - // Setup variable change - const eckit::LocalConfiguration varChangeConfig(fullConfig, "variable change"); - oops::Variables stateVarin(varChangeConfig, "input variables"); - oops::Variables stateVarout(varChangeConfig, "output variables"); + // Configurations + // --------------------------------------------------------------------------------- - // Setup background - const eckit::LocalConfiguration bkgConfig(fullConfig, "background"); - const eckit::LocalConfiguration stateGeomConfig(bkgConfig, "geometry"); - const eckit::LocalConfiguration stateInputConfig(bkgConfig, "input"); + // Get variable change + const eckit::LocalConfiguration varChangeConfig(fullConfig, "variable change"); + oops::Variables bkgVars(varChangeConfig, "input variables"); + oops::Variables varChangeIncrVars(varChangeConfig, "output variables"); + + // Get increment variables + oops::Variables jediIncrVars(fullConfig, "jedi increment variables"); + oops::Variables fv3IncrVars(fullConfig, "fv3 increment variables"); + + // Get geometries + const eckit::LocalConfiguration stateGeomConfig(fullConfig, "background geometry"); + const eckit::LocalConfiguration jediIncrGeomConfig(fullConfig, "jedi increment geometry"); + const eckit::LocalConfiguration fv3IncrGeomConfig(fullConfig, "fv3 increment geometry"); + + // Ensemble Members + int nmem; + std::vector membersConfig; + if ( fullConfig.has("members") ) { + fullConfig.get("members", membersConfig); + nmem = membersConfig.size(); + } else { + eckit::LocalConfiguration membersFromTemplateConfig(fullConfig, "members from template"); + eckit::LocalConfiguration templateConfig(membersFromTemplateConfig, "template"); + std::string pattern; + membersFromTemplateConfig.get("pattern", pattern); + membersFromTemplateConfig.get("nmembers", nmem); + int start = 1; + if (membersFromTemplateConfig.has("start")) { + membersFromTemplateConfig.get("start", start); + } + std::vector except; + if (membersFromTemplateConfig.has("except")) { + membersFromTemplateConfig.get("except", except); + } + int zpad = 0; + if ( membersFromTemplateConfig.has("zero padding") ) { + membersFromTemplateConfig.get("zero padding", zpad); + } + + int count = start; + for ( int imem = 0; imem < nmem; imem++ ) { + while (std::count(except.begin(), except.end(), count)) { + count += 1; + } + eckit::LocalConfiguration memberConfig(templateConfig); + util::seekAndReplace(memberConfig, pattern, count, zpad); + membersConfig.push_back(memberConfig); + count += 1; + } + } + + // Setup + // --------------------------------------------------------------------------------- + + // Setup geometries const fv3jedi::Geometry stateGeom(stateGeomConfig, this->getComm()); - fv3jedi::State xxBkg(stateGeom, stateInputConfig); - oops::Log::test() << "Background State: " << std::endl << xxBkg << std::endl; - - // Setup JEDI increment - const eckit::LocalConfiguration jediIncrConfig(fullConfig, "jedi increment"); - const eckit::LocalConfiguration jediIncrGeomConfig(jediIncrConfig, "geometry"); - const eckit::LocalConfiguration jediIncrInputConfig(jediIncrConfig, "input"); - oops::Variables jediIncrVarin(jediIncrConfig, "input variables"); const fv3jedi::Geometry jediIncrGeom(jediIncrGeomConfig, this->getComm()); - 
-      fv3jedi::Increment dx(jediIncrGeom, jediIncrVarin, xxBkg.validTime());
-      dx.read(jediIncrInputConfig);
-      oops::Log::test() << "JEDI Increment: " << std::endl << dx << std::endl;
-
-      // Setup FV3 increment
-      const eckit::LocalConfiguration fv3IncrConfig(fullConfig, "fv3 increment");
-      const eckit::LocalConfiguration fv3IncrGeomConfig(fv3IncrConfig, "geometry");
-      const eckit::LocalConfiguration fv3IncrOuputConfig(fv3IncrConfig, "output");
       const fv3jedi::Geometry fv3IncrGeom(fv3IncrGeomConfig, this->getComm());
 
-      //
+      // Setup variable change
       std::unique_ptr<fv3jedi::VariableChange> vc;
       vc.reset(new fv3jedi::VariableChange(varChangeConfig, stateGeom));
-      // ----------------------------------------------------------------------------
-
-      // Add increment to background to get analysis
-      fv3jedi::State xxAnl(stateGeom, xxBkg);
-      xxAnl += dx;
-
-      // Perform variables change on background and analysis
-      vc->changeVar(xxBkg, stateVarout);
-      vc->changeVar(xxAnl, stateVarout);
-
-      // Get final FV3 increment
-      fv3jedi::Increment dxFV3(fv3IncrGeom, stateVarout, xxBkg.validTime());
-      dxFV3.diff(xxAnl, xxBkg);
-      oops::Log::test() << "FV3 Increment: " << std::endl << dxFV3 << std::endl;
-
-      // Write FV3 increment
-      dxFV3.write(fv3IncrOuputConfig);
+      // Loop through ensemble members
+      // ---------------------------------------------------------------------------------
+
+      for ( int imem = 0; imem < nmem; imem++ ) {
+        // Inputs setup
+        // ---------------------------------------------------------------------------------
+
+        // Get input configurations
+        eckit::LocalConfiguration stateInputConfig(membersConfig[imem], "background input");
+        eckit::LocalConfiguration jediIncrInputConfig(membersConfig[imem], "jedi increment input");
+        eckit::LocalConfiguration fv3IncrOuputConfig(membersConfig[imem], "fv3 increment output");
+
+        // Read background state
+        fv3jedi::State xxBkg(stateGeom, stateInputConfig);
+
+        // Read JEDI increment
+        fv3jedi::Increment dxJEDI(jediIncrGeom, jediIncrVars, xxBkg.validTime());
+        dxJEDI.read(jediIncrInputConfig);
+
+        // Increment conversion
+        // ---------------------------------------------------------------------------------
+
+        // Add JEDI increment to background to get analysis
+        fv3jedi::State xxAnl(stateGeom, xxBkg);
+        xxAnl += dxJEDI;
+
+        // Perform variables change on background and analysis
+        vc->changeVar(xxBkg, varChangeIncrVars);
+        vc->changeVar(xxAnl, varChangeIncrVars);
+
+        // Get FV3 increment by subtracting background and analysis after variable change
+        fv3jedi::Increment dxFV3(fv3IncrGeom, fv3IncrVars, xxBkg.validTime());
+        dxFV3.diff(xxAnl, xxBkg);
+
+        // Put JEDI increment fields not created by variable change into FV3 increment
+        // because of interpolation between full and half resolution. Also,
+        // mixing-ratio variables may be set to zero in the analysis if the increment
+        // addition makes them negative. In both cases, subtracting the background and
+        // analysis won't yield back the same increment as we started with.
+        atlas::FieldSet dxFsJEDI;
+        atlas::FieldSet dxFsFV3;
+        dxJEDI.toFieldSet(dxFsJEDI);
+        dxFV3.toFieldSet(dxFsFV3);
+        for (size_t iField = 0; iField < dxFsFV3.size(); ++iField) {
+          if ( dxFsJEDI.has(dxFsFV3[iField].name()) ) {
+            auto viewJEDI = atlas::array::make_view<double, 2>(dxFsJEDI[dxFsFV3[iField].name()]);
+            auto viewFV3 = atlas::array::make_view<double, 2>(dxFsFV3[iField]);
+
+            size_t gridSize = viewFV3.shape(0);
+            int nLevels = viewFV3.shape(1);
+            for (int iLevel = 0; iLevel < nLevels + 1; ++iLevel) {
+              for ( size_t jNode = 0; jNode < gridSize ; ++jNode ) {
+                viewFV3(jNode, iLevel) = viewJEDI(jNode, iLevel);
+              }
+            }
+          }
+        }
+        dxFV3.fromFieldSet(dxFsFV3);
+
+        // Write FV3 increment
+        dxFV3.write(fv3IncrOuputConfig);
+
+        // Output for testing
+        oops::Log::test() << "Background State: " << std::endl << xxBkg << std::endl;
+        oops::Log::test() << "JEDI Increment: " << std::endl << dxJEDI << std::endl;
+        oops::Log::test() << "FV3 Increment: " << std::endl << dxFV3 << std::endl;
+      }
 
       return 0;
     }

From 0d7f9852b362ce4c60bb6a406efcfc24c9ec0ca6 Mon Sep 17 00:00:00 2001
From: Cory Martin
Date: Thu, 11 Apr 2024 16:56:18 +0000
Subject: [PATCH 2/8] Update femps and fv3-jedi-lm (#1036)

FV3-JEDI has been updated to require newer tags of femps and
FV3-JEDI-linearmodel. This updates the submodules to include these versions
since they are not updated frequently and are not part of stable-nightly.
---
 sorc/femps       | 2 +-
 sorc/fv3-jedi-lm | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sorc/femps b/sorc/femps
index cb396811e..4f12677d3 160000
--- a/sorc/femps
+++ b/sorc/femps
@@ -1 +1 @@
-Subproject commit cb396811eb26380478c4d3f177d95096ed2ddd8f
+Subproject commit 4f12677d345e683bf910b5f76f0df120ad27482d
diff --git a/sorc/fv3-jedi-lm b/sorc/fv3-jedi-lm
index 05cc1ae63..af67095ee 160000
--- a/sorc/fv3-jedi-lm
+++ b/sorc/fv3-jedi-lm
@@ -1 +1 @@
-Subproject commit 05cc1ae63252ca535f3db0fdca9a8a996329fc8f
+Subproject commit af67095ee87ffb472218aa386e34c6bfe64ca424

From fadffb0cc9d20e22831d355cd8e22acaf221ca95 Mon Sep 17 00:00:00 2001
From: AndrewEichmann-NOAA <58948505+AndrewEichmann-NOAA@users.noreply.github.com>
Date: Fri, 12 Apr 2024 11:38:06 -0400
Subject: [PATCH 3/8] Run g-w linker script before ctest for prepoceanobs task
 (#1034)

In anticipation of the merging of
https://github.com/NOAA-EMC/global-workflow/pull/2459, the
`test_gdasapp_soca_setup_obsprep` ctest, which sets up for
`test_gdasapp_soca_JGLOBAL_PREP_OCEAN_OBS`, now runs the global-workflow
`sorc/link_workflow.sh` script to allow the observation config files to be
accessed in a global-workflow-compliant way.

Effectively addresses https://github.com/NOAA-EMC/GDASApp/issues/1032
---
 test/soca/gw/setup_obsprep.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/soca/gw/setup_obsprep.sh b/test/soca/gw/setup_obsprep.sh
index 5759cf829..5583d4ce5 100755
--- a/test/soca/gw/setup_obsprep.sh
+++ b/test/soca/gw/setup_obsprep.sh
@@ -13,6 +13,9 @@ project_source_dir="$1"
 testdatadir="${project_source_dir}/test/soca/testdata"
 testdatadir_bufr="${project_source_dir}/build/gdas/test/testdata"
 
+# run the g-w link script so that the config files can be found
+(cd ${project_source_dir}/../../.. && ./link_workflow.sh)
+
 # Clean up previous attempts
 rm -rf gdas.20180414 gdas.20180415

From 1a84448b190adf78b4f39459699ebd2ddac90cae Mon Sep 17 00:00:00 2001
From: DavidNew-NOAA <134300700+DavidNew-NOAA@users.noreply.github.com>
Date: Fri, 12 Apr 2024 16:27:11 -0400
Subject: [PATCH 4/8] Fix test output for fv3jedi_fv3inc.h (#1039)

I foolishly moved the test output print statements in fv3jedi_fv3inc.h around
for aesthetic reasons AFTER I had run test_gdasapp_fv3jedi_fv3inc to confirm
that it still passed. This broke the test, so I moved the print statements
back to where they belong. The test now passes.

---------

Co-authored-by: Cory Martin
---
 utils/fv3jedi/fv3jedi_fv3inc.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/utils/fv3jedi/fv3jedi_fv3inc.h b/utils/fv3jedi/fv3jedi_fv3inc.h
index d0052fe87..fb52dd6ea 100644
--- a/utils/fv3jedi/fv3jedi_fv3inc.h
+++ b/utils/fv3jedi/fv3jedi_fv3inc.h
@@ -110,10 +110,12 @@ namespace gdasapp {
 
         // Read background state
         fv3jedi::State xxBkg(stateGeom, stateInputConfig);
+        oops::Log::test() << "Background State: " << std::endl << xxBkg << std::endl;
 
         // Read JEDI increment
         fv3jedi::Increment dxJEDI(jediIncrGeom, jediIncrVars, xxBkg.validTime());
         dxJEDI.read(jediIncrInputConfig);
+        oops::Log::test() << "JEDI Increment: " << std::endl << dxJEDI << std::endl;
 
         // Increment conversion
         // ---------------------------------------------------------------------------------
@@ -154,14 +156,10 @@ namespace gdasapp {
           }
         }
         dxFV3.fromFieldSet(dxFsFV3);
+        oops::Log::test() << "FV3 Increment: " << std::endl << dxFV3 << std::endl;
 
         // Write FV3 increment
         dxFV3.write(fv3IncrOuputConfig);
-
-        // Output for testing
-        oops::Log::test() << "Background State: " << std::endl << xxBkg << std::endl;
-        oops::Log::test() << "JEDI Increment: " << std::endl << dxJEDI << std::endl;
-        oops::Log::test() << "FV3 Increment: " << std::endl << dxFV3 << std::endl;
       }
 
       return 0;

From c2fdf1e652b8bb367e1e33780acc8f94811a25f5 Mon Sep 17 00:00:00 2001
From: DavidNew-NOAA <134300700+DavidNew-NOAA@users.noreply.github.com>
Date: Mon, 15 Apr 2024 08:23:55 -0400
Subject: [PATCH 5/8] Fix GW jjob tests for upcoming GW PR #2420 (#1041)

This PR addresses issue [#1011](https://github.com/NOAA-EMC/GDASApp/issues/1011),
related to the failure of the "gdasatmanlvar" jjob test due to a change in the
name of the "gdasatmanlvar" run script, and, with the upcoming Global Workflow
PR [#2420](https://github.com/NOAA-EMC/global-workflow/pull/2420), the
impending failure of the "gdasatmanlfinal" jjob. This fixes the script name in
the "gdasatmanlvar" test and adds a new test for "gdasatmanlfv3inc", which
will fix the issue with "gdasatmanlfinal". The "gdasatmanlfv3inc" and
"gdasatmanlfinal" tests won't pass yet in GW develop, but they will after
[#2420](https://github.com/NOAA-EMC/global-workflow/pull/2420) merges GW
feature/jediinc2fv3.

This PR is just a verbatim copy of @RussTreadon-NOAA's work, taken from his
comment [here](https://github.com/NOAA-EMC/GDASApp/issues/1011#issuecomment-2052249333).
I re-ran the new tests, and they also passed for me in GW feature/jediinc2fv3.
---
 test/atm/global-workflow/CMakeLists.txt  |  5 +++
 test/atm/global-workflow/jjob_var_inc.sh | 55 ++++++++++++++++++++++++
 test/atm/global-workflow/jjob_var_run.sh |  6 +--
 utils/fv3jedi/fv3jedi_fv3inc.h           |  2 +-
 4 files changed, 64 insertions(+), 4 deletions(-)
 create mode 100755 test/atm/global-workflow/jjob_var_inc.sh

diff --git a/test/atm/global-workflow/CMakeLists.txt b/test/atm/global-workflow/CMakeLists.txt
index c911f1a9f..cda1d56d4 100644
--- a/test/atm/global-workflow/CMakeLists.txt
+++ b/test/atm/global-workflow/CMakeLists.txt
@@ -16,6 +16,11 @@ add_test(NAME test_gdasapp_atm_jjob_var_run
                  ${PROJECT_BINARY_DIR} ${PROJECT_SOURCE_DIR}
          WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/test/atm/global-workflow/testrun)
 
+add_test(NAME test_gdasapp_atm_jjob_var_inc
+         COMMAND ${PROJECT_SOURCE_DIR}/test/atm/global-workflow/jjob_var_inc.sh
+                 ${PROJECT_BINARY_DIR} ${PROJECT_SOURCE_DIR}
+         WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/test/atm/global-workflow/testrun)
+
 add_test(NAME test_gdasapp_atm_jjob_var_final
          COMMAND ${PROJECT_SOURCE_DIR}/test/atm/global-workflow/jjob_var_final.sh
                  ${PROJECT_BINARY_DIR} ${PROJECT_SOURCE_DIR}
          WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/test/atm/global-workflow/testrun)

diff --git a/test/atm/global-workflow/jjob_var_inc.sh b/test/atm/global-workflow/jjob_var_inc.sh
new file mode 100755
index 000000000..71a91325b
--- /dev/null
+++ b/test/atm/global-workflow/jjob_var_inc.sh
@@ -0,0 +1,55 @@
+#! /usr/bin/env bash
+
+set -x
+bindir=$1
+srcdir=$2
+
+# Set g-w HOMEgfs
+topdir=$(cd "$(dirname "$(readlink -f -n "${bindir}" )" )/../../.." && pwd -P)
+export HOMEgfs=$topdir
+
+# Set variables for ctest
+export PSLOT=gdas_test
+export EXPDIR=$bindir/test/atm/global-workflow/testrun/experiments/$PSLOT
+export PDY=20210323
+export cyc=18
+export CDATE=${PDY}${cyc}
+export ROTDIR=$bindir/test/atm/global-workflow/testrun/ROTDIRS/$PSLOT
+export RUN=gdas
+export CDUMP=gdas
+export DATAROOT=$bindir/test/atm/global-workflow/testrun/RUNDIRS/$PSLOT
+export COMIN_GES=${bindir}/test/atm/bkg
+export pid=${pid:-$$}
+export jobid=$pid
+export COMROOT=$DATAROOT
+export NMEM_ENS=0
+export ACCOUNT=da-cpu
+
+# Set python path for workflow utilities and tasks
+wxflowPATH="${HOMEgfs}/ush/python:${HOMEgfs}/ush/python/wxflow"
+PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${wxflowPATH}"
+export PYTHONPATH
+
+# Determine machine from config.base
+machine=$(echo `grep 'machine=' $EXPDIR/config.base | cut -d"=" -f2` | tr -d '"')
+
+# Set NETCDF and UTILROOT variables (used in config.base)
+if [[ $machine = 'HERA' ]]; then
+    NETCDF=$( which ncdump )
+    export NETCDF
+    export UTILROOT="/scratch2/NCEPDEV/ensemble/save/Walter.Kolczynski/hpc-stack/intel-18.0.5.274/prod_util/1.2.2"
+elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then
+    ncdump=$( which ncdump )
+    NETCDF=$( echo "${ncdump}" | cut -d " " -f 3 )
+    export NETCDF
+    export UTILROOT=/work2/noaa/da/python/opt/intel-2022.1.2/prod_util/1.2.2
+fi
+
+# Execute j-job
+if [[ $machine = 'HERA' ]]; then
+    sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT
+elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then
+    sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT
+else
+    ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_FV3_INCREMENT
+fi

diff --git a/test/atm/global-workflow/jjob_var_run.sh b/test/atm/global-workflow/jjob_var_run.sh
index e7471bec1..6239b5b84 100755
--- a/test/atm/global-workflow/jjob_var_run.sh
+++ b/test/atm/global-workflow/jjob_var_run.sh
@@ -50,9 +50,9 @@ fi
 
 # Execute j-job
 if [[ $machine = 'HERA' ]]; then
-    sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_RUN
+    sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL
 elif [[ $machine = 'ORION' || $machine = 'HERCULES' ]]; then
-    sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_RUN
+    sbatch --ntasks=6 --account=$ACCOUNT --qos=batch --time=00:10:00 --export=ALL --wait ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL
 else
-    ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_RUN
+    ${HOMEgfs}/jobs/JGLOBAL_ATM_ANALYSIS_VARIATIONAL
 fi

diff --git a/utils/fv3jedi/fv3jedi_fv3inc.h b/utils/fv3jedi/fv3jedi_fv3inc.h
index fb52dd6ea..8044692a0 100644
--- a/utils/fv3jedi/fv3jedi_fv3inc.h
+++ b/utils/fv3jedi/fv3jedi_fv3inc.h
@@ -148,7 +148,7 @@ namespace gdasapp {
 
             size_t gridSize = viewFV3.shape(0);
             int nLevels = viewFV3.shape(1);
-            for (int iLevel = 0; iLevel < nLevels + 1; ++iLevel) {
+            for (int iLevel = 0; iLevel < nLevels; ++iLevel) {
               for ( size_t jNode = 0; jNode < gridSize ; ++jNode ) {
                 viewFV3(jNode, iLevel) = viewJEDI(jNode, iLevel);
               }

From fd1ebdad33245b7d9184a1972e1c73d5caf3881d Mon Sep 17 00:00:00 2001
From: RussTreadon-NOAA <26926959+RussTreadon-NOAA@users.noreply.github.com>
Date: Mon, 15 Apr 2024 14:57:14 -0400
Subject: [PATCH 6/8] remove seviri from gdas_prototype_3d yaml (#1043)

---
 parm/atm/obs/lists/gdas_prototype_3d.yaml.j2 | 2 --
 1 file changed, 2 deletions(-)

diff --git a/parm/atm/obs/lists/gdas_prototype_3d.yaml.j2 b/parm/atm/obs/lists/gdas_prototype_3d.yaml.j2
index 11d43e2d0..f576fc25e 100644
--- a/parm/atm/obs/lists/gdas_prototype_3d.yaml.j2
+++ b/parm/atm/obs/lists/gdas_prototype_3d.yaml.j2
@@ -7,6 +7,4 @@ observers:
 {% include 'atm/obs/config/ompsnp_npp.yaml.j2' %}
 {% include 'atm/obs/config/ompstc_npp.yaml.j2' %}
 {% include 'atm/obs/config/omi_aura.yaml.j2' %}
-{% include 'atm/obs/config/seviri_m08.yaml.j2' %}
-{% include 'atm/obs/config/seviri_m11.yaml.j2' %}
 {% endfilter %}

From 6f7bd18f655f23a3f66d31e7ee54d328db450ac6 Mon Sep 17 00:00:00 2001
From: Mindo Choi <141867620+apchoiCMD@users.noreply.github.com>
Date: Mon, 15 Apr 2024 17:28:29 -0400
Subject: [PATCH 7/8] Fix bug for datetime in GHRSST Ioda Converter (#1027)

- Resolve the FillValue issue within the obs/metadata calculation loop
- Fix a precision issue that previously occurred when converting the integer
  datetime values
- Fixes #1019

---------

Co-authored-by: Guillaume Vernieres
---
 utils/obsproc/Ghrsst2Ioda.h         | 38 +++++++++++++----------------
 utils/test/testref/ghrsst2ioda.test |  6 ++---
 2 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/utils/obsproc/Ghrsst2Ioda.h b/utils/obsproc/Ghrsst2Ioda.h
index 7f0082da8..fa277a8e8 100644
--- a/utils/obsproc/Ghrsst2Ioda.h
+++ b/utils/obsproc/Ghrsst2Ioda.h
@@ -86,6 +86,8 @@ namespace gdasapp {
       ncFile.getVar("sst_dtime").getVar(sstdTime.data());
       float dtimeScaleFactor;
       ncFile.getVar("sst_dtime").getAtt("scale_factor").getValues(&dtimeScaleFactor);
+      int32_t dtimeFillValue;
+      ncFile.getVar("sst_dtime").getAtt("_FillValue").getValues(&dtimeFillValue);
 
       // Read SST ObsValue
       std::vector<int16_t> sstObsVal(dimTime*dimLat*dimLon);
@@ -118,7 +120,7 @@
       std::vector<std::vector<float>> sst(dimLat, std::vector<float>(dimLon));
       std::vector<std::vector<float>> obserror(dimLat, std::vector<float>(dimLon));
       std::vector<std::vector<int>> preqc(dimLat, std::vector<int>(dimLon));
-      std::vector<std::vector<float>> seconds(dimLat, std::vector<float>(dimLon));
+      std::vector<std::vector<int64_t>> seconds(dimLat, std::vector<int64_t>(dimLon));
       size_t index = 0;
       for (int i = 0; i < dimLat; i++) {
         for (int j = 0; j < dimLon; j++) {
@@ -143,9 +145,13 @@ namespace gdasapp {
           // TODO(Somebody): add sampled std. dev. of sst to the total obs error
           obserror[i][j] = static_cast<float>(sstObsErr[index]) * errScaleFactor + errOffSet;
 
-          // epoch time in seconds
-          seconds[i][j] = static_cast<float>(sstdTime[index]) * dtimeScaleFactor
-                        + static_cast<float>(refTime[0]);
+          // epoch time in seconds, avoiding use of the FillValue in the calculation
+          if (sstdTime[index] == dtimeFillValue) {
+            seconds[i][j] = 0;
+          } else {
+            seconds[i][j] = static_cast<int64_t>(sstdTime[index]) * dtimeScaleFactor
+                          + static_cast<int64_t>(refTime[0]);
+          }
           index++;
         }
       }
@@ -157,36 +163,21 @@
       std::vector<std::vector<float>> lon2d_s;
       std::vector<std::vector<float>> lat2d_s;
       std::vector<std::vector<float>> obserror_s;
+      std::vector<std::vector<int64_t>> seconds_s;
       if ( fullConfig_.has("binning") ) {
         sst_s = gdasapp::superobutils::subsample2D(sst, mask, fullConfig_);
         lon2d_s = gdasapp::superobutils::subsample2D(lon2d, mask, fullConfig_);
         lat2d_s = gdasapp::superobutils::subsample2D(lat2d, mask, fullConfig_);
         obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
+        seconds_s = gdasapp::superobutils::subsample2D(seconds, mask, fullConfig_);
       } else {
         sst_s = sst;
         lon2d_s = lon2d;
         lat2d_s = lat2d;
         obserror_s = obserror;
+        seconds_s = seconds;
       }
 
-      // Non-Superobing part only applied to datetime
-      // TODO(Mindo): Refactor to make superob capable of processing all data
-      // TODO(ASGM) : Remove datetime mean when the time reading is fixed(L146)
-      // Compute the sum of valid obs values where mask == 1
-      int64_t sum = 0;
-      int count = 0;
-      for (size_t i = 0; i < seconds.size(); ++i) {
-        for (size_t j = 0; j < seconds[0].size(); ++j) {
-          if (mask[i][j] == 1) {
-            sum += seconds[i][j];
-            count++;
-          }
-        }
-      }
-      // Calculate the average and store datetime
-      // Replace the seconds_s to 0 when count is zero
-      int64_t seconds_s = count != 0 ? static_cast<int64_t>(sum / count) : 0;
-
       // number of obs after subsampling
       int nobs = sst_s.size() * sst_s[0].size();
 
@@ -211,8 +202,7 @@
           iodaVars.obsVal_(loc) = sst_s[i][j];
           iodaVars.obsError_(loc) = obserror_s[i][j];
           iodaVars.preQc_(loc) = 0;
-          // Store averaged datetime (seconds)
-          iodaVars.datetime_(loc) = seconds_s;
+          iodaVars.datetime_(loc) = seconds_s[i][j];
           // Store optional metadata, set ocean basins to -999 for now
           iodaVars.intMetadata_.row(loc) << -999;
           loc += 1;

diff --git a/utils/test/testref/ghrsst2ioda.test b/utils/test/testref/ghrsst2ioda.test
index 75a0764e4..1413f9d04 100644
--- a/utils/test/testref/ghrsst2ioda.test
+++ b/utils/test/testref/ghrsst2ioda.test
@@ -21,6 +21,6 @@ latitude:
   Max: 77.95
   Sum: 623.423
 datetime:
-  Min: 1269445063
-  Max: 1269445063
-  Sum: 10155560504
+  Min: 1269445504
+  Max: 1269445504
+  Sum: 10155564032
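As a side note on the precision issue fixed in the patch above: near epoch times of about 1.27e9 seconds, a single-precision float can only represent every 128th integer, which is enough to shift datetimes by up to about a minute. A minimal standalone sketch (not converter code) reproduces the effect with the old test-reference value:

```cpp
// A float's 24-bit mantissa spaces representable values 128 apart near 1.27e9,
// so routing epoch seconds through float rounds them to the nearest multiple.
#include <cstdint>
#include <iostream>

int main() {
  const int64_t epoch = 1269445063;                 // seconds since the reference time
  const float asFloat = static_cast<float>(epoch);  // rounds to nearest representable value
  std::cout << "int64_t: " << epoch << "\n";                          // 1269445063
  std::cout << "float:   " << static_cast<int64_t>(asFloat) << "\n";  // 1269445120
  return 0;
}
```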
From 420459c8d8bee0608a882becd809737b75713256 Mon Sep 17 00:00:00 2001
From: Guillaume Vernieres
Date: Mon, 15 Apr 2024 17:35:10 -0400
Subject: [PATCH 8/8] Save basic stats in csv at each cycle (#1040)

### Work done

Computes basic stats (count, bias, and rmse of omb's and oma's) for different
ocean basins; this assumes the obs were flagged with the ocean mask from
`RECCAP2_region_masks_all_v20221025.nc`.

- The stats are computed within the post ocean analysis jjob. Failure of this
  job will only result in a warning.
- I provided a simple plotting script that will accept multiple experiments as
  arguments and plot time series. I'm not sure if we want this in the vrfy job.

### Work left to do

We're not flagging the obs currently, so that code will do nothing until we
update the prep ocean obs yaml to point to
`RECCAP2_region_masks_all_v20221025.nc` (it's been copied to the soca fix dir
on hera and orion). Having the obs flagged in develop will require the ioda
converters to not fail if `RECCAP2_region_masks_all_v20221025` isn't found.
---
 parm/soca/obs/obs_stats.yaml.j2               |   7 +
 scripts/exgdas_global_marine_analysis_post.py |  77 ++++++-
 ush/soca/gdassoca_obsstats.py                 |  97 ++++++++
 utils/soca/CMakeLists.txt                     |   6 +
 utils/soca/gdassoca_obsstats.cc               |  10 +
 utils/soca/gdassoca_obsstats.h                | 212 ++++++++++++++++++
 6 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 parm/soca/obs/obs_stats.yaml.j2
 create mode 100644 ush/soca/gdassoca_obsstats.py
 create mode 100644 utils/soca/gdassoca_obsstats.cc
 create mode 100644 utils/soca/gdassoca_obsstats.h

diff --git a/parm/soca/obs/obs_stats.yaml.j2 b/parm/soca/obs/obs_stats.yaml.j2
new file mode 100644
index 000000000..f1359b2df
--- /dev/null
+++ b/parm/soca/obs/obs_stats.yaml.j2
@@ -0,0 +1,7 @@
+time window:
+  begin: '1900-01-01T00:00:00Z'
+  end: '2035-01-01T00:00:00Z'
+  bound to include: begin
+
+obs spaces:
+  {{ obs_spaces }}

diff --git a/scripts/exgdas_global_marine_analysis_post.py b/scripts/exgdas_global_marine_analysis_post.py
index 6e30f7cad..e527509fd 100755
--- a/scripts/exgdas_global_marine_analysis_post.py
+++ b/scripts/exgdas_global_marine_analysis_post.py
@@ -22,7 +22,11 @@
 import glob
 import shutil
 from datetime import datetime, timedelta
-from wxflow import FileHandler, Logger
+from wxflow import AttrDict, FileHandler, Logger, parse_j2yaml
+from multiprocessing import Process
+import subprocess
+import netCDF4
+import re
 
 logger = Logger()
 
@@ -36,8 +40,6 @@ def list_all_files(dir_in, dir_out, wc='*', fh_list=[]):
     return fh_list
 
-logger.info(f"---------------- Copy from RUNDIR to COMOUT")
-
 com_ocean_analysis = os.getenv('COM_OCEAN_ANALYSIS')
 com_ice_restart = os.getenv('COM_ICE_RESTART')
 anl_dir = os.getenv('DATA')
@@ -52,6 +54,8 @@
 bdate = datetime.strftime(bdatedt, '%Y-%m-%dT%H:00:00Z')
 mdate = datetime.strftime(datetime.strptime(cdate, '%Y%m%d%H'), '%Y-%m-%dT%H:00:00Z')
 
+logger.info(f"---------------- Copy from RUNDIR to COMOUT")
+
 post_file_list = []
 
 # Make a copy the IAU increment
@@ -106,3 +110,70 @@ def list_all_files(dir_in, dir_out, wc='*', fh_list=[]):
                           os.path.join(com_ocean_analysis, 'yaml'), wc='*.yaml', fh_list=fh_list)
 
 FileHandler({'copy': fh_list}).sync()
+
+# obs space statistics
+logger.info(f"---------------- Compute basic stats")
+diags_list = glob.glob(os.path.join(os.path.join(com_ocean_analysis, 'diags', '*.nc4')))
+obsstats_j2yaml = str(os.path.join(os.getenv('HOMEgfs'), 'sorc', 'gdas.cd',
+                                   'parm', 'soca', 'obs', 'obs_stats.yaml.j2'))
+
+
+# function to create a minimalist ioda obs space
+def create_obs_space(data):
+    os_dict = {"obs space": {
+        "name": data["obs_space"],
+        "obsdatain": {
+            "engine": {"type": "H5File", "obsfile": data["obsfile"]}
+        },
+        "simulated variables": [data["variable"]]
+    },
+        "variable": data["variable"],
+        "experiment identifier": data["pslot"],
+        "csv output": data["csv_output"]
+    }
+    return os_dict
+
+
+# attempt to extract the experiment id from the path
+pslot = os.path.normpath(com_ocean_analysis).split(os.sep)[-5]
+
+# iterate through the obs spaces and generate the yaml for gdassoca_obsstats.x
+obs_spaces = []
+for obsfile in diags_list:
+
+    # define an obs space name
+    obs_space = re.sub(r'\.\d{10}\.nc4$', '', os.path.basename(obsfile))
+
+    # get the variable name, assume 1 variable per file
+    nc = netCDF4.Dataset(obsfile, 'r')
+    variable = next(iter(nc.groups["ObsValue"].variables))
+    nc.close()
+
+    # filling values for the templated yaml
+    data = {'obs_space': os.path.basename(obsfile),
+            'obsfile': obsfile,
+            'pslot': pslot,
+            'variable': variable,
+            'csv_output': os.path.join(com_ocean_analysis,
+                                       f"{RUN}.t{cyc}z.ocn.{obs_space}.stats.csv")}
+    obs_spaces.append(create_obs_space(data))
+
+# create the yaml
+data = {'obs_spaces': obs_spaces}
+conf = parse_j2yaml(path=obsstats_j2yaml, data=data)
+stats_yaml = 'diag_stats.yaml'
+conf.save(stats_yaml)
+
+# run the application
+# TODO(GorA): this should be setup properly in the g-w once gdassoca_obsstats is in develop
+gdassoca_obsstats_exec = os.path.join(os.getenv('HOMEgfs'),
+                                      'sorc', 'gdas.cd', 'build', 'bin', 'gdassoca_obsstats.x')
+command = f"{os.getenv('launcher')} {gdassoca_obsstats_exec} {stats_yaml}"
+logger.info(f"{command}")
+result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+# issue a warning if the process has failed
+if result.returncode != 0:
+    logger.warning(f"{command} has failed")
+if result.stderr:
+    print("STDERR:", result.stderr.decode())

diff --git a/ush/soca/gdassoca_obsstats.py b/ush/soca/gdassoca_obsstats.py
new file mode 100644
index 000000000..4741687bb
--- /dev/null
+++ b/ush/soca/gdassoca_obsstats.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+
+
+# creates figures of timeseries from the csv outputs computed by gdassoca_obsstats.x
+import argparse
+from itertools import product
+import os
+import glob
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+
+class ObsStats:
+    def __init__(self):
+        self.data = pd.DataFrame()
+
+    def read_csv(self, filepaths):
+        # Iterate through the list of file paths and append their data
+        for filepath in filepaths:
+            new_data = pd.read_csv(filepath)
+            # Convert date to datetime for easier plotting
+            new_data['date'] = pd.to_datetime(new_data['date'], format='%Y%m%d%H')
+            self.data = pd.concat([self.data, new_data], ignore_index=True)
+
+    def plot_timeseries(self, ocean, variable):
+        # Filter data for the given ocean and variable
+        filtered_data = self.data[(self.data['Ocean'] == ocean) & (self.data['Variable'] == variable)]
+
+        if filtered_data.empty:
+            print("No data available for the given ocean and variable combination.")
+            return
+
+        # Get unique experiments
+        experiments = filtered_data['Exp'].unique()
+
+        # Plot settings
+        fig, axs = plt.subplots(3, 1, figsize=(10, 15), sharex=True)
+        fig.suptitle(f'{variable} statistics, {ocean} ocean', fontsize=16, fontweight='bold')
+
+        for exp in experiments:
+            exp_data = filtered_data[filtered_data['Exp'] == exp]
+
+            # Plot RMSE
+            axs[0].plot(exp_data['date'], exp_data['RMSE'], marker='o', linestyle='-', label=exp)
+            axs[0].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
+            axs[0].xaxis.set_major_locator(mdates.DayLocator())
+            axs[0].tick_params(labelbottom=False)
+            axs[0].set_ylabel('RMSE')
+            axs[0].legend()
+            axs[0].grid(True)
+
+            # Plot Bias
+            axs[1].plot(exp_data['date'], exp_data['Bias'], marker='o', linestyle='-', label=exp)
+            axs[1].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
+            axs[1].xaxis.set_major_locator(mdates.DayLocator())
+            axs[1].tick_params(labelbottom=False)
+            axs[1].set_ylabel('Bias')
+            axs[1].legend()
+            axs[1].grid(True)
+
+            # Plot Count
+            axs[2].plot(exp_data['date'], exp_data['Count'], marker='o', linestyle='-', label=exp)
+            axs[2].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
+            axs[2].xaxis.set_major_locator(mdates.DayLocator())
+            axs[2].set_ylabel('Count')
+            axs[2].legend()
+            axs[2].grid(True)
+
+        # Improve layout and show plot
+        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
+        plt.savefig(f'{ocean}_test.png')
+
+
+if __name__ == "__main__":
+    epilog = ["Usage examples: ./gdassoca_obsstats.py --exps gdas.*.csv"]
+    parser = argparse.ArgumentParser(description="Observation space RMSE's and BIAS's",
+                                     formatter_class=argparse.RawDescriptionHelpFormatter,
+                                     epilog=os.linesep.join(epilog))
+    parser.add_argument("--exps", nargs='+', required=True,
+                        help="Path to the experiment's COMROOT")
+    parser.add_argument("--variable", required=True, help="ombg_qc or ombg_noqc")
+    parser.add_argument("--figname", required=True, help="The name of the output figure")
+    args = parser.parse_args()
+
+    flist = []
+    for exp in args.exps:
+        print(exp)
+        wc = exp + '/*.*/??/analysis/ocean/*.t??z.stats.csv'
+        flist.append(glob.glob(wc))
+
+    flist = sum(flist, [])
+    obsStats = ObsStats()
+    obsStats.read_csv(flist)
+    for var, ocean in product(['ombg_noqc', 'ombg_qc'],
+                              ['Atlantic', 'Pacific', 'Indian', 'Arctic', 'Southern']):
+        obsStats.plot_timeseries(ocean, var)

diff --git a/utils/soca/CMakeLists.txt b/utils/soca/CMakeLists.txt
index dd29ad7ab..39da43a66 100644
--- a/utils/soca/CMakeLists.txt
+++ b/utils/soca/CMakeLists.txt
@@ -17,3 +17,9 @@ ecbuild_add_executable( TARGET gdas_socahybridweights.x
                         SOURCES gdas_socahybridweights.cc )
 target_compile_features( gdas_socahybridweights.x PUBLIC cxx_std_17)
 target_link_libraries( gdas_socahybridweights.x PUBLIC NetCDF::NetCDF_CXX oops atlas soca)
+
+# Obs Stats
+ecbuild_add_executable( TARGET gdassoca_obsstats.x
+                        SOURCES gdassoca_obsstats.cc )
+target_compile_features( gdassoca_obsstats.x PUBLIC cxx_std_17)
+target_link_libraries( gdassoca_obsstats.x PUBLIC NetCDF::NetCDF_CXX oops ioda)

diff --git a/utils/soca/gdassoca_obsstats.cc b/utils/soca/gdassoca_obsstats.cc
new file mode 100644
index 000000000..de78e94f1
--- /dev/null
+++ b/utils/soca/gdassoca_obsstats.cc
@@ -0,0 +1,10 @@
+#include "gdassoca_obsstats.h"
+#include "oops/runs/Run.h"
+
+// this application computes a few basic statistics in obs space
+
+int main(int argc, char ** argv) {
+  oops::Run run(argc, argv);
+  gdasapp::ObsStats obsStats;
+  return run.execute(obsStats);
+}

diff --git a/utils/soca/gdassoca_obsstats.h b/utils/soca/gdassoca_obsstats.h
new file mode 100644
index 000000000..98e70bf3b
--- /dev/null
+++ b/utils/soca/gdassoca_obsstats.h
@@ -0,0 +1,212 @@
+#pragma once
+
+#include <netcdf>
+
+#include <cmath>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "eckit/config/LocalConfiguration.h"
+
+#include "ioda/Engines/EngineUtils.h"
+#include "ioda/Group.h"
+#include "ioda/ObsDataIoParameters.h"
+#include "ioda/ObsGroup.h"
+#include "ioda/ObsSpace.h"
+#include "ioda/ObsVector.h"
+
+#include "oops/base/PostProcessor.h"
+#include "oops/mpi/mpi.h"
+#include "oops/runs/Application.h"
+#include "oops/util/DateTime.h"
+#include "oops/util/Duration.h"
+#include "oops/util/Logger.h"
+#include "oops/util/missingValues.h"
+#include "oops/util/TimeWindow.h"
+
+namespace gdasapp {
+  class ObsStats : public oops::Application {
+   public:
+    // -----------------------------------------------------------------------------
+    explicit ObsStats(const eckit::mpi::Comm & comm = oops::mpi::world())
+      : Application(comm), fillVal_(util::missingValue<double>()) {
+      oceans_["Atlantic"] = 1;
+      oceans_["Pacific"] = 2;
+      oceans_["Indian"] = 3;
+      oceans_["Arctic"] = 4;
+      oceans_["Southern"] = 5;
+    }
+
+    // -----------------------------------------------------------------------------
+    int execute(const eckit::Configuration & fullConfig, bool /*validate*/) const {
+      // time window
+      const eckit::LocalConfiguration timeWindowConf(fullConfig, "time window");
+      const util::TimeWindow timeWindow(timeWindowConf);
+
+      // get the list of obs spaces to process
+      std::vector<eckit::LocalConfiguration> obsSpaces;
+      fullConfig.get("obs spaces", obsSpaces);
+
+      // check if the number of workers is correct. Only the serial and 1 diag file
+      // per pe works for now
+      ASSERT(getComm().size() <= 1 || getComm().size() >= obsSpaces.size());
+
+      // initialize pe's color and communicator's name
+      int color(0);
+      std::string commNameStr = "comm_idle";
+      if (this->getComm().rank() < obsSpaces.size()) {
+        color = 1;
+        std::string commNameStr = "comm_work";
+      }
+
+      // Create the new communicator
+      char const *commName = commNameStr.c_str();
+      eckit::mpi::Comm & commObsSpace = this->getComm().split(color, commName);
+
+      // spread out the work over pes
+      if (color > 0) {
+        // get the obs space configuration
+        auto obsSpace = obsSpaces[commObsSpace.rank()];
+        eckit::LocalConfiguration obsConfig(obsSpace, "obs space");
+
+        // get the obs diag file
+        std::string obsFile;
+        obsConfig.get("obsdatain.engine.obsfile", obsFile);
+        oops::Log::info() << "========= Processing" << obsFile
+                          << " date: " << extractDateFromFilename(obsFile)
+                          << std::endl;
+
+        // what variable to compute the stats for
+        std::string variable;
+        obsSpace.get("variable", variable);
+
+        // read the obs space
+        ioda::ObsSpace ospace(obsConfig, commObsSpace, timeWindow,
+                              oops::mpi::myself());
+        const size_t nlocs = ospace.nlocs();
+        oops::Log::info() << "nlocs =" << nlocs << std::endl;
+        std::vector<float> var(nlocs);
+        std::string group = "ombg";
+        ospace.get_db(group, variable, var);
+
+        // ocean basin partitioning
+        std::vector<int> oceanBasins(nlocs);
+        ospace.get_db("MetaData", "oceanBasin", oceanBasins);
+
+        // Open an ofstream for output and write header
+        std::string expId;
+        obsSpace.get("experiment identifier", expId);
+        std::string fileName;
+        obsSpace.get("csv output", fileName);
+        std::ofstream outputFile(fileName);
+        outputFile << "Exp,Variable,Ocean,date,RMSE,Bias,Count\n";
+
+        // get the date
+        int dateint = extractDateFromFilename(obsFile);
+
+        // Pre QC'd stats
+        oops::Log::info() << "========= Pre QC" << std::endl;
+        std::string varname = group+"_noqc";
+        std::vector<int> PreQC(nlocs);
+        ospace.get_db("PreQC", variable, PreQC);
+        stats(var, oceanBasins, PreQC, outputFile, varname, dateint, expId);
+
+        oops::Log::info() << "========= Effective QC" << std::endl;
+        varname = group+"_qc";
+        std::vector<int> eQC(nlocs);
+        ospace.get_db("EffectiveQC1", variable, eQC);
+        stats(var, oceanBasins, eQC, outputFile, varname, dateint, expId);
+
+        // Close the file
+        outputFile.close();
+      }
+      getComm().barrier();
+      return EXIT_SUCCESS;
+    }
+
+    // -----------------------------------------------------------------------------
+    static const std::string classname() {return "gdasapp::IodaExample";}
+
+    // -----------------------------------------------------------------------------
+    void stats(const std::vector<float>& ombg,
+               const std::vector<int>& oceanBasins,
+               const std::vector<int>& qc,
+               std::ofstream& outputFile,
+               const std::string varname,
+               const int dateint,
+               const std::string expId) const {
+      float rmseGlobal(0.0);
+      float biasGlobal(0.0);
+      int cntGlobal(0);
+
+      for (const auto& ocean : oceans_) {
+        float rmse(0.0);
+        float bias(0.0);
+        int cnt(0);
+        for (size_t i = 0; i < ombg.size(); ++i) {
+          if (ombg[i] != fillVal_ &&
+              oceanBasins[i] == ocean.second &&
+              qc[i] == 0) {
+            rmse += std::pow(ombg[i], 2);
+            bias += ombg[i];
+            cnt += 1;
+          }
+        }
+        if (cnt > 0) {  // Ensure division by cnt is valid
+          rmseGlobal += rmse;
+          biasGlobal += bias;
+          cntGlobal += cnt;
+          rmse = std::sqrt(rmse / cnt);
+          bias = bias / cnt;
+
+          outputFile << expId << ","
+                     << varname << ","
+                     << ocean.first << ","
+                     << dateint << ","
+                     << rmse << ","
+                     << bias << ","
+                     << cnt << "\n";
+        }
+      }
+      if (cntGlobal > 0) {  // Ensure division by cntGlobal is valid
+        outputFile << expId << ","
+                   << varname << ","
+                   << "Global,"
+                   << dateint << ","
+                   << std::sqrt(rmseGlobal / cntGlobal) << ","
+                   << biasGlobal / cntGlobal << ","
+                   << cntGlobal << "\n";
+      }
+    }
+
+    // -----------------------------------------------------------------------------
+    // Function to extract the date from the filename
+    int extractDateFromFilename(const std::string& filename) const {
+      if (filename.length() < 14) {
+        throw std::invalid_argument("Filename is too short to contain a valid date.");
+      }
+      std::string dateString = filename.substr(filename.length() - 14, 10);
+
+      // Convert the extracted date string to an integer
+      int date = std::stoi(dateString);
+      return date;
+    }
+    // -----------------------------------------------------------------------------
+   private:
+    std::string appname() const {
+      return "gdasapp::ObsStats";
+    }
+
+    // -----------------------------------------------------------------------------
+    // Data members
+    std::map<std::string, int> oceans_;
+    double fillVal_;
+    // -----------------------------------------------------------------------------
+  };
+
+}  // namespace gdasapp
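For reference, the per-basin accumulation that ObsStats::stats() performs reduces to a masked bias/RMSE over the omb values; here is a minimal standalone sketch with toy numbers (not the application code above):

```cpp
// Masked mean bias and RMSE over QC-passing, non-missing omb values,
// mirroring the accumulation in ObsStats::stats(). Data are toy values.
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  const float fillVal = 9.9e9f;  // stand-in for the missing value
  const std::vector<float> omb = {0.5f, -0.3f, 0.2f, fillVal};
  const std::vector<int> qc = {0, 0, 0, 0};  // qc != 0 rejects a location

  float sumSq = 0.0f, sum = 0.0f;
  int cnt = 0;
  for (size_t i = 0; i < omb.size(); ++i) {
    if (omb[i] != fillVal && qc[i] == 0) {
      sumSq += omb[i] * omb[i];
      sum += omb[i];
      ++cnt;
    }
  }
  if (cnt > 0) {
    std::cout << "Count: " << cnt << "\n";                     // 3
    std::cout << "Bias:  " << sum / cnt << "\n";               // 0.4 / 3
    std::cout << "RMSE:  " << std::sqrt(sumSq / cnt) << "\n";  // sqrt(0.38 / 3)
  }
  return 0;
}
```

Each (Exp, Variable, Ocean, date) row in the output CSV is exactly such a triple, plus a Global row accumulated across all basins.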