I am attempting to run any REF diagnostic with very preliminary model run results, as mentioned in #678. While I've been able to ingest a CMIP7 dataset, I get the following error when attempting to run the `global-mean-timeseries` diagnostic.
; 2026-05-25 21:15:56.706 +00:00 | INFO | climate_ref.database - Creating backup of database at /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/.ref/db/backups/climate_ref_20260525_211556.db
; 2026-05-25 21:15:56.709 +00:00 | INFO | climate_ref.database - Removing old backup /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/.ref/db/backups/climate_ref_20260525_210428.db
; 2026-05-25 21:16:03.968 +00:00 | INFO | climate_ref.solver - Solving for diagnostics that require recalculation...
; 2026-05-25 21:16:03.974 +00:00 | INFO | climate_ref.solver - Solving esmvaltool/global-mean-timeseries
; ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── Traceback (most recent call last) ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref/src/climate_ref/cli/solve.py:104 in solve │
; │ │
; │ 101 │ │ dataset=parsed_dataset_filters, │
; │ 102 │ ) │
; │ 103 │ │
; │ ❱ 104 │ solve_required_executions( │
; │ 105 │ │ config=config, │
; │ 106 │ │ db=db, │
; │ 107 │ │ dry_run=dry_run, │
; │ │
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref/src/climate_ref/solver.py:635 in solve_required_executions │
; │ │
; │ 632 │ │ │ ).join(DiagnosticModel.provider) │
; │ 633 │ │ } │
; │ 634 │ │
; │ ❱ 635 │ for potential_execution in solver.solve(filters): │
; │ 636 │ │ definition = potential_execution.build_execution_definition(output_root=config.p │
; │ 637 │ │ provider_slug = potential_execution.provider.slug │
; │ 638 │ │ diagnostic_full_slug = potential_execution.diagnostic.full_slug() │
; │ │
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref/src/climate_ref/solver.py:519 in solve │
; │ │
; │ 516 │ │ │ │ │ continue │
; │ 517 │ │ │ │ logger.info(f"Solving {diagnostic.full_slug()}") │
; │ 518 │ │ │ │ try: │
; │ ❱ 519 │ │ │ │ │ yield from solve_executions(data_catalog, diagnostic, provider) │
; │ 520 │ │ │ │ except InvalidDiagnosticException as e: │
; │ 521 │ │ │ │ │ # Skip diagnostics that don't have matching data │
; │ 522 │ │ │ │ │ logger.debug(f"Skipping {diagnostic.full_slug()}: {e}") │
; │ │
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref/src/climate_ref/solver.py:290 in solve_executions │
; │ │
; │ 287 │ │ │ │ raise TypeError(f"Expected a sequence of DataRequirement, got {type(requ │
; │ 288 │ │ │ # Buffer executions to check if any were actually produced │
; │ 289 │ │ │ # _solve_from_data_requirements returns empty if source types are missing │
; │ ❱ 290 │ │ │ executions = list( │
; │ 291 │ │ │ │ _solve_from_data_requirements(data_catalog, diagnostic, requirement_coll │
; │ 292 │ │ │ ) │
; │ 293 │ │ │ if executions: │
; │ │
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref/src/climate_ref/solver.py:326 in _solve_from_data_requirements │
; │ │
; │ 323 │ │ │ ) │
; │ 324 │ │ │ return │
; │ 325 │ │ │
; │ ❱ 326 │ │ dataset_groups[requirement.source_type] = extract_covered_datasets( │
; │ 327 │ │ │ data_catalog[requirement.source_type], requirement │
; │ 328 │ │ ) │
; │ 329 │
; │ │
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref/src/climate_ref/solver.py:187 in extract_covered_datasets │
; │ │
; │ 184 │ │ logger.error(f"No datasets found in the data catalog: {requirement.source_type.v │
; │ 185 │ │ return {} │
; │ 186 │ │
; │ ❱ 187 │ subset = requirement.apply_filters(catalog_df) │
; │ 188 │ │
; │ 189 │ if len(subset) == 0: │
; │ 190 │ │ logger.debug(f"No datasets found for requirement {requirement}") │
; │ │
; │ /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/packages/climate-ref-core/src/climate_ref_core/diagnostics.py:407 in apply_filters │
; │ │
; │ 404 │ │ │ │ clean_value = value if isinstance(value, tuple) else (value,) │
; │ 405 │ │ │ │ │
; │ 406 │ │ │ │ if facet not in data_catalog.columns: │
; │ ❱ 407 │ │ │ │ │ raise KeyError( │
; │ 408 │ │ │ │ │ │ f"Facet {facet!r} not in data catalog columns: {data_catalog.col │
; │ 409 │ │ │ │ │ ) │
; │ 410 │ │ │ │ values[facet] = clean_value │
; ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
; KeyError: "Facet 'branded_variable' not in data catalog columns: ['start_time', 'end_time', 'path', 'tracking_id', 'activity_id', 'institution_id', 'source_id', 'experiment_id', 'variant_label', 'variable_id', 'grid_label', 'frequency', 'region', 'branding_suffix', 'version', 'mip_era', 'realm', 'nominal_resolution', 'license_id',
; 'external_variables', 'branch_time_in_child', 'branch_time_in_parent', 'parent_activity_id', 'parent_experiment_id', 'parent_mip_era', 'parent_source_id', 'parent_time_units', 'parent_variant_label', 'standard_name', 'long_name', 'units', 'time_units', 'calendar', 'finalised', 'instance_id']"
# remove existing database
rm $REF_DIR/climate-ref/.ref/db/climate_ref.db
# ingest test datasets
# cmip6
ref datasets ingest --source-type cmip6 /fs/site7/eccc/crd/crd_shared/ESGF_downloads/CMIP6/CMIP/*/CanESM5-1/piControl/r1i1p1f1/Amon/tas
###########################################################################
# solve diagnostics
# https://climate-ref.readthedocs.io/en/latest/getting-started/04-solve/
ref solve --diagnostic 'global-mean-timeseries' --provider esmvaltool
; 2026-05-25 21:11:27.177 +00:00 | INFO | climate_ref.solver - Running new execution for execution group: 'esmvaltool/global-mean-timeseries/cmip6_piControl_gn_r1i1p1f1_CanESM5-1_Amon_tas'
; 2026-05-25 21:11:27.191 +00:00 | ERROR | climate_ref.solver - No datasets found in the data catalog: cmip7
; 2026-05-25 21:11:27.192 +00:00 | INFO | climate_ref.solver - Solve complete
; 2026-05-25 21:11:27.192 +00:00 | INFO | climate_ref.solver - Found 1 new executions
; 2026-05-25 21:11:27.192 +00:00 | INFO | climate_ref.solver - esmvaltool/global-mean-timeseries: 1 new executions
; 2026-05-25 21:11:27.192 +00:00 | INFO | climate_ref.solver - esmvaltool: 1 new executions
; Waiting for executions to complete: 0%| | 0/1 [00:00<?, ?execution/s]2026-05-25 21:11:32.458 +00:00 | INFO | climate_ref_core.executor - Executing 'esmvaltool/global-mean-timeseries/cmip6_piControl_gn_r1i1p1f1_CanESM5-1_Amon_tas'
; 2026-05-25 21:11:32.458 +00:00 | WARNING | climate_ref_core.executor - Output directory /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/.ref/scratch/esmvaltool/global-mean-timeseries/piControl_gn_r1i1p1f1_CanESM5-1_Amon_tas_g1_v1_b0d1798d/1 already exists. Removing the existing directory.
; 2026-05-25 21:11:54.700 +00:00 | INFO | climate_ref.executor.result_handling - <climate_ref.models.execution.Execution object at 0x7f8aa321b890> successful
; Waiting for executions to complete: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:27<00:00, 27.56s/execution]
; 2026-05-25 21:11:54.758 +00:00 | INFO | climate_ref.executor.local - All executions completed successfully
; 2026-05-25 21:11:54.758 +00:00 | INFO | climate_ref.solver - All executions complete
##############################
# cmip7
ref datasets ingest --source-type cmip7 /space/hall7/sitestore/eccc/crd/cccma/users/rrd001/canesm_runs/c7-v60-may8-ref/data/nc_output/MIP-DRS7/CMIP7/CMIP/CCCma/CanESM6-0-rc1-MR-c7-v60-may8-ref/cccma-dev-exp/r1i1p1f1/glb/mon/tas
; 2026-05-25 21:13:43.353 +00:00 | INFO | climate_ref.database - Connecting to database at sqlite:////fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/.ref/db/climate_ref.db
; 2026-05-25 21:13:43.372 +00:00 | INFO | climate_ref.database - Creating backup of database at /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/.ref/db/backups/climate_ref_20260525_211343.db
; 2026-05-25 21:13:43.373 +00:00 | INFO | climate_ref.database - Removing old backup /fs/site7/eccc/crd/cccma/users/scrd115/REF_v3/climate-ref/.ref/db/backups/climate_ref_20260525_210237.db
; 2026-05-25 21:13:43.480 +00:00 | INFO | climate_ref.cli.datasets - Ingesting /space/hall7/sitestore/eccc/crd/cccma/users/rrd001/canesm_runs/c7-v60-may8-ref/data/nc_output/MIP-DRS7/CMIP7/CMIP/CCCma/CanESM6-0-rc1-MR-c7-v60-may8-ref/cccma-dev-exp/r1i1p1f1/glb/mon/tas
; 2026-05-25 21:13:43.481 +00:00 | INFO | climate_ref.datasets.cmip7 - Using complete CMIP7 parser
; 2026-05-25 21:13:43.481 +00:00 | INFO | climate_ref.datasets.catalog_builder - Discovered 6 files matching ['*.nc'] in ['/space/hall7/sitestore/eccc/crd/cccma/users/rrd001/canesm_runs/c7-v60-may8-ref/data/nc_output/MIP-DRS7/CMIP7/CMIP/CCCma/CanESM6-0-rc1-MR-c7-v60-may8-ref/cccma-dev-exp/r1i1p1f1/glb/mon/tas']
; Parsing files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 37.95file/s]
; 2026-05-25 21:13:43.641 +00:00 | INFO | climate_ref.datasets.catalog_builder - Built catalog with 6 valid entries (0 invalid)
; 2026-05-25 21:13:43.649 +00:00 | INFO | climate_ref.cli.datasets - Found 6 files for 3 datasets
; activity_id institution_id source_id experiment_id variant_label region frequency variable_id branding_suffix grid_label version
; ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
; CMIP CCCma CanESM6-0-rc1-MR-c7-v60-may8-ref cccma-dev-exp r1i1p1f1 glb mon tas tavg-h2m-hxy-u gn v20190429
; CMIP CCCma CanESM6-0-rc1-MR-c7-v60-may8-ref cccma-dev-exp r1i1p1f1 glb mon tas tmaxavg-h2m-hxy-u gn v20190429
; CMIP CCCma CanESM6-0-rc1-MR-c7-v60-may8-ref cccma-dev-exp r1i1p1f1 glb mon tas tminavg-h2m-hxy-u gn v20190429
; 2026-05-25 21:13:43.687 +00:00 | INFO | climate_ref.datasets.base - Created new dataset: <Dataset slug=CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tavg-h2m-hxy-u.gn.v20190429 dataset_type=SourceDatasetType.CMIP7 >
; ==== dataset <Dataset slug=CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tavg-h2m-hxy-u.gn.v20190429 dataset_type=SourceDatasetType.CMIP7 >
; 2026-05-25 21:13:43.705 +00:00 | INFO | climate_ref.datasets.base - Created new dataset: <Dataset slug=CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tmaxavg-h2m-hxy-u.gn.v20190429 dataset_type=SourceDatasetType.CMIP7 >
; ==== dataset <Dataset slug=CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tmaxavg-h2m-hxy-u.gn.v20190429 dataset_type=SourceDatasetType.CMIP7 >
; 2026-05-25 21:13:43.718 +00:00 | INFO | climate_ref.datasets.base - Created new dataset: <Dataset slug=CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tminavg-h2m-hxy-u.gn.v20190429 dataset_type=SourceDatasetType.CMIP7 >
; ==== dataset <Dataset slug=CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tminavg-h2m-hxy-u.gn.v20190429 dataset_type=SourceDatasetType.CMIP7 >
; 2026-05-25 21:13:43.726 +00:00 | INFO | climate_ref.datasets - Datasets: 3/0/0 (created/updated/unchanged), Files: 6/0/0/0 (created/updated/removed/unchanged)
# After ingestion, list the datasets to verify:
ref datasets list
###########################################################################
# solve diagnostics
# https://climate-ref.readthedocs.io/en/latest/getting-started/04-solve/
ref solve --diagnostic 'global-mean-timeseries' --provider esmvaltool --dataset-filter source_id=CanESM6-0-rc1-MR-c7-v60-may8-ref
# error message posted above
$ sqlite3 /home/scrd115/site7/REF_v3/climate-ref/.ref/db/climate_ref.db
SQLite version 3.34.1 2021-01-20 14:10:07
sqlite> SELECT * from cmip7_dataset limit 1 ;
id activity_id institution_id source_id experiment_id variant_label variable_id grid_label frequency region branding_suffix version mip_era realm nominal_resolution branch_time_in_child branch_time_in_parent parent_activity_id parent_experiment_id parent_mip_era parent_source_id parent_time_units parent_variant_label standard_name long_name units instance_id license_id external_variables time_units calendar
-- ----------- -------------- -------------------------------- ------------- ------------- ----------- ---------- --------- ------ --------------- --------- ------- ----- ------------------ -------------------- --------------------- ------------------ -------------------- -------------- -------------------------------- --------------------- -------------------- --------------- ---------------------------- ----- ---------------------------------------------------------------------------------------------------------------- ---------- ------------------ ----------------------------- --------
2 CMIP CCCma CanESM6-0-rc1-MR-c7-v60-may8-ref cccma-dev-exp r1i1p1f1 tas gn mon glb tavg-h2m-hxy-u v20190429 CMIP7 atmos 500 km 292000.0 292000.0 CMIP cccma-dev-exp CMIP7 CanESM6-0-rc1-MR-c7-v60-may8-ref days since 1850-01-01 r1i1p1f1 air_temperature Near-Surface Air Temperature K CMIP7.CMIP.CCCma.CanESM6-0-rc1-MR-c7-v60-may8-ref.cccma-dev-exp.r1i1p1f1.glb.mon.tas.tavg-h2m-hxy-u.gn.v20190429 days since 1850-01-01 0:0:0.0 365_day
Describe the bug
I am attempting to run any REF diagnostic with very preliminary model run results, as mentioned in #678. While I've been able to ingest a CMIP7 dataset, I get the following error when attempting to run the `global-mean-timeseries` diagnostic.

Full example