# Old Copy

We started this Python notebook to diagnose problems within our Zarr stores. This is a version of the notebook that Sebastian used to debug issues we had in the past. We are keeping a copy of this version as documentation of some of the past errors we've had.

In [1]:
from run_tests import run, check_model
import os

import xarray as xr
import numpy as np
from datetime import timedelta
import cftime

In [2]:
# Directory roots used throughout the notebook. Both are plain literals, so no
# f-string formatting is needed.
DIR_DATA = "/projects/dgs/persad_research/SIMULATION_DATA/DATA/RAMIP/"
DIR_ZARR = "/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/"

# Testing Within A Model

First we're going to check if all the things that should be consistent within a *model's* ZARR stores are consistent. 

## CESM2 

In [3]:
# Reuse the Zarr root from the configuration cell rather than repeating the literal path.
path = DIR_ZARR
model = 'CESM2'

# Gather all of the zarr stores that belong to this model.
# - Matching on "<model>_" keeps out stores of other models whose names merely
#   begin with the same characters.
# - Sorting makes the order independent of the filesystem's listing order, so
#   re-running the cell reports the stores in a stable sequence.
zarr_files = sorted(
    os.path.join(path, f) for f in os.listdir(path) if f.startswith(f"{model}_")
)

check_model(zarr_files, verbose=True)

Checking 25 datasets: ['CESM2_ssp370-sas126aer_day_pr.zarr', 'CESM2_historical_day_sfcWind.zarr', 'CESM2_historical_day_tasmax.zarr', 'CESM2_ssp126_day_hurs.zarr', 'CESM2_ssp370-eas126aer_day_tasmax.zarr', 'CESM2_ssp370-afr126aer_day_pr.zarr', 'CESM2_ssp370-eas126aer_day_pr.zarr', 'CESM2_ssp370-sas126aer_day_hurs.zarr', 'CESM2_ssp370-126aer_day_tasmax.zarr', 'CESM2_ssp370-126aer_day_pr.zarr', 'CESM2_ssp370-afr126aer_day_hurs.zarr', 'CESM2_ssp370-nae126aer_day_pr.zarr', 'CESM2_ssp370_day_tasmax.zarr', 'CESM2_ssp126_day_tasmax.zarr', 'CESM2_historical_day_pr.zarr', 'CESM2_ssp370-afr126aer_day_tasmax.zarr', 'CESM2_ssp370-nae126aer_day_tasmax.zarr', 'CESM2_ssp370_day_hurs.zarr', 'CESM2_ssp370-eas126aer_day_hurs.zarr', 'CESM2_historical_day_hurs.zarr', 'CESM2_ssp126_day_pr.zarr', 'CESM2_ssp370-sas126aer_day_tasmax.zarr', 'CESM2_ssp370_day_pr.zarr', 'CESM2_ssp370-nae126aer_day_hurs.zarr', 'CESM2_ssp370-126aer_day_hurs.zarr']
Spatial dims are not the same
Spatial dims are not the same
Spatial

In [4]:
# Open the CESM2 'hurs' stores for the historical and ssp370 experiments, building
# the paths from the shared DIR_ZARR root instead of repeating the literal directory.
historical = xr.open_zarr(f"{DIR_ZARR}CESM2_historical_day_hurs.zarr")
ssp370 = xr.open_zarr(f"{DIR_ZARR}CESM2_ssp370_day_hurs.zarr")

# True only when both latitude coordinates have the same shape and the same values.
np.array_equal(historical.lat, ssp370.lat)

True

The latitude coordinates of our CESM2 historical experiment are the same as the latitude coordinates of the rest of our CESM2 experiments.

In [5]:
# Display the dimensions of the historical dataset for comparison with ssp370.
historical.dims



In [6]:
# Display the dimensions of the ssp370 dataset for comparison with historical.
ssp370.dims



Our CESM2 historical experiment has different spatial dimensions than the rest because the historical data has 'lev' while the rest does not.

## MRI-ESM2-0 

In [7]:
# Reuse the Zarr root from the configuration cell rather than repeating the literal path.
path = DIR_ZARR
model = 'MRI-ESM2-0'

# Gather all of the zarr stores that belong to this model.
# - Matching on "<model>_" keeps out stores of other models whose names merely
#   begin with the same characters.
# - Sorting makes the order independent of the filesystem's listing order, so
#   re-running the cell reports the stores in a stable sequence.
zarr_files = sorted(
    os.path.join(path, f) for f in os.listdir(path) if f.startswith(f"{model}_")
)

check_model(zarr_files, verbose = True)

Checking 2 datasets: ['MRI-ESM2-0_ssp370_day_pr.zarr', 'MRI-ESM2-0_ssp370_day_hurs.zarr']


SUMMARY: 3/3 checks passed.
[1m[32mMonotonic Check passed: [0m[32mTime coordinates are monotonic.
[0m[1m[32mCalendar Check passed: [0m[32mTime coordinates use the same calendar across all datasets.
[0m[1m[32mSpatial Coord Check passed: [0m[32mSpatial coordinates are equivalent across all datasets.
[0m


## NorESM2-LM 

In [8]:
# Reuse the Zarr root from the configuration cell rather than repeating the literal path.
path = DIR_ZARR
model = 'NorESM2-LM'

# Gather all of the zarr stores that belong to this model.
# - Matching on "<model>_" keeps out stores of other models whose names merely
#   begin with the same characters.
# - Sorting makes the order independent of the filesystem's listing order, so
#   re-running the cell reports the stores in a stable sequence.
zarr_files = sorted(
    os.path.join(path, f) for f in os.listdir(path) if f.startswith(f"{model}_")
)

check_model(zarr_files, verbose=True)

Checking 32 datasets: ['NorESM2-LM_ssp370-eas126aer_day_pr.zarr', 'NorESM2-LM_ssp370-afr126aer_day_tasmax.zarr', 'NorESM2-LM_ssp370-sas126aer_day_hurs.zarr', 'NorESM2-LM_ssp370-nae126aer_day_tasmax.zarr', 'NorESM2-LM_ssp370_day_pr.zarr', 'NorESM2-LM_ssp126_day_pr.zarr', 'NorESM2-LM_ssp370-afr126aer_day_pr.zarr', 'NorESM2-LM_ssp370-eas126aer_day_sfcWind.zarr', 'NorESM2-LM_historical_day_hurs.zarr', 'NorESM2-LM_ssp370-afr126aer_day_sfcWind.zarr', 'NorESM2-LM_ssp370_day_sfcWind.zarr', 'NorESM2-LM_ssp370-sas126aer_day_tasmax.zarr', 'NorESM2-LM_ssp370-nae126aer_day_sfcWind.zarr', 'NorESM2-LM_ssp126_day_hurs.zarr', 'NorESM2-LM_historical_day_sfcWind.zarr', 'NorESM2-LM_ssp370-sas126aer_day_pr.zarr', 'NorESM2-LM_ssp370-126aer_day_sfcWind.zarr', 'NorESM2-LM_ssp370-nae126aer_day_hurs.zarr', 'NorESM2-LM_ssp126_day_sfcWind.zarr', 'NorESM2-LM_ssp370-eas126aer_day_hurs.zarr', 'NorESM2-LM_ssp370-126aer_day_hurs.zarr', 'NorESM2-LM_ssp370-126aer_day_pr.zarr', 'NorESM2-LM_ssp126_day_tasmax.zarr', 'NorES

In [9]:
# Open the NorESM2-LM ssp126 'pr' store; later cells reuse this dataset.
NorESM2_ssp126 = xr.open_zarr(f"{DIR_ZARR}NorESM2-LM_ssp126_day_pr.zarr")
# Show the time coordinate as an index (its repr includes length and calendar).
NorESM2_ssp126.time.to_index()

CFTimeIndex([2015-01-01 12:00:00, 2015-01-02 12:00:00, 2015-01-03 12:00:00,
             2015-01-04 12:00:00, 2015-01-05 12:00:00, 2015-01-06 12:00:00,
             2015-01-07 12:00:00, 2015-01-08 12:00:00, 2015-01-09 12:00:00,
             2015-01-10 12:00:00,
             ...
             2100-12-22 12:00:00, 2100-12-23 12:00:00, 2100-12-24 12:00:00,
             2100-12-25 12:00:00, 2100-12-26 12:00:00, 2100-12-27 12:00:00,
             2100-12-28 12:00:00, 2100-12-29 12:00:00, 2100-12-30 12:00:00,
             2100-12-31 12:00:00],
            dtype='object', length=42337, calendar='noleap', freq=None)

In [10]:
# Show the raw time values stored in the dataset.
NorESM2_ssp126.time.values

array([cftime.DatetimeNoLeap(2015, 1, 1, 12, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2015, 1, 2, 12, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2015, 1, 3, 12, 0, 0, 0, has_year_zero=True),
       ...,
       cftime.DatetimeNoLeap(2100, 12, 29, 12, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2100, 12, 30, 12, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2100, 12, 31, 12, 0, 0, 0, has_year_zero=True)],
      dtype=object)

In [11]:
ds = NorESM2_ssp126


def generate_noleap_date_range(start_date, end_date):
    """Return every daily time stamp between two dates in the 'noleap' calendar.

    Parameters
    ----------
    start_date, end_date
        First and last date of the range, passed straight through to
        ``xr.cftime_range`` as ``start`` and ``end``.

    Returns
    -------
    The index produced by ``xr.cftime_range`` with ``freq='D'`` and
    ``calendar='noleap'``.
    """
    return xr.cftime_range(start=start_date, end=end_date, freq='D', calendar='noleap')


# Earliest and latest time stamps present in the dataset. min()/max() are used
# rather than the first/last element, so the result does not depend on the
# time axis being sorted.
start_date = ds['time'].min().item()
end_date = ds['time'].max().item()

# Every day that should exist between those two bounds.
full_date_range = generate_noleap_date_range(start_date, end_date)

# Raw time values of the dataset (no conversion is performed here).
dataset_dates = ds['time'].values

# Build a set once so each membership test is a hash lookup instead of a scan
# over the whole time axis for every expected date.
present_dates = set(dataset_dates)

# Find missing dates.
# NOTE: this only detects days that are absent. Days that appear more than once,
# or out of order, still count as present and are not reported by this check.
missing_dates = [date for date in full_date_range if date not in present_dates]

if len(missing_dates) == 0:
    print("No missing days!")
else:
    print(f"Missing days: {missing_dates}")


No missing days!


We have 42337 time steps. However, the 86 years from 2015 through 2100 in a `noleap` calendar should give only 86 × 365 = 31390 time steps. Let's check where the time coordinate is non-monotonic:

In [12]:
# Time coordinate of the dataset under inspection, viewed as an index
array = ds.time.to_index()

# Step between each pair of consecutive time stamps
differences = np.diff(array)

# Positions whose step is negative, i.e. where time jumps backwards
violations = np.flatnonzero(differences < timedelta(days=0))

if len(violations) > 0:
    # Report the position of the first time stamp after each backwards jump,
    # then the values on either side of the jump.
    print(f"Monotonicity violated at indices: {violations + 1}")
    print(f"Values causing violation: {array[violations]} -> {array[violations + 1]}")
else:
    print("The array is monotonically increasing.")


Monotonicity violated at indices: [ 9489 24088 35037]
Values causing violation: CFTimeIndex([2040-12-30 12:00:00, 2070-12-30 12:00:00, 2090-12-30 12:00:00],
            dtype='object', length=3, calendar='noleap', freq=None) -> CFTimeIndex([2031-01-01 12:00:00, 2061-01-01 12:00:00, 2081-01-01 12:00:00],
            dtype='object', length=3, calendar='noleap', freq=None)


We have three instances where our time series is discontinuous. Let's see what that looks like:

Instance 1:

In [13]:
# Time stamp immediately before the first reported violation index (9489).
ds.time.to_index()[9488]

cftime.DatetimeNoLeap(2040, 12, 30, 12, 0, 0, 0, has_year_zero=True)

In [14]:
# Time stamp at the first reported violation index.
ds.time.to_index()[9489]

cftime.DatetimeNoLeap(2031, 1, 1, 12, 0, 0, 0, has_year_zero=True)

Instance 2:

In [15]:
# Time stamp immediately before the second reported violation index (24088).
ds.time.to_index()[24087]

cftime.DatetimeNoLeap(2070, 12, 30, 12, 0, 0, 0, has_year_zero=True)

In [16]:
# Time stamp at the second reported violation index.
ds.time.to_index()[24088]

cftime.DatetimeNoLeap(2061, 1, 1, 12, 0, 0, 0, has_year_zero=True)

Instance 3:

In [17]:
# Time stamp immediately before the third reported violation index (35037).
ds.time.to_index()[35036]

cftime.DatetimeNoLeap(2090, 12, 30, 12, 0, 0, 0, has_year_zero=True)

In [18]:
# Time stamp at the third reported violation index.
ds.time.to_index()[35037]

cftime.DatetimeNoLeap(2081, 1, 1, 12, 0, 0, 0, has_year_zero=True)

## SPEAR 

In [19]:
# Reuse the Zarr root from the configuration cell rather than repeating the literal path.
path = DIR_ZARR
model = 'SPEAR'

# Gather all of the zarr stores that belong to this model.
# - Matching on "<model>_" keeps out stores of other models whose names merely
#   begin with the same characters.
# - Sorting makes the order independent of the filesystem's listing order, so
#   re-running the cell reports the stores in a stable sequence.
zarr_files = sorted(
    os.path.join(path, f) for f in os.listdir(path) if f.startswith(f"{model}_")
)

check_model(zarr_files)



SUMMARY: 3/3 checks passed.
[1m[32mMonotonic Check passed: [0m[32mTime coordinates are monotonic.
[0m[1m[32mCalendar Check passed: [0m[32mTime coordinates use the same calendar across all datasets.
[0m[1m[32mSpatial Coord Check passed: [0m[32mSpatial coordinates are equivalent across all datasets.
[0m


# Test Within Variables

In [20]:
# Reuse the Zarr root from the configuration cell rather than repeating the literal path.
path = DIR_ZARR
variable = 'hurs'

# Gather all the zarr stores for this variable.
# - Store names in this directory end in "_<variable>.zarr", so match on that
#   suffix. A plain substring test would, for example, let 'tas' also select
#   the 'tasmax' stores.
# - Sorting makes the order independent of the filesystem's listing order.
zarr_files = sorted(
    os.path.join(path, f)
    for f in os.listdir(path)
    if f.endswith(f"_{variable}.zarr")
)
print(zarr_files)
run(zarr_files)

['/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp370-sas126aer_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/SPEAR_ssp370-nae126aer_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/SPEAR_ssp370-eas126aer_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/CESM2_ssp126_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_historical_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/CanESM5-1_ssp370_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/CESM2_ssp370-sas126aer_day_hurs.zarr', '/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/SPEAR_ssp370-afr126aer_day_hurs.zarr', '/projects/dgs/pe