In [2]:
import os
import sys
import xarray as xr
import numpy as np

In [2]:
'''
Test 1: 

Dimensions include member as well as experiment. The plots should output combinations of these dimensions. 
'''

# make a dataset 

ds = xr.Dataset()
ds['time'] = xr.DataArray(np.arange(10), dims='time')
ds['member'] = xr.DataArray(np.arange(3), dims='member')
ds['experiment'] = xr.DataArray(['aer', 'ssp370'], dims='experiment')
ds['lat'] = xr.DataArray(np.arange(20), dims='lat')
ds['lon'] = xr.DataArray(np.arange(20), dims='lon')
ds['data'] = xr.DataArray(np.random.rand(10, 3, 2, 20, 20), dims=['time', 'member', 'experiment', 'lat', 'lon'])

# save to a zarr store in ./test_zarrs
ds.to_zarr('./test_zarrs/test1.zarr')

ContainsGroupError: path '' contains a group

In [7]:
'''
Test 2: 

What if the spatial dimensions include lat, lon, AND lev? 
'''

ds = xr.Dataset()
ds['time'] = xr.DataArray(np.arange(10), dims='time')
ds['member'] = xr.DataArray(np.arange(3), dims='member')
ds['lat'] = xr.DataArray(np.arange(20), dims='lat')
ds['lon'] = xr.DataArray(np.arange(20), dims='lon')
ds['lev'] = xr.DataArray(np.arange(5), dims='lev')
ds['data'] = xr.DataArray(np.random.rand(10, 3, 20, 20, 5), dims=['time', 'member', 'lat', 'lon', 'lev'])

ds.to_zarr('./test_zarrs/test2.zarr')

<xarray.backends.zarr.ZarrStore at 0x7f764d3778c0>

In [4]:
'''
Test 3: 

The first and second timestep are completely 0. 
'''

ds = xr.Dataset()
ds['time'] = xr.DataArray(np.arange(10), dims='time')
ds['member'] = xr.DataArray(np.arange(3), dims='member')
ds['lat'] = xr.DataArray(np.arange(20), dims='lat')
ds['lon'] = xr.DataArray(np.arange(20), dims='lon')
ds['lev'] = xr.DataArray(np.arange(5), dims='lev')
data = np.random.rand(10, 3, 20, 20, 5)
data[0] = np.zeros((3, 20, 20, 5))
data[1] = np.zeros((3, 20, 20, 5))
ds['data'] = xr.DataArray(data, dims=['time', 'member', 'lat', 'lon', 'lev'])

ds.to_zarr('./test_zarrs/test3.zarr')

<xarray.backends.zarr.ZarrStore at 0x7fa19712d4c0>

In [None]:
'''
Test 4: 

The last timestep is completely NaNs
'''

time = np.arange(10)

ds = xr.Dataset()
ds['time'] = xr.DataArray(time, dims='time')
ds['member'] = xr.DataArray(np.arange(3), dims='member')
ds['lat'] = xr.DataArray(np.arange(20), dims='lat')
ds['lon'] = xr.DataArray(np.arange(20), dims='lon')
ds['lev'] = xr.DataArray(np.arange(5), dims='lev')
data = np.random.rand(10, 3, 20, 20, 5)
data[-1, :, :, :, :] = np.nan
ds['data'] = xr.DataArray(data, dims=['time', 'member', 'lat', 'lon', 'lev'])

ds.to_zarr('./test_zarrs/test4.zarr')

<xarray.backends.zarr.ZarrStore at 0x7f500a558440>

In [None]:
'''
Test 5: 

It's a duplicate of the given file, except that one timestep is overwritten to make it not STRICTLY increasing in /projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_pr.zarr'
'''

ds = xr.open_zarr("/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_pr.zarr")

time_vals = ds.time.values
time_val = time_vals[5]
time_vals[6] = time_val
time_index = xr.DataArray(data=time_vals, attrs=ds.time.attrs, dims=ds.time.dims)
ds["time"] = time_index
                       
ds.to_zarr('./test_zarrs/test5.zarr')

<xarray.backends.zarr.ZarrStore at 0x7fe21ae8fc40>

In [3]:
'''
Test 6: 

It's a duplicate of the given file, except that two timesteps are overwritten to make it not increasing in /projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_pr.zarr'
'''

ds = xr.open_zarr("/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_pr.zarr")

time_vals = ds.time.values
time_val = time_vals[2]
time_vals[6] = time_val
time_vals[10] = time_vals[7]
time_index = xr.DataArray(data=time_vals, attrs=ds.time.attrs, dims=ds.time.dims)
ds["time"] = time_index
                       
ds.to_zarr('./test_zarrs/test6.zarr')

<xarray.backends.zarr.ZarrStore at 0x7f4464eb1cc0>

In [None]:
'''
Test 7: 

It's a duplicate of the given file, except that every other time step is deleted in /projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_pr.zarr'

When compared with its original self, it should be caught that the timesteps are different. 
'''

ds = xr.open_zarr("/projects/dgs/persad_research/SIMULATION_DATA/ZARR/RAMIP/SIM_VARIABLES/NorESM2-LM_ssp126_day_pr.zarr")

# delete every other timestep in ds 
time_vals = ds.time.values
time_vals = time_vals[::2]
time_index = xr.DataArray(data=time_vals, attrs=ds.time.attrs, dims=ds.time.dims)
ds["time"] = time_index


ds.to_zarr('./test_zarrs/test7.zarr')