In [None]:
import xarray
from dask.distributed import Client
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import xarray
import pandas as pd
from datetime import datetime, timedelta
import cartopy.crs as ccrs

from preprocess.sza import solarzenithangle


In [None]:

client = Client(n_workers = 12)

In [None]:
hres = xarray.open_zarr('/scratch/snx3000/kschuurm/DATA/HRSEVIRI_corrected.zarr')
hres

In [None]:
sarah = xarray.open_zarr('/scratch/snx3000/kschuurm/DATA/SARAH3.zarr')
sarah

In [None]:

lat = hres.lat.copy()
lon = hres.lon.copy()
# del hres

def reindex_sarah3(df):
    return df.reindex(lat=lat, lon=lon, method='nearest').drop_vars(['record_status'])


In [None]:
fns = glob('/scratch/snx3000/kschuurm/DATA/SARAH3/ORD53913/SIDin*.nc')
i_step = 100

for i in tqdm(range(0, len(fns), i_step)):
    
    sl = slice(i, i+i_step)
    
    sid = xarray.open_mfdataset(fns[sl], 
                            parallel=True,
                            engine='h5netcdf',
                            chunks={'time':60, 'lat':-1, 'lon':-1},
                            preprocess=reindex_sarah3)
    if i == 0:
        sid.to_zarr('/scratch/snx3000/kschuurm/DATA/SARAH3.zarr', mode='w')
    else:
        sid.to_zarr('/scratch/snx3000/kschuurm/DATA/SARAH3.zarr', append_dim='time')


In [None]:
sis = xarray.open_mfdataset('/scratch/snx3000/kschuurm/DATA/SARAH3/SIS_*.nc', 
                            engine='h5netcdf',
                           parallel=True,
                            chunks={'time':60, 'lat':-1, 'lon':-1},
                           preprocess=reindex_sarah3)
sarah = sis.SIS.to_zarr('/scratch/snx3000/kschuurm/DATA/SARAH3.zarr', mode='a')

In [31]:
fns = glob('/scratch/snx3000/kschuurm/DATA/SARAH3/ORD52533/CALin2014*.nc') + glob('/scratch/snx3000/kschuurm/DATA/SARAH3/ORD52533/CALin2015*.nc')
# print(len(fns))
cal = xarray.open_mfdataset(fns, 
                            parallel=True,
                            engine='h5netcdf',
                            chunks={'time':60, 'lat':-1, 'lon':-1},
                            preprocess=reindex_sarah3)

cal.CAL.to_zarr('/scratch/snx3000/kschuurm/DATA/SARAH3.zarr', mode='a')




<xarray.backends.zarr.ZarrStore at 0x15541c714270>

In [None]:
cal = xarray.open_mfdataset(fns, 
                            parallel=True,
                            engine='h5netcdf',
                            chunks={'time':60, 'lat':-1, 'lon':-1},
                            preprocess=reindex_sarah3)
cal.to_zarr('/scratch/snx3000/kschuurm/DATA/SARAH3.zarr',  append_dim='time')

In [33]:

def solarzenithangle_latlon(ds):
    subsample_int = 4
    
    da_temp = ds.channel_1
    
    a = [x for x in range(0, len(da_temp.time)-1, 1)]
    a.append(len(da_temp.time)-1)
    datetimes = pd.to_datetime(da_temp.time[a])
    
    lats = np.arange(da_temp.lat.min(), da_temp.lat.max()+1, 1, dtype=np.float32)
    lons = np.arange(da_temp.lon.min(), da_temp.lon.max()+1, 1, dtype=np.float32)
    
    
    da_sza = xarray.DataArray(coords={'time':datetimes, 'lat':lats, 'lon':lons,},
                          data=np.zeros(shape=(len(datetimes), len(lats), len(lons)),
                                       dtype=np.float16))
    da_sza.name = 'SZA'
    da_sza.attrs.update({'long_name': 'Solar Zenith Angle at sea level',
                      'standard_name': 'solar_zenith_angle',
                      'units':'rad'})
    da_azi = xarray.DataArray(coords={'time':datetimes, 'lat':lats, 'lon':lons,},
                          data=np.zeros(shape=(len(datetimes), len(lats), len(lons)), 
                                        dtype=np.float16))
    da_azi.name = 'AZI'
    da_azi.attrs.update({'long_name': 'Solar Azimuth Angle at sea level',
                      'standard_name': 'solar_azimuth_angle',
                      'units':'rad'})
    
    for i, lat in tqdm(enumerate(lats)):
        for j, lon in enumerate(lons):
            sza, azi = solarzenithangle(datetimes, lat, lon, 0)
            da_sza[:,i, j] = sza
            da_azi[:,i,j] = azi
    
    
    ds = xarray.Dataset({'SZA':da_sza, 'AZI':da_azi})
    return ds


        

In [34]:
ds_sun = solarzenithangle_latlon(hres)

28it [20:49, 44.61s/it]


In [35]:
ds_sun.chunk({'time':1, 'lat':-1, 'lon':-1}).to_zarr('temp.zarr')

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


<xarray.backends.zarr.ZarrStore at 0x155416ddc510>

In [36]:
temp = xarray.open_zarr('temp.zarr')

In [37]:
ds_sun_interp = temp.interp(lat=hres.lat, lon=hres.lon)

In [38]:
temp2 = ds_sun_interp.to_zarr('temp2.zarr')

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


In [41]:
temp2 = xarray.open_zarr('temp2.zarr')

ValueError: Starting with Zarr 2.11.0, stores must be subclasses of BaseStore, if your store exposes the MutableMapping interface wrap it in Zarr.storage.KVStore. Got <xarray.backends.zarr.ZarrStore object at 0x15541578c970>

In [39]:
for var in temp2:
    del temp2[var].encoding['chunk']

ds_sun_final = temp2.chunk({'time':60, 'lat':-1, 'lon':-1})

TypeError: 'ZarrStore' object is not iterable

In [None]:
ds_sun_final.to_zarr('/scratch/snx3000/kschuurm/DATA/HRSEVIRI_corrected.zarr', mode='a')
