In [None]:
import xarray
import numpy as np
from tqdm import tqdm
from utils.etc import benchmark
from dask.distributed import Client
import os

In [None]:
# Create a dask.distributed client with default settings (no scheduler address
# given, so dask.distributed sets up a local cluster for this session).
client = Client()

In [None]:
# Directory holding the IEA PVPS station NetCDF files.
folder = '/capstor/scratch/cscs/kschuurm/DATA/IEA_PVPS/'

# Index dataset; its `station_name` variable drives the processing loop below.
stations = xarray.open_dataset(f'{folder}index.nc')

In [None]:
def create_collocated_zarr(seviri, station_name, time_window=10, patch_size=15):
    """Collocate one IEA PVPS station with a SEVIRI patch and write it to zarr.

    The station time series is quality-masked, averaged over a trailing rolling
    window, and matched by timestamp to a square patch of SEVIRI pixels centred
    on the grid cell nearest to the station. The result is written to
    ``SAVE_PATH + 'IEA_PVPS_<station_name>.zarr'`` (``SAVE_PATH`` is a
    module-level name looked up at call time), overwriting any existing store.

    Parameters
    ----------
    seviri : xarray.Dataset
        Satellite dataset with ``time``, ``y``, ``x`` and ``channel`` dimensions
        and a ``channel_data`` variable. ``y`` is compared against the station
        latitude and ``x`` against the station longitude.
    station_name : str
        Station identifier; selects ``IEA_PVPS-<station_name>.nc``.
    time_window : int, default 10
        Number of consecutive station samples in the rolling mean. The same
        number, in minutes, is subtracted from the resulting timestamps.
    patch_size : int, default 15
        Side length of the patch in pixels. The patch spans
        ``2 * (patch_size // 2) + 1`` pixels, so an even value is effectively
        rounded up to the next odd number.

    Raises
    ------
    ValueError
        If the patch around the nearest pixel would extend beyond the SEVIRI
        grid. Without this check a negative slice start wraps around and an
        overshooting end is truncated, producing an empty or undersized patch.
    """
    # load_dataset reads everything into memory AND closes the file handle,
    # unlike open_dataset(...).load(), which leaves the file open.
    station = xarray.load_dataset(
        f'/capstor/scratch/cscs/kschuurm/DATA/IEA_PVPS/IEA_PVPS-{station_name}.nc'
    )

    with benchmark('where'):
        unphysical = station.GHI > 1500  # reject values above 1500 as unphysical
        # NOTE(review): every sample with a non-null QC entry is discarded, i.e.
        # a populated QC value is treated as a failed check -- confirm this
        # matches the dataset's QC convention.
        flagged = station.QC.notnull()
        station['GHI'] = station.GHI.where(~(unphysical | flagged))

    # Trailing rolling mean; a window containing any NaN yields NaN and is dropped.
    rolling = (
        station.rolling(time=time_window, center=False)
        .mean()
        .dropna(dim='time', subset=['GHI'])
    )
    # Shift the labels back by `time_window` minutes (presumably to label each
    # window by its start; assumes 1-minute station sampling -- TODO confirm).
    rolling['time'] = rolling['time'] - np.timedelta64(time_window, 'm')

    # Index of the SEVIRI pixel nearest to the station.
    lat = station.latitude.values
    lon = station.longitude.values
    ilat = int(np.abs(seviri.y - lat).argmin().values)
    ilon = int(np.abs(seviri.x - lon).argmin().values)

    hpatch = int(patch_size // 2)
    y0, y1 = ilat - hpatch, ilat + hpatch + 1
    x0, x1 = ilon - hpatch, ilon + hpatch + 1
    if y0 < 0 or x0 < 0 or y1 > seviri.sizes['y'] or x1 > seviri.sizes['x']:
        raise ValueError(
            f'Patch of size {patch_size} around station {station_name} '
            f'(y index {ilat}, x index {ilon}) extends beyond the SEVIRI grid '
            f"(y: {seviri.sizes['y']}, x: {seviri.sizes['x']})"
        )

    with benchmark('load'):
        seviri_slice = seviri.isel(y=slice(y0, y1), x=slice(x0, x1)).load()

    # Drop encodings carried over from the source store (presumably so they do
    # not conflict with the re-chunked layout when writing the new store).
    for name in list(seviri_slice.coords.keys()):
        if seviri_slice.coords[name].dtype == object:
            seviri_slice[name].encoding.clear()
    for name in seviri_slice:
        seviri_slice[name].encoding.clear()

    # Sorted, unique timestamps present in both series.
    intersec = np.intersect1d(rolling.time.values, seviri_slice.time.values)
    print(len(intersec))

    subset_station = rolling.sel(time=intersec)
    seviri_slice = seviri_slice.sel(time=intersec)

    # Attach the station measurements along the shared time axis
    # (target variable name -> station variable name).
    for target, source in (
        ('GHI', 'GHI'),
        ('DIF', 'DIF'),
        ('DNI', 'DNI'),
        ('GHIcalc', 'GHIcalc'),
        ('QC', 'QC'),
        ('KI', 'Kc'),
    ):
        seviri_slice[target] = ('time', subset_station[source].data)

    # Solar zenith angle in radians: SZA = 90 deg - elevation.
    seviri_slice['SZA'] = ('time', (90 - subset_station['Elev'].data)/180*np.pi)
    # Solar azimuth converted from degrees to radians.
    seviri_slice['AZI'] = ('time', subset_station['Azim'].data/180*np.pi)

    # Station metadata stored alongside the patch.
    seviri_slice['station_name'] = str(station_name)
    seviri_slice['lat_station'] = subset_station.latitude.data
    seviri_slice['lon_station'] = subset_station.longitude.data
    seviri_slice['altitude_station'] = subset_station.elevation.data

    with benchmark('dropna'):
        # Discard time steps where any satellite value in the patch is missing.
        seviri_slice = seviri_slice.dropna('time', subset=['channel_data'])
        print(len(seviri_slice.time))

    for name in list(seviri_slice.coords.keys()):
        if seviri_slice.coords[name].dtype == object:
            seviri_slice[name].encoding.clear()

    # A single chunk per dimension: the whole station is stored as one block.
    seviri_slice = seviri_slice.chunk({'time':-1, 'channel':-1, 'y':-1, 'x':-1})
    seviri_slice.attrs = station.attrs

    with benchmark('zarr'):
        seviri_slice.to_zarr(SAVE_PATH + f'IEA_PVPS_{str(station_name)}.zarr', mode='w')

    print(f'        STATION {station_name} DONE         ')


In [None]:
# Output directory for the per-station zarr stores (read by create_collocated_zarr).
SAVE_PATH = f'/capstor/scratch/cscs/kschuurm/ZARR/IEA_PVPS/'

# Full-disk SEVIRI archive, opened lazily; each station loads only its own patch.
seviri = xarray.open_zarr('/capstor/scratch/cscs/kschuurm/ZARR/SEVIRI_FULLDISK_timechunked.zarr')

for nm in tqdm(stations.station_name.values):
    print(nm)
    store_path = SAVE_PATH + f'IEA_PVPS_{str(nm)}.zarr'
    if os.path.exists(store_path):
        # A store already exists for this station: skip it so the loop can be resumed.
        continue
    create_collocated_zarr(seviri, nm)
   