In [None]:
import xarray
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cartopy.crs as ccrs
from tqdm import tqdm
from preprocess.sza import solarzenithangle
from utils.etc import benchmark
from dask.distributed import Client
import shutil, gc
import os

In [None]:
# Create a dask.distributed Client with default arguments; the handle is kept
# in `client` for the rest of the notebook session.
client = Client()

In [None]:
def create_collocated_zarr_dwd(seviri, station_id, patch_size=15):
    """Pair one DWD station's 10-minute radiation record with a SEVIRI patch.

    The station netCDF file is opened, the SEVIRI grid cell closest to the
    station coordinates is located, and a window of
    ``2 * floor(patch_size / 2) + 1`` cells per side around it is loaded.
    Station and satellite data are reduced to their common timestamps,
    station quantities and solar angles are attached, and the result is
    written as a zarr store under the module-level ``SAVE_PATH``.

    Parameters
    ----------
    seviri : xarray.Dataset
        Satellite dataset with ``time``, ``channel``, ``y`` and ``x`` dimensions.
    station_id : int or str
        DWD station identifier; zero-padded to five characters in file names.
    patch_size : int, default 15
        Requested edge length of the window, in grid cells.

    Returns
    -------
    None
        Output goes to disk. Stations with too few timestamps in common with
        ``seviri`` are skipped with a printed message.
    """
    station_tag = str(station_id).zfill(5)
    obs = xarray.open_dataset(
        f'/capstor/scratch/cscs/kschuurm/DATA/DWD/netcdf/DWD_SOLAR_10min_{station_tag}.nc'
    )
    obs = obs.drop_duplicates('time')

    # Index of the grid cell nearest to the station along each spatial axis.
    row = (np.abs(seviri.y - obs.lat.values)).argmin().values
    col = (np.abs(seviri.x - obs.lon.values)).argmin().values

    half = int(np.floor(patch_size / 2))
    rows = slice(row - half, row + half + 1)
    cols = slice(col - half, col + half + 1)

    # Station timestamps are moved back by 10 minutes before matching.
    # NOTE(review): presumably aligns end-of-interval station stamps with the
    # satellite time axis - confirm.
    obs['time'] = obs.time - np.timedelta64(10, 'm')

    # Cheap pre-check before the expensive load.
    n_shared = len(set(obs.time.values).intersection(seviri.time.values))
    if n_shared < 1000:
        print(f'skipping station, too little points {station_id}')
        return None

    with benchmark('load'):
        patch = seviri.isel(y=rows, x=cols).load()

    # Reset stored encodings so they do not carry over into the new store.
    for name in list(patch.coords.keys()):
        if patch.coords[name].dtype == object:
            patch[name].encoding.clear()
    for name in patch.data_vars:
        patch[name].encoding.clear()

    shared = set(obs.time.values).intersection(set(patch.time.values))
    shared_times = np.unique(np.array(list(shared)))
    if len(shared_times) < 50:
        print(f'skipping {station_id} to little matching timeindices')
        return None

    obs_matched = obs.sel(time=shared_times)
    patch = patch.sel(time=shared_times)

    # J/cm2 per 10 min to watt/m2
    patch['GHI'] = ('time', (obs_matched.GS_10.data / (10 * 60)) * 10000)
    patch['DIF'] = ('time', (obs_matched.DS_10.data / (10 * 60)) * 10000)
    patch['station_name'] = str(obs_matched.station_name.values)
    patch['lat_station'] = obs_matched.lat.data
    patch['lon_station'] = obs_matched.lon.data
    patch['altitude_station'] = obs_matched.elevation.data

    with benchmark('dropna'):
        patch = patch.dropna('time')

    with benchmark('SZA'):
        SZA, AZI = solarzenithangle(
            pd.to_datetime(patch.time),
            patch.lat_station.values,
            patch.lon_station.values,
            patch.altitude_station.values,
        )

    patch['SZA'] = ('time', SZA.astype(np.float32))
    patch['AZI'] = ('time', AZI.astype(np.float32))

    # Coordinates may have changed since the first pass; clear object-dtype
    # coordinate encodings again before writing.
    for name in list(patch.coords.keys()):
        if patch.coords[name].dtype == object:
            patch[name].encoding.clear()

    # One chunk per dimension.
    patch = patch.chunk({'time': -1, 'channel': -1, 'y': -1, 'x': -1})
    patch.attrs = obs.attrs

    with benchmark('zarr'):
        patch.to_zarr(SAVE_PATH + f'DWD_SOLAR_{station_tag}.zarr', mode='w')

    print(f'        STATION {station_id} DONE         ')

def create_collocated_zarr_knmi(seviri, station_id, patch_size=15):
    """Collocate one KNMI station's 10-minute radiation record with a SEVIRI patch.

    Opens the station netCDF file, finds the SEVIRI grid cell nearest to the
    station, loads a window of ``2 * floor(patch_size / 2) + 1`` cells per
    side around it, restricts both datasets to their common timestamps,
    attaches the station quantities and solar angles, and writes the result
    as a zarr store under the module-level ``SAVE_PATH``.

    Parameters
    ----------
    seviri : xarray.Dataset
        Satellite dataset with ``time``, ``channel``, ``y`` and ``x`` dimensions.
    station_id : int or str
        KNMI station identifier as used in the input and output file names.
    patch_size : int, default 15
        Requested edge length of the window, in grid cells.

    Returns
    -------
    None
        Output goes to disk. Stations with fewer than 1000 timestamps in
        common with ``seviri`` are skipped with a printed message.
    """
    station = xarray.open_dataset(f'/capstor/scratch/cscs/kschuurm/DATA/KNMI/nc/KNMI_SOLAR_10min_{str(station_id)}.nc')
    station = station.drop_duplicates('time')

    lat = station.lat.values
    # The longitude must come from `station.lon`; reading it from `station.lat`
    # selects the wrong x-index and therefore the wrong satellite patch.
    lon = station.lon.values
    ilat = (np.abs(seviri.y - lat)).argmin().values
    ilon = (np.abs(seviri.x - lon)).argmin().values

    hpatch = int(np.floor(patch_size/2))

    # Station timestamps are moved back by 10 minutes before matching.
    # NOTE(review): presumably aligns end-of-interval station stamps with the
    # satellite time axis - confirm.
    station['time'] = station.time - np.timedelta64(10, 'm')

    # Spatial slicing leaves the time axis untouched, so the common timestamps
    # are computed once here and reused for the selection below. The 1000-sample
    # threshold also covers the former 50-sample check on the same intersection.
    intersec_time = np.sort(np.array(list(set(station.time.values).intersection(seviri.time.values))))

    if len(intersec_time) < 1000:
        print(f'skipping station {station_id}, too little points')
        return None

    seviri_slice = seviri.isel(y=slice(ilat-hpatch, ilat + hpatch +1), x=slice(ilon-hpatch, ilon+hpatch+1)).load()

    # Reset stored encodings so they do not carry over into the new store.
    for v in list(seviri_slice.coords.keys()):
        if seviri_slice.coords[v].dtype == object:
            seviri_slice[v].encoding.clear()
    for var in seviri_slice:
        seviri_slice[var].encoding.clear()

    subset_station = station.sel(time=intersec_time)
    seviri_slice = seviri_slice.sel(time=intersec_time)

    seviri_slice['GHI'] = ('time', subset_station.Q_GLOB_10.data)
    seviri_slice['GHI_min'] = ('time', subset_station.QN_GLOB_10.data)
    seviri_slice['GHI_max'] = ('time', subset_station.QX_GLOB_10.data)
    seviri_slice['station_name'] = str(station_id)
    seviri_slice['lat_station'] = subset_station.lat.data
    seviri_slice['lon_station'] = subset_station.lon.data
    seviri_slice['altitude_station'] = subset_station.altitude.data

    with benchmark('dropna'):
        seviri_slice = seviri_slice.dropna('time')

    with benchmark('SZA'):
        SZA, AZI = solarzenithangle(pd.to_datetime(seviri_slice.time),
                                    seviri_slice.lat_station.values,
                                    seviri_slice.lon_station.values,
                                    seviri_slice.altitude_station.values)

    seviri_slice['SZA'] = ('time', SZA.astype(np.float32))
    seviri_slice['AZI'] = ('time', AZI.astype(np.float32))

    # Clear object-dtype coordinate encodings again before writing.
    for v in list(seviri_slice.coords.keys()):
        if seviri_slice.coords[v].dtype == object:
            seviri_slice[v].encoding.clear()

    # One chunk per dimension.
    seviri_slice = seviri_slice.chunk({'time':-1, 'channel':-1, 'y':-1, 'x':-1})
    seviri_slice.attrs = station.attrs

    with benchmark('zarr'):
        seviri_slice.to_zarr(SAVE_PATH + f'KNMI_SOLAR_{str(station_id)}.zarr', mode='w')

    print(f'        STATION {station_id} DONE         ')
    
def create_collocated_zarr_meteoswiss(seviri, stations_meteoswiss, station_name, patch_size=15):
    """Pair one MeteoSwiss station's record with a SEVIRI patch and save it as zarr.

    The station is selected by ``station_name`` from ``stations_meteoswiss``,
    the nearest SEVIRI grid cell is located from the station's ``y``/``x``
    values, and a window of ``2 * floor(patch_size / 2) + 1`` cells per side
    is loaded. Both datasets are reduced to their common timestamps, the
    station quantities and solar angles are attached, the spatial axes are
    renamed to ``lat``/``lon``, and the result is written under the
    module-level ``SAVE_PATH``.

    Parameters
    ----------
    seviri : xarray.Dataset
        Satellite dataset with ``time``, ``channel``, ``y`` and ``x`` dimensions.
    stations_meteoswiss : xarray.Dataset
        Ground-station dataset indexable by ``station_name``; must provide
        ``ssi``, ``altitude``, ``x``, ``y`` and ``time``.
    station_name : str
        Label of the station to process.
    patch_size : int, default 15
        Requested edge length of the window, in grid cells.

    Returns
    -------
    None
        Output goes to disk. Stations with too few timestamps in common with
        ``seviri`` are skipped with a printed message.
    """
    # Work on an in-memory deep copy so the shared station dataset is untouched.
    obs = stations_meteoswiss.sel(station_name=station_name).load().copy(deep=True)

    # Index of the grid cell nearest to the station along each spatial axis.
    row = (np.abs(seviri.y - obs.y.values)).argmin().values
    col = (np.abs(seviri.x - obs.x.values)).argmin().values

    half = int(np.floor(patch_size / 2))
    rows = slice(row - half, row + half + 1)
    cols = slice(col - half, col + half + 1)

    # Station timestamps are moved back by 10 minutes before matching.
    # NOTE(review): presumably aligns end-of-interval station stamps with the
    # satellite time axis - confirm.
    obs['time'] = obs.time - np.timedelta64(10, 'm')

    # Only timestamps with a valid `ssi` reading take part in the matching.
    with benchmark('dropna station'):
        obs = obs.dropna('time', subset=['ssi'])

    n_shared = len(set(obs.time.values).intersection(seviri.time.values))
    if n_shared < 1000:
        print(f'skipping station, too little points {station_name}')
        return None

    patch = seviri.isel(y=rows, x=cols).load()

    # Reset stored encodings so they do not carry over into the new store.
    for name in list(patch.coords.keys()):
        if patch.coords[name].dtype == object:
            patch[name].encoding.clear()
    for name in patch.data_vars:
        patch[name].encoding.clear()

    shared = set(obs.time.values).intersection(set(patch.time.values))
    shared_times = np.sort(np.array(list(shared)))
    if len(shared_times) < 50:
        print(f'skipping {station_name} to little matching timeindices')
        return None

    obs_matched = obs.sel(time=shared_times)
    patch = patch.sel(time=shared_times)

    patch['GHI'] = ('time', obs_matched.ssi.data)
    patch['station_name'] = str(station_name)
    patch['lat_station'] = obs_matched.y.data
    patch['lon_station'] = obs_matched.x.data
    patch['altitude_station'] = obs_matched.altitude.data

    with benchmark('SZA'):
        SZA, AZI = solarzenithangle(
            pd.to_datetime(patch.time),
            patch.lat_station.values,
            patch.lon_station.values,
            float(patch.altitude_station.values),
        )

    patch['SZA'] = ('time', SZA.astype(np.float32))
    patch['AZI'] = ('time', AZI.astype(np.float32))

    # Clear object-dtype coordinate encodings again before writing.
    for name in list(patch.coords.keys()):
        if patch.coords[name].dtype == object:
            patch[name].encoding.clear()

    # Rename the spatial dimensions first, then the matching names.
    axis_names = {'y': 'lat', 'x': 'lon'}
    patch = patch.rename_dims(axis_names)
    patch = patch.rename(axis_names)
    patch = patch.chunk({'time': 10000, 'channel': -1, 'lat': -1, 'lon': -1})

    with benchmark('zarr'):
        patch.to_zarr(SAVE_PATH + f'METEOSWISS_SOLAR_{str(station_name)}.zarr', mode='w')

    print(f'        STATION {station_name} DONE         ')


In [None]:

# --- DWD: build one collocated zarr store per station -----------------------
# SAVE_PATH is read as a global by the create_collocated_zarr_* functions.
SAVE_PATH = '/capstor/scratch/cscs/kschuurm/ZARR/DWD/'
stations_dwd = xarray.open_dataset('/capstor/scratch/cscs/kschuurm/DATA/DWD/netcdf/DWD_SOLAR_index.nc')

# The SEVIRI store used here is chunked in space with a time chunk size of at
# least 1000, which keeps loading a small patch over all times fast.
seviri = xarray.open_zarr('/capstor/scratch/cscs/kschuurm/ZARR/SEVIRI_FULLDISK_timechunked.zarr')

for nm in tqdm(stations_dwd.station_id.values):
    # Stations whose output store already exists are not recomputed.
    if not os.path.exists(SAVE_PATH + f'DWD_SOLAR_{str(nm).zfill(5)}.zarr'):
        create_collocated_zarr_dwd(seviri, nm)
    else:
        print(f'skip {nm}')



In [None]:
# --- KNMI: build one collocated zarr store per station ----------------------
# SAVE_PATH is read as a global by the create_collocated_zarr_* functions.
# NOTE: this cell uses the `seviri` dataset opened in the DWD cell.
SAVE_PATH = '/capstor/scratch/cscs/kschuurm/ZARR/KNMI/'
stations_knmi = xarray.open_dataset('/capstor/scratch/cscs/kschuurm/DATA/KNMI/nc/index.nc')

for nm in tqdm(stations_knmi.station.values):
    # Only stations listed in the index that also have a data file are processed.
    if not os.path.exists(f'/capstor/scratch/cscs/kschuurm/DATA/KNMI/nc/KNMI_SOLAR_10min_{str(nm)}.nc'):
        print('station does not have data')
        continue
    create_collocated_zarr_knmi(seviri, nm)
      


In [None]:
# --- MeteoSwiss: build one collocated zarr store per station ----------------
# SAVE_PATH is read as a global by the create_collocated_zarr_* functions.
# NOTE(review): these paths live under /scratch/snx3000 while the DWD and KNMI
# cells use /capstor/scratch/cscs - confirm they are still valid.
SAVE_PATH = '/scratch/snx3000/kschuurm/ZARR/METEOSWISS/'
stations_meteoswiss = xarray.open_zarr('/scratch/snx3000/kschuurm/DATA/METEOSWISS/ground_station_data.zarr')
# Rebinds the global `seviri` used by the cells above.
seviri = xarray.open_zarr('/scratch/snx3000/kschuurm/ZARR/SEVIRI_FULLDISK_timechunked.zarr')

# NOTE(review): the loop iterates the `station` variable, while the helper
# selects on `station_name` - confirm both hold the same labels.
for nm in tqdm(stations_meteoswiss.station.values):
    create_collocated_zarr_meteoswiss(seviri, stations_meteoswiss, nm)

