In [None]:
import xarray
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cartopy.crs as ccrs
from tqdm import tqdm
from preprocess.sza import solarzenithangle
from utils.etc import benchmark
from dask.distributed import Client
import shutil, gc
import os

In [None]:
# Create a dask.distributed client with default settings. `client` is not
# referenced by name in the cells below.
# NOTE(review): presumably this is here so the dask-backed xarray operations
# (open_zarr / load) run on the cluster this call sets up — confirm.
client = Client()

In [None]:
# Root directory of the IEA-PVPS station files.
folder = '/capstor/scratch/cscs/kschuurm/DATA/IEA_PVPS/'

# Bounding box used to select stations: [lon_min, lon_max, lat_min, lat_max].
extent = [-8, 29, 29, 62]
lon_min, lon_max, lat_min, lat_max = extent

# Station index, reduced to the stations strictly inside the bounding box.
index = xarray.open_dataset(folder + 'index.nc')
inside_extent = (
    (index.longitude > lon_min)
    & (index.longitude < lon_max)
    & (index.latitude > lat_min)
    & (index.latitude < lat_max)
)
index = index.where(inside_extent, drop=True)

In [None]:
# Open the CAB station file; the bare name on the last line makes the notebook
# display the dataset summary as the cell output.
folder = '/capstor/scratch/cscs/kschuurm/DATA/IEA_PVPS/'
cab_path = f'{folder}IEA_PVPS-CAB.nc'
CAB = xarray.open_dataset(cab_path)
CAB

In [None]:
# Overview map: station locations (red dots, labelled with the station name)
# drawn together with one VIS006 frame of the SEVIRI archive.
hrseviri = xarray.open_zarr('/capstor/scratch/cscs/kschuurm/ZARR/SEVIRI_FULLDISK.zarr')

proj = ccrs.PlateCarree()
fig, axis = plt.subplots(1, 1, subplot_kw={'projection': proj})

gl = axis.gridlines(
    crs=ccrs.PlateCarree(),
    draw_labels=True,
    linewidth=2,
    color='gray',
    alpha=0.5,
    linestyle='--',
)
axis.coastlines()

# Station markers and labels come from the filtered station index.
lat = index.latitude.values
lon = index.longitude.values
name = index.station_name.values
axis.scatter(x=lon, y=lat, c='r')
for station_lon, station_lat, txt in zip(lon, lat, name):
    axis.annotate(txt, (station_lon, station_lat))

# A single time step (position 1000 along time) of the VIS006 channel.
vis006_frame = hrseviri.channel_data.sel(channel='VIS006').isel(time=1000)
vis006_frame.plot.imshow(ax=axis, transform=proj)



In [None]:
def create_collocated_zarr(hrseviri, station_name, time_window=10, patch_size=15):
    """Collocate one IEA-PVPS station with a SEVIRI patch and write it to Zarr.

    Processing steps:

    1. Load the station NetCDF file and set GHI to NaN where GHI > 1500 or
       where QC is not null.
    2. Take a trailing rolling mean over ``time_window`` samples, drop the
       steps whose averaged GHI is NaN, and shift the timestamps back by
       ``time_window`` minutes.
    3. Cut a ``patch_size`` x ``patch_size`` pixel window out of ``hrseviri``
       around the grid cell closest to the station.
    4. Keep the timestamps present in both series, attach the station
       variables to the patch, drop time steps with NaN in ``channel_data``
       and write the result to ``SAVE_PATH + 'IEA_PVPS_<station_name>.zarr'``.

    The module-level name ``SAVE_PATH`` must be defined before this function
    is called.

    Parameters
    ----------
    hrseviri : xarray.Dataset
        Satellite dataset with ``time``, ``channel``, ``y`` and ``x``
        dimensions and a ``channel_data`` variable. ``y`` / ``x`` are compared
        directly with the station latitude / longitude.
    station_name : str
        Station identifier used in the input file name and the output store.
    time_window : int, optional
        Rolling-window length in samples, also used as the timestamp shift in
        minutes.
        NOTE(review): the two coincide only for 1-minute station data — confirm.
    patch_size : int, optional
        Edge length of the satellite patch in pixels.

    Returns
    -------
    None
        The store is written as a side effect. Nothing is written when the
        patch does not fit inside the satellite grid or when the station and
        the satellite data share no timestamps; a message is printed instead.
    """

    def _clear_object_coord_encoding(ds):
        # Remove stored encodings from object-dtype coordinates.
        for coord_name in list(ds.coords.keys()):
            if ds.coords[coord_name].dtype == object:
                ds[coord_name].encoding.clear()

    station_path = f'/capstor/scratch/cscs/kschuurm/DATA/IEA_PVPS/IEA_PVPS-{station_name}.nc'
    # load_dataset reads the file into memory and closes it again, so looping
    # over many stations does not accumulate open file handles.
    station = xarray.load_dataset(station_path)

    with benchmark('where'):
        too_large = station.GHI > 1500
        # NOTE(review): every sample with a non-null QC value is discarded, as
        # in the original code — confirm that "QC is set" really means "bad".
        flagged = station.QC.notnull()
        # .where() builds a new masked variable instead of assigning through a
        # temporary DataArray.
        station['GHI'] = station.GHI.where(~(too_large | flagged))

    rolling = (
        station.rolling(time=time_window, center=False)
        .mean()
        .dropna(dim='time', subset=['GHI'])
    )
    # The trailing window is labelled at its last sample; move the label back
    # by time_window minutes.
    rolling['time'] = rolling['time'] - np.timedelta64(time_window, 'm')

    # Index of the grid cell closest to the station.
    lat = station.latitude.values
    lon = station.longitude.values
    ilat = int(np.abs(hrseviri.y - lat).argmin().values)
    ilon = int(np.abs(hrseviri.x - lon).argmin().values)

    hpatch = int(np.floor(patch_size / 2))
    y_start, y_stop = ilat - hpatch, ilat + hpatch + 1
    x_start, x_stop = ilon - hpatch, ilon + hpatch + 1

    # A negative slice start would wrap around to the other end of the axis
    # and a stop past the end would be silently truncated; in both cases the
    # patch would not be the requested window around the station.
    if (y_start < 0 or x_start < 0
            or y_stop > hrseviri.sizes['y'] or x_stop > hrseviri.sizes['x']):
        print(f'skipping station {station_name}, patch does not fit inside the satellite grid')
        return None

    with benchmark('load'):
        hres_slice = hrseviri.isel(y=slice(y_start, y_stop), x=slice(x_start, x_stop)).load()
    _clear_object_coord_encoding(hres_slice)
    for var in hres_slice:
        hres_slice[var].encoding.clear()

    # Sorted, unique timestamps present in both the station and the patch.
    common_times = np.intersect1d(rolling.time.values, hres_slice.time.values)
    print(len(common_times))
    if len(common_times) == 0:
        print(f'skipping station {station_name}, no matching timestamps')
        return None

    subset_station = rolling.sel(time=common_times)
    hres_slice = hres_slice.sel(time=common_times)

    # Output variable name -> station variable it is copied from.
    station_columns = {
        'GHI': 'GHI',
        'DIF': 'DIF',
        'DNI': 'DNI',
        'GHIcalc': 'GHIcalc',
        'QC': 'QC',
        'KI': 'Kc',
    }
    for target, source in station_columns.items():
        hres_slice[target] = ('time', subset_station[source].data)

    # SZA = 90 - Elev, converted from degrees to radians: [0, 90] deg -> [0, pi/2].
    hres_slice['SZA'] = ('time', (90 - subset_station['Elev'].data) / 180 * np.pi)
    # Azimuth converted from degrees to radians.
    hres_slice['AZI'] = ('time', subset_station['Azim'].data / 180 * np.pi)

    hres_slice['station_name'] = str(station_name)
    hres_slice['lat_station'] = subset_station.latitude.data
    hres_slice['lon_station'] = subset_station.longitude.data
    hres_slice['altitude_station'] = subset_station.elevation.data

    with benchmark('dropna'):
        # Drop every time step that has a NaN anywhere in the satellite patch.
        hres_slice = hres_slice.dropna('time', subset=['channel_data'])
        print(len(hres_slice.time))

    _clear_object_coord_encoding(hres_slice)

    # One chunk per dimension.
    hres_slice = hres_slice.chunk({'time': -1, 'channel': -1, 'y': -1, 'x': -1})
    hres_slice.attrs = station.attrs

    with benchmark('zarr'):
        hres_slice.to_zarr(SAVE_PATH + f'IEA_PVPS_{str(station_name)}.zarr', mode='w')

    print(f'        STATION {station_name} DONE         ')



In [None]:

# Output directory; create_collocated_zarr reads it through the module-level
# name SAVE_PATH.
SAVE_PATH = '/capstor/scratch/cscs/kschuurm/ZARR/IEA_PVPS/'

hrseviri = xarray.open_zarr('/capstor/scratch/cscs/kschuurm/ZARR/SEVIRI_FULLDISK_timechunked.zarr')

# Build one store per station in the filtered index, skipping the stations
# whose store already exists on disk.
for nm in tqdm(index.station_name.values):
    print(nm)
    target_store = SAVE_PATH + f'IEA_PVPS_{str(nm)}.zarr'
    if os.path.exists(target_store):
        continue
    create_collocated_zarr(hrseviri, nm)
   

In [None]:
def create_collocated_zarr_knmi(hrseviri, station_id, patch_size=31):
    """Collocate one KNMI 10-minute station with a SEVIRI patch and write it to Zarr.

    Processing steps:

    1. Load the station NetCDF file, drop duplicate timestamps and shift the
       timestamps back by 10 minutes.
    2. Stop early (printing a message) when fewer than 1000 timestamps are
       shared with ``hrseviri`` or when the patch would not fit inside the
       satellite grid.
    3. Cut a ``patch_size`` x ``patch_size`` pixel window out of ``hrseviri``
       around the grid cell closest to the station and keep the shared
       timestamps.
    4. Attach the station variables, drop time steps containing NaN in any
       variable, add the solar angles returned by ``solarzenithangle`` and
       write the result to ``SAVE_PATH + 'KNMI_SOLAR_<station_id>.zarr'``.

    The module-level name ``SAVE_PATH`` must be defined before this function
    is called.

    Parameters
    ----------
    hrseviri : xarray.Dataset
        Satellite dataset with ``time``, ``channel``, ``y`` and ``x``
        dimensions. ``y`` / ``x`` are compared directly with the station
        ``lat`` / ``lon``.
    station_id
        Station identifier used in the input file name and the output store.
    patch_size : int, optional
        Edge length of the satellite patch in pixels.

    Returns
    -------
    None
        The store is written as a side effect; nothing is written for skipped
        stations.
    """

    def _clear_object_coord_encoding(ds):
        # Remove stored encodings from object-dtype coordinates.
        for coord_name in list(ds.coords.keys()):
            if ds.coords[coord_name].dtype == object:
                ds[coord_name].encoding.clear()

    station_path = f'/capstor/scratch/cscs/kschuurm/DATA/KNMI/nc/KNMI_SOLAR_10min_{str(station_id)}.nc'
    # load_dataset reads the file into memory and closes it again, so looping
    # over many stations does not accumulate open file handles.
    station = xarray.load_dataset(station_path)
    station = station.drop_duplicates('time')

    # Index of the grid cell closest to the station.
    lat = station.lat.values
    lon = station.lon.values
    ilat = int(np.abs(hrseviri.y - lat).argmin().values)
    ilon = int(np.abs(hrseviri.x - lon).argmin().values)

    hpatch = int(np.floor(patch_size / 2))
    y_start, y_stop = ilat - hpatch, ilat + hpatch + 1
    x_start, x_stop = ilon - hpatch, ilon + hpatch + 1

    # NOTE(review): presumably the 10-minute records are labelled at the end of
    # their interval and this moves the label to the start — confirm.
    station['time'] = station.time - np.timedelta64(10, 'm')

    # Sorted, unique timestamps present in both series. Slicing the satellite
    # data along y/x does not change its time axis, so this is computed once,
    # before the expensive load (the original recomputed it after loading and
    # re-checked it against a lower threshold that could never trigger).
    common_times = np.intersect1d(station.time.values, hrseviri.time.values)
    if len(common_times) < 1000:
        print(f'skipping station {station_id}, too little points')
        return None

    # A negative slice start would wrap around to the other end of the axis
    # and a stop past the end would be silently truncated; in both cases the
    # patch would not be the requested window around the station.
    if (y_start < 0 or x_start < 0
            or y_stop > hrseviri.sizes['y'] or x_stop > hrseviri.sizes['x']):
        print(f'skipping station {station_id}, patch does not fit inside the satellite grid')
        return None

    hres_slice = hrseviri.isel(y=slice(y_start, y_stop), x=slice(x_start, x_stop)).load()
    _clear_object_coord_encoding(hres_slice)
    for var in hres_slice:
        hres_slice[var].encoding.clear()

    subset_station = station.sel(time=common_times)
    hres_slice = hres_slice.sel(time=common_times)

    hres_slice['GHI'] = ('time', subset_station.Q_GLOB_10.data)
    hres_slice['GHI_min'] = ('time', subset_station.QN_GLOB_10.data)
    hres_slice['GHI_max'] = ('time', subset_station.QX_GLOB_10.data)
    hres_slice['station_name'] = str(station_id)
    hres_slice['lat_station'] = subset_station.lat.data
    hres_slice['lon_station'] = subset_station.lon.data
    hres_slice['altitude_station'] = subset_station.altitude.data

    with benchmark('dropna'):
        # No subset is given: a NaN in any variable removes the time step.
        hres_slice = hres_slice.dropna('time')

    with benchmark('SZA'):
        # NOTE(review): the units of the returned angles depend on
        # solarzenithangle; create_collocated_zarr stores radians — confirm
        # that the two agree.
        SZA, AZI = solarzenithangle(pd.to_datetime(hres_slice.time),
                                    hres_slice.lat_station.values,
                                    hres_slice.lon_station.values,
                                    hres_slice.altitude_station.values)

    hres_slice['SZA'] = ('time', SZA.astype(np.float32))
    hres_slice['AZI'] = ('time', AZI.astype(np.float32))

    _clear_object_coord_encoding(hres_slice)

    hres_slice = hres_slice.chunk({'time': 50000, 'channel': -1, 'y': -1, 'x': -1})
    hres_slice.attrs = station.attrs

    with benchmark('zarr'):
        hres_slice.to_zarr(SAVE_PATH + f'KNMI_SOLAR_{str(station_id)}.zarr', mode='w')

    print(f'        STATION {station_id} DONE         ')
    

In [None]:
# Output directory; create_collocated_zarr_knmi reads it through the
# module-level name SAVE_PATH.
SAVE_PATH = '/capstor/scratch/cscs/kschuurm/ZARR/KNMI/'

stations_knmi = xarray.open_dataset('/capstor/scratch/cscs/kschuurm/DATA/KNMI/nc/index.nc')

# Process every station listed in the KNMI index that has a data file on disk.
# `hrseviri` is the dataset opened in an earlier cell.
for nm in tqdm(stations_knmi.station.values):
    station_file = f'/capstor/scratch/cscs/kschuurm/DATA/KNMI/nc/KNMI_SOLAR_10min_{str(nm)}.nc'
    if not os.path.exists(station_file):
        print(f'station {nm} does not have data')
        continue
    create_collocated_zarr_knmi(hrseviri, nm)
      

# PLAYGROUND


In [None]:
# Scratch cell: open a single IEA-PVPS station file for interactive inspection.
station_name = 'CAR'
# NOTE(review): this path is rooted at /scratch/snx3000, unlike the
# /capstor/scratch/cscs paths used by the cells above — confirm it is still valid.
station_path = f'/scratch/snx3000/kschuurm/DATA/IEA_PVPS/IEA_PVPS-{station_name}.nc'
station = xarray.open_dataset(station_path)


In [None]:
# Keep only the entries whose GHI is below 100; entries where the condition is
# False are dropped.
low_ghi = station.GHI < 100
station = station.where(low_ghi, drop=True)

In [None]:
# Display the dataset without its `station_name` variable (the result is not
# assigned). `Dataset.drop` is deprecated; `drop_vars` is the replacement for
# dropping variables by name.
station.drop_vars(['station_name'])

In [None]:
# Trailing 12-sample rolling mean of GHI; NaN results are dropped.
rolling = station.GHI.rolling(time=12, center=False).mean().dropna(dim='time')
print(rolling)

# Shift the station timestamps back by 12 minutes.
# NOTE: this is not idempotent — re-running the cell shifts the timestamps again.
station['time'] = station['time'] - np.timedelta64(12, 'm')

# The original cell ended with `station = station.rolling`, which rebound
# `station` to a bound method and made the following cell fail; the assignment
# has been removed so `station` stays a Dataset.

In [None]:
# Display the trailing 12-sample rolling mean of the dataset (without
# `station_name`), keeping only the time steps whose averaged GHI is not NaN.
# `drop_vars` replaces the deprecated `Dataset.drop`.
(
    station.drop_vars(['station_name'])
    .rolling(time=12, center=False)
    .mean()
    .dropna('time', subset=['GHI'])
)

In [None]:
# Show any matplotlib figures that are currently open.
plt.show()