In [1]:
import xarray
import pandas as pd
import numpy as np
from datetime import datetime
from glob import glob
from tqdm import tqdm
import os

In [None]:
# Glob pattern for the compressed KNMI 10-minute archives, relative to the
# notebook's working directory.
fn = 'zip/kis_tos_*.gz'
# glob() returns matches in arbitrary (filesystem) order; sort so that the row
# order of the concatenated table -- and everything derived from it -- is the
# same on every run.
fns = sorted(glob(fn))

def bad_lines(x):
    """Report a line the CSV parser could not handle, then drop it.

    Intended as the ``on_bad_lines`` callback of ``pd.read_csv``: the
    offending line ``x`` is echoed to stdout and ``None`` is returned.
    """
    print(x)
    return None

# Load every matched archive and stack the pieces into one long table `df`.
if not fns:
    # Without this guard pd.concat([]) fails with the much less helpful
    # "No objects to concatenate".
    raise FileNotFoundError(f'no input files match {fn!r}')

df_list = []
for path in fns:  # separate name so the glob pattern in `fn` is not overwritten
    part = pd.read_csv(
        path,
        # Fields are split on a space followed by more whitespace. Raw string:
        # '\s' is an invalid escape sequence in a plain literal.
        sep=r' \s+',
        comment='#',
        # Malformed lines are printed by bad_lines() and skipped; a callable
        # here requires the python engine.
        on_bad_lines=bad_lines,
        engine='python',
        names=['datetime', 'location', 'name', 'lat', 'lon', 'altitude',
               'Q_GLOB_10', 'QN_GLOB_10', 'QX_GLOB_10', 'SQ_10'],
        dtype={'Q_GLOB_10': np.float32, 'QN_GLOB_10': np.float32,
               'QX_GLOB_10': np.float32, 'SQ_10': np.float32},
        parse_dates=['datetime'],
        date_format='%Y-%m-%d %H:%M:%S',
    )
    df_list.append(part)

# ignore_index gives the combined table a clean 0..n-1 index instead of
# repeating each file's own row numbers.
df = pd.concat(df_list, ignore_index=True)


In [10]:
# Output directory for the per-station NetCDF files. The trailing slash
# matters: file names are appended to this string directly.
SAVE_PATH = '/home/kr/Documents/Solar_Power_Forecasting/03_ground_stations/KNMI/nc/'
# Distinct station identifiers, in order of first appearance in `df`.
stations_list = df['location'].unique()

In [None]:
def parse_KNMI_dates(x):
    """Convert a ``'YYYY-MM-DD HH:MM:SS'`` string into a ``datetime``.

    Raises ``ValueError`` (from ``datetime.strptime``) when ``x`` does not
    match that layout.
    """
    knmi_format = '%Y-%m-%d %H:%M:%S'
    return datetime.strptime(x, knmi_format)

# Metadata attached to each measured variable:
# column -> (standard_name, long_name, units).
# The short identifier goes to `standard_name` and the human-readable
# description to `long_name`.
VARIABLE_ATTRS = {
    'Q_GLOB_10': ('global_sky_radiation_avg',
                  'Global average all sky radiation in previous 10 minutes',
                  'W m-2'),
    'QN_GLOB_10': ('global_sky_radiation_min',
                   'Global minimum all sky radiation in previous 10 minutes',
                   'W m-2'),
    'QX_GLOB_10': ('global_sky_radiation_max',
                   'Global maximum all sky radiation in previous 10 minutes',
                   'W m-2'),
    'SQ_10': ('sum_sunshine_duration',
              'sunshine duration during the previous 10 minutes',
              'minutes'),
}

# Write one NetCDF file per station.
# Iterate `stations_list` (defined in the cell above); the previous `stations`
# name only exists after a later cell has run, so a fresh kernel failed here.
# NOTE(review): the first two stations are skipped -- presumably a leftover
# from resuming an interrupted run. Confirm, and drop the `[2:]` slice to
# export every station.
for station in stations_list[2:]:
    rows = df.loc[df['location'] == station]

    # Station-level constants are taken from the first row of the station.
    # Bracket access is used because `.name` is easy to confuse with the
    # pandas `name` attribute.
    lat = rows['lat'].iloc[0]
    lon = rows['lon'].iloc[0]
    altitude = rows['altitude'].iloc[0]
    name = rows['name'].iloc[0]

    # Keep only the measurements, indexed by timestamp.
    x = rows.set_index(['datetime']).drop(
        columns=['lat', 'lon', 'altitude', 'name', 'location'])

    xar = (
        x.to_xarray()
        .assign_coords({'lat': lat, 'lon': lon, 'altitude': altitude})
        # rename() renames the dimension and its coordinate together.
        .rename({'datetime': 'time'})
        .assign_attrs({
            'provider': 'KNMI',
            'station': station,
            'name': name,
            'start_date': x.index.min().isoformat(),
            'end_date': x.index.max().isoformat(),
        })
    )

    for var, (standard_name, long_name, units) in VARIABLE_ATTRS.items():
        xar[var] = xar[var].assign_attrs({
            'long_name': long_name,
            'standard_name': standard_name,
            'units': units,
        })

    xar.to_netcdf(os.path.join(SAVE_PATH, f'KNMI_SOLAR_10min_{station}.nc'))


In [31]:
# Build one metadata record per station: identifier, display name, position,
# altitude and the first/last timestamp present in `df`. The values are kept
# as parallel lists, one entry per station, in `stations_list` order.
names = []
lats = []
lons = []
date_starts = []
date_ends = []
stations = []
altitudes = []

for station in stations_list:
    rows = df.loc[df['location'] == station]

    # Station-level constants come from the station's first row.
    stations.append(station)
    names.append(rows['name'].iloc[0])
    lats.append(rows['lat'].iloc[0])
    lons.append(rows['lon'].iloc[0])
    altitudes.append(rows['altitude'].iloc[0])

    # Period covered by this station's records.
    date_starts.append(pd.to_datetime(rows['datetime'].min()))
    date_ends.append(pd.to_datetime(rows['datetime'].max()))

index_dict = {
    'station': stations,
    'name': names,
    'lat': lats,
    'lon': lons,
    'altitude': altitudes,
    'date_start': date_starts,
    'date_end': date_ends,
}

In [None]:
# Turn the per-station lists into a Dataset indexed by station id.
# `index_dict` is read, not modified: popping 'station' out of it made this
# cell fail with KeyError when run a second time.
coords = index_dict['station']
index = {
    key: ('station', val)
    for key, val in index_dict.items()
    if key != 'station'
}

index_xar = xarray.Dataset(data_vars=index, coords={'station': coords})

In [33]:
# Persist the station index; the path is relative to the notebook's working
# directory.
index_path = './nc/index.nc'
index_xar.to_netcdf(index_path)