In [1]:
# Import required libraries
import xarray as xr
from pathlib import Path
import numpy as np
import datetime
import pandas as pd
import cfgrib



In [2]:
# Open the reference datacube; its `x` / `y` coordinate values are the grid
# the forecast fields are interpolated onto further down.
ds_original = xr.open_dataset(
    Path.home().joinpath('hdd1/skondylatos/uc3/greece_big.nc')
)

In [3]:
# Display the reference datacube to inspect its dimensions and variables.
ds_original

In [40]:
def calculate_max(ds, time, stat):
    """Reduce `ds` to its maximum over `time` and label the result.

    ds   -- dataset with a 'time' dimension and a variable named `stat`
    time -- coordinate value(s) for the re-created 'time' dimension
    stat -- name of the variable; it is returned as 'max_' + stat

    The 'time' coordinate of the result is `time` shifted forward by one day.
    """
    # Collapse the existing time axis, then re-attach one labelled with `time`.
    reduced = ds.max(dim='time')
    stamped = reduced.expand_dims(time=time)

    # Move the label one day ahead.
    one_day = pd.Timedelta('1 days')
    stamped['time'] = stamped['time'] + one_day

    return stamped.rename({stat: 'max_' + stat})

def calculate_min(ds, time, stat):
    """Reduce `ds` to its minimum over `time` and label the result.

    ds   -- dataset with a 'time' dimension and a variable named `stat`
    time -- coordinate value(s) for the re-created 'time' dimension
    stat -- name of the variable; it is returned as 'min_' + stat

    The 'time' coordinate of the result is `time` shifted forward by one day.
    """
    # Collapse the existing time axis, then re-attach one labelled with `time`.
    reduced = ds.min(dim='time')
    stamped = reduced.expand_dims(time=time)

    # Move the label one day ahead.
    one_day = pd.Timedelta('1 days')
    stamped['time'] = stamped['time'] + one_day

    return stamped.rename({stat: 'min_' + stat})

def calculate_avg(ds, time, stat):
    """Reduce `ds` to its mean over `time` and label the result.

    ds   -- dataset with a 'time' dimension and a variable named `stat`
    time -- coordinate value(s) for the re-created 'time' dimension
    stat -- name of the variable; it is returned as 'avg_' + stat

    The 'time' coordinate of the result is `time` shifted forward by one day.
    """
    # Collapse the existing time axis, then re-attach one labelled with `time`.
    reduced = ds.mean(dim='time')
    stamped = reduced.expand_dims(time=time)

    # Move the label one day ahead.
    one_day = pd.Timedelta('1 days')
    stamped['time'] = stamped['time'] + one_day

    return stamped.rename({stat: 'avg_' + stat})

def calculate_wind_speed(ds):
    """Return `ds` extended with a `wind_speed` variable.

    `wind_speed` is computed element-wise as sqrt(u10**2 + v10**2) from the
    `u10` and `v10` variables of `ds`.

    The input is not modified: a new object carrying the extra variable is
    returned, so a caller that keeps using its own `ds` does not silently
    gain a `wind_speed` variable.
    """
    speed = np.sqrt(ds['u10'] ** 2 + ds['v10'] ** 2)
    # `assign` builds a new object instead of writing into the argument.
    return ds.assign(wind_speed=speed)

In [43]:
#Helper: read one forecast file and bring it onto the target grid
def _load_hourly_field(path, level_coord, target_lon, target_lat):
    """Read one GRIB2 file and return it, in memory, on the target grid.

    The scalar `time`, `step` and `level_coord` coordinates are dropped, the
    file's `valid_time` becomes the single label of a new `time` dimension,
    and the field is interpolated (nearest neighbour) onto the given
    longitude / latitude values.
    """
    # The context manager closes the file once the data has been loaded;
    # otherwise every file opened by the caller's loops would stay open.
    with xr.open_dataset(path, engine='cfgrib', indexpath='') as raw:
        #Drop the coordinates we don't need
        field = raw.drop_vars(['time', 'step', level_coord])
        valid_time = field.valid_time.values
        field = field.expand_dims(time=[valid_time])
        field = field.drop_vars('valid_time')

        #Interpolate to lat and lon of the original dataset
        field = field.interp(longitude=target_lon, latitude=target_lat, method='nearest')

        # Force the values into memory before the file is closed.
        return field.load()


#Function to create a dataset of a given meteo stat
def create_stat(stat):
    """Build daily max / min / avg fields of `stat` from the GRIB2 forecasts.

    For every folder `jh-shared/uc3_service/2021_<day>_1` (day 193..365) under
    the home directory, the files `<stat>_<n>.grib2` with n in 24..35 are read,
    interpolated onto the `x` / `y` grid of `ds_original`, merged along time
    and reduced with `calculate_max`, `calculate_min` and `calculate_avg`.

    Parameters
    ----------
    stat : str
        Name used both in the file names and as the data variable name.
        'tp' files are expected to carry a `surface` coordinate, every other
        stat a `heightAboveGround` coordinate; 'tp' values are divided by 1000.

    Returns
    -------
    Dataset holding the merged per-day results of the three reducers, with the
    last two days left out.
    """
    level_coord = 'surface' if stat == 'tp' else 'heightAboveGround'

    # The target grid does not change between files, so read it once.
    target_lon = ds_original.x.values
    target_lat = ds_original.y.values

    #List to store the datasets per day
    daily = []

    for day in range(193, 366):
        folder = Path.home() / ('jh-shared/uc3_service/2021_' + str(day) + '_1')

        #One dataset per hourly file
        hourly = [
            _load_hourly_field(
                folder / (str(stat) + '_' + str(step) + '.grib2'),
                level_coord,
                target_lon,
                target_lat,
            )
            for step in range(24, 36)
        ]

        #Merge the datasets that are per hour to one for the day
        #We take the time of the first file since we only care for the date
        time = hourly[0].time.values
        x = xr.merge(hourly, compat='no_conflicts')
        if stat == 'tp':
            #From mm to m
            x = x / 1000

        #We calculate the max, min and avg for the day and merge them to one dataset
        daily.append(xr.merge([
            calculate_max(x, time, stat),
            calculate_min(x, time, stat),
            calculate_avg(x, time, stat),
        ]))

    #We merge all the datasets to one for the year 2021
    #We don't include the last two datasets cause they are predictions for the year 2022
    # A single merge replaces the pairwise merge loop, which re-merged the
    # growing result on every iteration.
    return xr.merge(daily[:-2])

In [44]:
# Daily max / min / avg dataset for the 't2m' variable.
x_t2m = create_stat(stat='t2m')

In [45]:
# Display the 't2m' daily statistics dataset.
x_t2m

In [46]:
# Daily max / min / avg dataset for the 'tp' variable.
x_tp = create_stat(stat='tp')

In [47]:
# Display the 'tp' daily statistics dataset.
x_tp

In [49]:
def _prepare_wind_component(raw, valid_time, target_lon, target_lat):
    """Turn one opened wind-component file into an in-memory field on the target grid.

    The scalar `time`, `step` and `heightAboveGround` coordinates are dropped,
    a `time` dimension labelled with `valid_time` is added, the leftover
    `valid_time` coordinate is removed and the field is interpolated (nearest
    neighbour) onto the given longitude / latitude values.
    """
    field = raw.drop_vars(['time', 'step', 'heightAboveGround'])
    field = field.expand_dims(time=[valid_time])
    field = field.drop_vars('valid_time')
    field = field.interp(longitude=target_lon, latitude=target_lat, method='nearest')
    # Force the values into memory so the source file can be closed afterwards.
    return field.load()


def _daily_stats(ds, time, stat):
    """Merge the max, min and avg reductions of `stat` in `ds` into one dataset."""
    return xr.merge([
        calculate_max(ds, time, stat),
        calculate_min(ds, time, stat),
        calculate_avg(ds, time, stat),
    ])


# The target grid does not change between files, so read it once.
target_lon = ds_original.x.values
target_lat = ds_original.y.values

#List of datasets for every day
daily_u10 = []
daily_v10 = []
daily_wind_speed = []

#For every day
for day in range(193, 366):
    #List of datasets for different times of day
    dses_u10 = []
    dses_v_10 = []

    folder = Path.home() / ('jh-shared/uc3_service/2021_' + str(day) + '_1')

    #For every time of day
    for step in range(24, 36):
        path_u10 = folder / ('u10_' + str(step) + '.grib2')
        path_v_10 = folder / ('v_10_' + str(step) + '.grib2')

        #Read the grib files for the specific time; the context manager closes
        #both files as soon as their data has been loaded
        with xr.open_dataset(path_u10, engine='cfgrib', indexpath='') as raw_u10, \
                xr.open_dataset(path_v_10, engine='cfgrib', indexpath='') as raw_v_10:
            #Both components are labelled with the valid time of the u10 file
            time = raw_u10.valid_time.values
            dses_u10.append(_prepare_wind_component(raw_u10, time, target_lon, target_lat))
            dses_v_10.append(_prepare_wind_component(raw_v_10, time, target_lon, target_lat))

    #Merge the datasets so we have one for the day
    x_u10 = xr.merge(dses_u10, compat='no_conflicts')
    x_v10 = xr.merge(dses_v_10, compat='no_conflicts')
    time = dses_u10[0].time.values

    #Wind speed from both components; keep only that variable so the
    #reductions below are not also computed for u10 / v10 and then dropped
    x_wind_speed = calculate_wind_speed(xr.merge([x_u10, x_v10]))[['wind_speed']]

    #Calculate the required variables and add the daily datasets to the lists
    daily_u10.append(_daily_stats(x_u10, time, 'u10'))
    daily_v10.append(_daily_stats(x_v10, time, 'v10'))
    daily_wind_speed.append(_daily_stats(x_wind_speed, time, 'wind_speed'))

# Merge all the days to one dataset
#The last two datasets are predictions for 2022 so we don't include them
# A single merge per variable replaces the pairwise merge loop, which
# re-merged the growing result on every iteration.
x_u10 = xr.merge(daily_u10[:-2])
x_v10 = xr.merge(daily_v10[:-2])
x_wind_speed = xr.merge(daily_wind_speed[:-2])

In [50]:
# Display the merged daily 'u10' statistics.
x_u10

In [51]:
# Display the merged daily 'v10' statistics.
x_v10

In [52]:
# Display the merged daily wind-speed statistics.
x_wind_speed

In [57]:
# Combine the per-variable daily datasets into a single cube.
final_cube = xr.merge(
    [x_t2m, x_tp, x_u10, x_v10, x_wind_speed]
)

In [64]:
# Rename the spatial axes to the `x` / `y` names that `ds_original` uses.
# Only names still present are renamed, so re-running this cell on an
# already-renamed cube is a no-op instead of raising.
final_cube = final_cube.rename({
    old: new
    for old, new in {'longitude': 'x', 'latitude': 'y'}.items()
    if old in final_cube.variables or old in final_cube.dims
})

In [65]:
# Display the final cube to check its variables and renamed dimensions.
final_cube

In [66]:
# Write the finished cube to a NetCDF file under the home directory.
path = Path.home().joinpath('hdd1/diogenis/observatory/dwd_meteo.nc')
final_cube.to_netcdf(path)