In [1]:
import xarray as xr
import numpy as np
import sys, os, glob
import time
from scipy.stats import weibull_min
import dask.distributed as dd

# Project root; the helper-scripts directory below is derived from it.
root_dir = '/media/harish/SSD_4TB/EU_SCORES_project'
scripts_dir = f'{root_dir}/scripts'
# Make the project's helper package importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

from data_processing.libraries import mean_statistics, std_statistics, wind_power_density, weibull , weibull_statistics

In [4]:
# Shut down the Dask client first, then the cluster it is connected to.
# Both names are created by a cell further down, so on a fresh kernel they do
# not exist yet; look them up defensively instead of raising NameError.
for _dask_name in ('client', 'cluster'):
    _dask_handle = globals().get(_dask_name)
    if _dask_handle is not None:
        _dask_handle.close()

In [None]:
# Spin up a local Dask cluster and attach a client to it.
import dask.distributed as dd
print("Starting parallel computing...")

# 8 worker processes x 2 threads each, 2GB memory cap per worker,
# diagnostics dashboard served on port 8787.
cluster = dd.LocalCluster(
    n_workers=8,
    threads_per_worker=2,
    memory_limit='2GB',
    dashboard_address=':8787',
)

# The client is the handle through which subsequent computations reach the cluster.
client = dd.Client(cluster)
print(client)

In [5]:
# Sanity check: open every variable-wise NetCDF file of every case and report
# the ones that do not carry the XLAT / XLONG coordinates.
root_dir = '/media/harish/SSD_4TB/EU_SCORES_project'
run = 'New_runs'
cases = ['Germany_coast', 'Portugal_coast', 'Ireland_coast', 'Netherlands_coast']

# The list of files is identical for every case, so build it once outside the loop.
file_names = ['ws_10', 'ws_80', 'ws_100', 'ws_120', 'ws_150',
            'wpd_80', 'wpd_100', 'wpd_120', 'wpd_150',
            '8MW/tp_80', '8MW/tp_100', '8MW/tp_120', '8MW/tp_150',
            '15MW/tp_80', '15MW/tp_100', '15MW/tp_120', '15MW/tp_150',
            'spv','SWDOWN2','T2']

for case in cases:
    run_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/variablewise_files'

    for file_name in file_names:

        print(f'Checking {case} {file_name}')
        # The context manager closes the file handle as soon as the check is done.
        with xr.open_dataset(f'{run_dir}/{file_name}.nc') as ds:
            # Flag the file when EITHER coordinate is missing. The previous
            # `and` only fired when both were absent, so a file lacking just
            # one of them slipped through unnoticed.
            if 'XLAT' not in ds.coords or 'XLONG' not in ds.coords:
                print(f'No lat lon in {case} {file_name}')

Checking Germany_coast ws_10
Checking Germany_coast ws_80
Checking Germany_coast ws_100
Checking Germany_coast ws_120
Checking Germany_coast ws_150
Checking Germany_coast wpd_80
Checking Germany_coast wpd_100
Checking Germany_coast wpd_120
Checking Germany_coast wpd_150
Checking Germany_coast 8MW/tp_80
Checking Germany_coast 8MW/tp_100
Checking Germany_coast 8MW/tp_120
Checking Germany_coast 8MW/tp_150
Checking Germany_coast 15MW/tp_80
Checking Germany_coast 15MW/tp_100
Checking Germany_coast 15MW/tp_120
Checking Germany_coast 15MW/tp_150
Checking Germany_coast spv
Checking Germany_coast SWDOWN2
Checking Germany_coast T2
Checking Portugal_coast ws_10
Checking Portugal_coast ws_80
Checking Portugal_coast ws_100
Checking Portugal_coast ws_120
Checking Portugal_coast ws_150
Checking Portugal_coast wpd_80
Checking Portugal_coast wpd_100
Checking Portugal_coast wpd_120
Checking Portugal_coast wpd_150
Checking Portugal_coast 8MW/tp_80
Checking Portugal_coast 8MW/tp_100
Checking Portugal_coas

In [3]:
def overall_statistic(ds,statistic,time_coord='Time',quantile=None):
    """Reduce ``ds`` along its time dimension and return the computed result.

    Parameters
    ----------
    ds : object exposing ``std`` / ``quantile`` reductions whose results have
        a ``compute()`` method (e.g. a dask-backed xarray object).
    statistic : str
        ``'std'`` for the standard deviation, or ``'quantile'`` for the
        quantile(s) given by ``quantile``.
    time_coord : str, optional
        Name of the dimension to reduce over. Defaults to ``'Time'``.
    quantile : float or sequence of float, optional
        Quantile(s) to evaluate; required when ``statistic == 'quantile'``.

    Returns
    -------
    The computed reduction, or ``None`` (after printing a message) when
    ``statistic`` is not one of the supported names.

    Raises
    ------
    ValueError
        If ``statistic == 'quantile'`` but no ``quantile`` value was supplied.
    """
    if statistic == 'std':
        return ds.std(dim=time_coord).compute()
    elif statistic == 'quantile':
        # Fail fast with a clear message instead of handing None to the
        # reduction and only finding out once the computation is under way.
        if quantile is None:
            raise ValueError("quantile must be provided when statistic == 'quantile'")
        return ds.quantile(quantile,dim=time_coord,method='inverted_cdf').compute()
    else:
        # Unknown statistic: keep the best-effort behaviour callers rely on.
        print(f'Invalid statistic {statistic}')
        return None

In [2]:
# Inspect the previously saved statistics file for one case / variable.
root_dir = '/media/harish/SSD_4TB/EU_SCORES_project'
run = 'New_runs'

chunks={"Time": -1,"south_north": 8,"west_east": 8}
case = 'Portugal_coast'
file_name = 'T2'
variable = 'T2'
run_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/variablewise_files'
target_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/statistics_files/{file_name}'

statistic = 'std'
# Read the file into memory and release the handle straight away: the
# statistics files are rewritten in place elsewhere in this notebook, and a
# lingering open handle can get in the way of that write.
with xr.open_dataset(f'{target_dir}/{statistic}.nc') as ds:
    ds = ds.load()
ds

In [2]:
# Lazily open one variable of one case as a dask-backed array.
run = 'New_runs'
root_dir = '/media/harish/SSD_4TB/EU_SCORES_project'

case = 'Portugal_coast'
variable = 'T2'
file_name = 'T2'
# Whole time axis in a single chunk, 8 x 8 tiles in space.
chunks = dict(Time=-1, south_north=8, west_east=8)
target_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/statistics_files/{file_name}'
run_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/variablewise_files'

ds = xr.open_dataset(f'{run_dir}/{file_name}.nc', chunks=chunks)
ds = ds[variable]
# Every time step except the last one is kept.
# NOTE(review): the reason for dropping the final step is not visible here - confirm.
data = ds.isel(Time=slice(None, -1))
data

Unnamed: 0,Array,Chunk
Bytes,16.33 GiB,66.35 MiB
Shape,"(271752, 127, 127)","(271752, 8, 8)"
Dask graph,256 chunks in 3 graph layers,256 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 16.33 GiB 66.35 MiB Shape (271752, 127, 127) (271752, 8, 8) Dask graph 256 chunks in 3 graph layers Data type float32 numpy.ndarray",127  127  271752,

Unnamed: 0,Array,Chunk
Bytes,16.33 GiB,66.35 MiB
Shape,"(271752, 127, 127)","(271752, 8, 8)"
Dask graph,256 chunks in 3 graph layers,256 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [4]:
# Compute the overall (all-time) standard deviation of one variable for one
# case and store it as `overall_values` in the existing statistics file.
root_dir = '/media/harish/SSD_4TB/EU_SCORES_project'
run = 'New_runs'

# Whole time axis in a single chunk, 8 x 8 tiles in space.
chunks={"Time": -1,"south_north": 8,"west_east": 8}
case = 'Netherlands_coast'
file_name = 'T2'
variable = 'T2'
run_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/variablewise_files'
target_dir=f'{root_dir}/WRFV4.4/EU_SCORES/{run}/{case}/Postprocessed/statistics_files/{file_name}'

ds = xr.open_dataset(f'{run_dir}/{file_name}.nc',chunks=chunks)[variable]
# Every time step except the last one is used.
# NOTE(review): the reason for dropping the final step is not visible here - confirm.
data = ds.isel(Time=slice(None,-1))

# Check if XLAT and XLONG are present in the data; if not, take them from XLAND.nc.
if 'XLAT' not in data.coords or 'XLONG' not in data.coords:
    # Load the two coordinate arrays into memory inside the context manager so
    # the XLAND.nc handle is released instead of staying open for the session.
    with xr.open_dataset(f'{run_dir}/XLAND.nc') as XLAND:
        XLAT = XLAND.XLAT.load()
        XLONG = XLAND.XLONG.load()
    data = data.assign_coords(XLAT=XLAT, XLONG=XLONG)

statistic = 'std'
# Add the new variable, then pull everything into memory before the handle is
# closed - the file is about to be overwritten with the updated contents.
with xr.open_dataset(f'{target_dir}/{statistic}.nc') as ds:
    ds['overall_values'] = overall_statistic(data,statistic)
    ds = ds.load()

# Save the updated dataset back to the same file.
ds.to_netcdf(f'{target_dir}/{statistic}.nc',mode='w')