In [2]:
### import useful packages

import sys
import os
from collections import OrderedDict
import numpy as np
import pandas as pd
import xarray as xr
import sys
import cdsapi

In [3]:
# Label of the ERA5 download to work on and the root folder holding the raw data.
version = "v27062024"
raw_dir = os.path.expanduser("~/data/weather_data/corpenicus/ERA5/raw")

# Every download version lives in its own sub-folder of the raw directory.
version_dir = "/".join((raw_dir, version))
print("Directory for specified version:", version_dir)

Directory for specified version: /fast/home/e-pena/data/weather_data/corpenicus/ERA5/raw/v01062024


### Step 1: find and organize the u and v .nc vector files resulting from the ERA5 downloading script

In [23]:
# Height (in metres) of the ERA5 wind components and the metadata used for the
# derived wind-speed variable.
height = 100
standard_name = f"ws{height}"
units = "m s-1"

# File-name endings of the u/v component files; derived from `height` so the
# cell keeps working when another height is selected.
u_suffix = f"{height}m_u_component_of_wind.nc"
v_suffix = f"{height}m_v_component_of_wind.nc"

# check if dir exists. if not, adapt the paths above
assert os.path.isdir(version_dir), f"Directory not found: {version_dir}"

# Collect every u-component file below the version directory (sorted so the
# processing order is reproducible).
nc_files = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(version_dir)
    for file in files
    if file.endswith(u_suffix)
)
print(len(nc_files), " u component files found")

# Map each year (name of the folder containing the file) to its list of
# (u_path, v_path) tuples.
year_dict = {}

for u_path in nc_files:
    u_dir, u_name = os.path.split(u_path)
    year = os.path.basename(u_dir)  # Extract the year from the file path
    assert len(year) == 4 and year.isdigit(), (
        f"Expected a 4-digit year folder, got '{year}' for {u_path}"
    )

    # Swap the suffix on the file name only, so that directory names are never
    # rewritten by accident.
    v_path = os.path.join(u_dir, u_name[: -len(u_suffix)] + v_suffix)
    assert os.path.isfile(v_path), f"Missing v component file: {v_path}"

    year_dict.setdefault(year, []).append((u_path, v_path))


print(len(year_dict.keys()), " wind speed files to process: ", year_dict.keys())

2  u component files found
2018 1
2019 1
2  wind speed files to process


### Step 2: process the wind speed files for every year

In [25]:
# Build one wind-speed NetCDF file per year from the (u, v) component files
# collected in `year_dict`. Years whose output file already exists and can be
# opened are skipped; unreadable output files are deleted and recomputed.
for taskID, (sel_year, sel_tuples) in enumerate(year_dict.items()):
    print(taskID)
    output_file_name = (
        f"reanalysis-era5-single-levels.{sel_year}.{height}m_wind_speed.nc"
    )
    year_output = os.path.join(
        version_dir, sel_year, output_file_name
    )  # adapt if necessary
    # check that the output directory exists
    assert os.path.isdir(
        os.path.dirname(year_output)
    ), f"Output directory not found: {os.path.dirname(year_output)}"

    if os.path.isfile(year_output):
        print("already processed: checking it..", end=" ")
        try:
            print("opening it")
            # Opened only to validate the file; the context manager releases
            # the file handle again straight away.
            with xr.open_dataset(year_output):
                pass
            print("correct")
        except Exception:
            # `Exception` instead of a bare `except` so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            print(year_output, "is incorrect.", end=" ")
            os.remove(year_output)
            print("Deleted")

    if not os.path.isfile(year_output):
        print("processing :", sel_year)

        # NOTE(review): every (u, v) pair of a year is written to the same
        # `year_output`, so with more than one pair per year only the last one
        # survives -- confirm that there is always exactly one pair per year.
        for u_file, v_file in sel_tuples:
            print(os.path.basename(u_file), os.path.basename(v_file))
            long_name = f"Total wind speed at {height} m. Processed from ERA5: {u_file}, {v_file}"

            # Open both component files; the context managers close them even
            # if the computation below raises.
            with xr.open_dataset(u_file) as ds_u, xr.open_dataset(v_file) as ds_v:
                # Calculate total wind speed from the u and v components
                data_uv = np.sqrt(
                    np.power(ds_u[f"u{height}"], 2) + np.power(ds_v[f"v{height}"], 2)
                )

            atts_dict = {
                "standard_name": standard_name,
                "long_name": long_name,
                "units": units,
            }
            # Attach the metadata to the variable itself (so it stays with the
            # data when the variable is extracted) and, as before, to the
            # dataset as global attributes.
            ds_uv = xr.Dataset({standard_name: data_uv.assign_attrs(atts_dict)})
            ds_uv = ds_uv.assign_attrs(atts_dict)

            print("saving")

            ds_uv.to_netcdf(year_output)
            print("Done:", year_output)
            ds_uv.close()

0
processing : 2018
reanalysis-era5-single-levels.2018.100m_u_component_of_wind.nc reanalysis-era5-single-levels.2018.100m_v_component_of_wind.nc
saving
Done: /storage_cluster/projects/2022_e-pena_PhD/Projects/RESkit/Paper2023/Jugit_files/ERA5/raw/v01062024/2018/reanalysis-era5-single-levels.2018.100m_wind_speed.nc
1
processing : 2019
reanalysis-era5-single-levels.2019.100m_u_component_of_wind.nc reanalysis-era5-single-levels.2019.100m_v_component_of_wind.nc
saving
Done: /storage_cluster/projects/2022_e-pena_PhD/Projects/RESkit/Paper2023/Jugit_files/ERA5/raw/v01062024/2019/reanalysis-era5-single-levels.2019.100m_wind_speed.nc


In [22]:
# Display the folder of the last `year_output` assigned by the processing loop.
os.path.split(year_output)[0]

'/storage_cluster/projects/2022_e-pena_PhD/Projects/RESkit/Paper2023/Jugit_files/ERA5/raw/v01062024/reanalysis-era5-single-levels.2018.100m_u_component_of_wind.nc'