## Setting Up:

In [1]:
import sys
import os
import pandas as pd
import os
import warnings
from tqdm.notebook import tqdm
import zipfile
import cdsapi
import zipfile
import numpy as np
import glob
import xarray as xr

regions_path = '/home/mburlet/MBM/MassBalanceMachine/regions/French_Alps'
sys.path.insert(0, regions_path)
from scripts.config_FR import *

# Print paths to verify
print("Python path:", sys.path)
print("\nChecking if scripts directory exists:")
print(os.path.exists(os.path.join(regions_path, 'scripts')))
print("\nListing contents of regions directory:")
print(os.listdir(regions_path))

# Debug prints
print(f"1. Current working directory: {os.getcwd()}")
print(f"2. Regions path exists: {os.path.exists(regions_path)}")
print(f"3. Scripts directory exists: {os.path.exists(os.path.join(regions_path, 'scripts'))}")
print(f"4. Config file exists: {os.path.exists(os.path.join(regions_path, 'scripts', 'config_FR.py'))}")
print(f"5. sys.path: {sys.path}")

warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

Python path: ['/home/mburlet/MBM/MassBalanceMachine/regions/French_Alps', '/home/mburlet/anaconda3/envs/MassBalanceMachine/lib/python311.zip', '/home/mburlet/anaconda3/envs/MassBalanceMachine/lib/python3.11', '/home/mburlet/anaconda3/envs/MassBalanceMachine/lib/python3.11/lib-dynload', '', '/home/mburlet/anaconda3/envs/MassBalanceMachine/lib/python3.11/site-packages', '/home/mburlet/MBM/MassBalanceMachine']

Checking if scripts directory exists:
True

Listing contents of regions directory:
['1.3. ERA5Land-prepro.ipynb', '3.2. Train-ML-model.ipynb', '1.1. GLAMOS-prepro.ipynb', 'prcp_fac_array.npy', '1.2. OGGM-datapulling.ipynb', '1.1. GLACIOCLIM-prepro.ipynb', 'scripts', 'w_prcp_array.npy']
1. Current working directory: /home/mburlet/MBM/MassBalanceMachine/regions/French_Alps
2. Regions path exists: True
3. Scripts directory exists: True
4. Config file exists: True
5. sys.path: ['/home/mburlet/MBM/MassBalanceMachine/regions/French_Alps', '/home/mburlet/anaconda3/envs/MassBalanceMachine/

## Download monthly ERA5-Land variables:

In [2]:
# Download the raw ERA5 inputs into `path_ERA5_raw`:
#   1. ERA5-Land monthly means (zip archive, extracted in place),
#   2. one ERA5 single-level geopotential field.
# Set RUN to False to skip the download when the files are already present.
#
# NOTE(review): the recorded HTTP 404 shows the client calling
# `.../api/v2/retrieve/v1/processes/...`; presumably the `url` entry of
# ~/.cdsapirc still points to the retired CDS endpoint - confirm against the
# CDS API setup instructions.
RUN = True
if RUN:
    os.makedirs(path_ERA5_raw, exist_ok=True)
    # os.path.join targets the same directory that extractall() uses below,
    # whether or not `path_ERA5_raw` ends with a path separator.
    era5_land_zip = os.path.join(path_ERA5_raw, 'download.netcdf.zip')
    geopotential_file = os.path.join(path_ERA5_raw,
                                     'era5_geopotential_pressure.nc')

    c = cdsapi.Client()
    c.retrieve(
        'reanalysis-era5-land-monthly-means', {
            'product_type': ['monthly_averaged_reanalysis'],
            'variable': [
                '10m_u_component_of_wind',
                '10m_v_component_of_wind',
                '2m_temperature',
                'forecast_albedo',
                'snow_cover',
                'snow_density',
                'snow_depth_water_equivalent',
                'snowfall',
                'snowmelt',
                'surface_latent_heat_flux',
                'surface_net_thermal_radiation',
                'surface_sensible_heat_flux',
                'surface_solar_radiation_downwards',
                'total_precipitation',
            ],
            # '1950' ... '2024', as strings
            'year': [str(year) for year in range(1950, 2025)],
            # '01' ... '12'
            'month': [f'{month:02d}' for month in range(1, 13)],
            'time': ['00:00'],
            "data_format": "netcdf",
            "download_format": "zip",
            # presumably [north, west, south, east] in degrees - confirm
            # against the CDS API documentation
            'area': [47, 3, 43, 8],
        }, era5_land_zip)

    # The handle is deliberately not called `zip`: that would rebind the
    # builtin zip() for every later cell of the notebook.
    with zipfile.ZipFile(era5_land_zip, 'r') as archive:
        archive.extractall(path_ERA5_raw)

    # NOTE(review): this request has no 'area' entry; the geopotential file
    # opened further down is on a 721 x 1440 grid, which inflates the merged
    # grid built later. Confirm whether a regional subset would be enough.
    c.retrieve("reanalysis-era5-single-levels", {
            "product_type": ["reanalysis"],
            "variable": ["geopotential"],
            "year": ["2024"],
            "month": ["06"],
            "day": ["01"],
            "time": ["12:00"],
            "data_format": "netcdf"
        }, geopotential_file)

HTTPError: 404 Client Error: Not Found for url: https://cds.climate.copernicus.eu/api/v2/retrieve/v1/processes/reanalysis-era5-land-monthly-means


In [8]:

import cdsapi

# Stand-alone variant of the download above, using the request layout of the
# CDS web form.
# NOTE(review): this cell requests 1957-2022 while the previous cell requests
# 1950-2024, and neither download is directed to `path_ERA5_raw`: the first
# call passes no target and the second uses a relative file name - confirm
# which cell is the intended one.

# --- ERA5-Land monthly means -------------------------------------------------
dataset = "reanalysis-era5-land-monthly-means"
request = dict(
    product_type=["monthly_averaged_reanalysis"],
    variable=[
        "2m_temperature",
        "snow_cover",
        "snow_density",
        "snow_depth_water_equivalent",
        "snowfall",
        "snowmelt",
        "forecast_albedo",
        "surface_latent_heat_flux",
        "surface_net_thermal_radiation",
        "surface_sensible_heat_flux",
        "surface_solar_radiation_downwards",
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "total_precipitation",
    ],
    # "1957" ... "2022"
    year=[str(year) for year in range(1957, 2023)],
    # "01" ... "12"
    month=[f"{month:02d}" for month in range(1, 13)],
    time=["00:00"],
    data_format="netcdf",
    download_format="zip",
    area=[47, 3, 43, 8],
)

client = cdsapi.Client()
client.retrieve(dataset, request).download()

# --- ERA5 single-level geopotential (one time step) --------------------------
dataset = "reanalysis-era5-single-levels"
request = dict(
    product_type=["reanalysis"],
    variable=["geopotential"],
    year=["2024"],
    month=["06"],
    day=["01"],
    time=["12:00"],
    data_format="netcdf",
    download_format="zip",
)

client = cdsapi.Client()
client.retrieve(dataset, request, "era5_geopotential.zip")

HTTPError: 404 Client Error: Not Found for url: https://cds.climate.copernicus.eu/api/v2/retrieve/v1/processes/reanalysis-era5-land-monthly-means


In [4]:
# Show where relative paths (e.g. downloads without an explicit target) resolve.
print("1. Current working directory:", os.getcwd())

1. Current working directory: /home/mburlet/MBM/MassBalanceMachine/regions/French_Alps


In [6]:
# List the raw ERA5 directory through the configured path instead of a
# hard-coded, user-specific absolute path.
# NOTE(review): assumes `path_ERA5_raw` is the directory that was listed
# before ('/home/mburlet/DATA_MBM/ERA5Land/raw/') - confirm in config_FR.
sorted(os.listdir(path_ERA5_raw))

1c2e026868bf7fb127ddfbdc1e9efb84.zip  era5_geopotential_pressure_scratch.nc
data_stream-moda.nc		      era5_geopotential.zip
data_stream-oper_stepType-instant.nc  era5_monthly_averaged_data_scratch.nc


In [7]:
# Preview the extracted ERA5-Land monthly file (displayed as the cell output).
xr.open_dataset(f"{path_ERA5_raw}data_stream-moda.nc")

In [4]:
# Open every raw NetCDF file as one "datacube".
# - The glob result is sorted so the datacube order is the same on every run.
# - The merged file written at the end of this notebook lives in the same
#   directory; it is skipped so a re-run does not feed the previous output
#   back into the merge.
# NOTE(review): any other '*.nc' file in the directory (e.g. '*_scratch.nc'
# copies) is still picked up - confirm that this is intended.
merged_file_name = "era5_monthly_averaged_data.nc"
dcs = []
for path in sorted(glob.glob(os.path.join(path_ERA5_raw, '*.nc'))):
    if os.path.basename(path) == merged_file_name:
        continue
    dcs.append(xr.open_dataset(path))
print(dcs)

[<xarray.Dataset> Size: 4MB
Dimensions:     (valid_time: 1, latitude: 721, longitude: 1440)
Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[ns] 8B 2024-06-01T12:00:00
  * latitude    (latitude) float64 6kB 90.0 89.75 89.5 ... -89.5 -89.75 -90.0
  * longitude   (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8
    expver      <U4 16B ...
Data variables:
    z           (valid_time, latitude, longitude) float32 4MB ...
Attributes:
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2025-03-31T14:48 GRIB to CDM+CF via cfgrib-0.9.1..., <xarray.Dataset> Size: 105MB
Dimensions:     (valid_time: 900, latitude: 41, longitude: 51)
Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[n

In [5]:
# Sorted, de-duplicated names of all coordinates / data variables that occur
# in any of the opened datacubes.
coordsVar = sorted({name for cube in dcs for name in cube.coords})
dataVar = sorted({name for cube in dcs for name in cube.data_vars})
print(f"{coordsVar=}")
print(f"{dataVar=}")

# Gather, per coordinate, the values of every datacube in which that
# coordinate is not a scalar, and remember the dtype of each data variable as
# found in the first datacube that contains it.
coords = {name: [] for name in coordsVar}
dataTypes = {}
for cube in dcs:
    for name in cube.coords:
        if cube[name].ndim > 0:
            coords[name].extend(cube[name].values)
    for name in cube.data_vars:
        dataTypes.setdefault(name, cube[name].dtype)

# np.unique returns the distinct values already sorted, so each entry becomes
# the sorted union axis for that coordinate (an empty array when the
# coordinate was scalar in every datacube).
coords = {name: np.unique(np.array(values)) for name, values in coords.items()}

coordsVar=['expver', 'latitude', 'longitude', 'number', 'valid_time']
dataVar=['fal', 'rsn', 'sd', 'sf', 'slhf', 'smlt', 'snowc', 'sshf', 'ssrd', 'str', 't2m', 'tp', 'u10', 'v10', 'z']


In [6]:
# Coordinates that get an axis in the merged arrays but are not counted in
# `dims`.
discardVar = ['expver']

# Only coordinates with at least one collected value get an axis.
_axis_names = [name for name in coords if len(coords[name]) > 0]

# NOTE(review): `size` also counts the axis of the discarded coordinate, so
# the arrays below have one more axis than `dims` has entries. This only lines
# up with later cells when 'expver' is non-scalar in at least one datacube -
# confirm.
size = tuple(len(coords[name]) for name in _axis_names)
print(f"{size=}")
dims = tuple(name for name in _axis_names if name not in discardVar)
print(f"{dims=}")

# One zero-initialised array per data variable, spanning the full union grid.
# NOTE(review): cells that no datacube covers keep the value 0.
data = {name: np.zeros(size, dtype=dataTypes[name]) for name in dataVar}

size=(1, 760, 1489, 901)
dims=('latitude', 'longitude', 'valid_time')


In [8]:
# Copy every variable of every datacube into its slot of the merged arrays.
#
# Fix: the previous chained indexing
#     data[k][v][idx[0],:,:][:,idx[1],:][:,:,idx[2]] = ...
# assigned into temporary copies (each fancy-index step returns a copy), so
# `data` itself was never modified and stayed all zeros. `np.ix_` builds one
# open-mesh index, so a single assignment writes into `data[k]` in place.
for e, dc in enumerate(dcs):
    print(f"Processing datacube n°{e} / {len(dcs)}")

    # Position of each coordinate value of this datacube on the merged axis.
    # `coords[c]` is sorted and unique, so searchsorted returns exact slots.
    # These positions are identical for all variables of the datacube, hence
    # they are computed once per datacube.
    idx = []
    for c in dims:
        axis_values = dc[c].values
        if not np.isin(axis_values, coords[c]).all():
            raise ValueError(
                f"Coordinate '{c}' of datacube n°{e} is not covered by the merged axis")
        idx.append(np.searchsorted(coords[c], axis_values))
    target_cells = np.ix_(*idx)

    # Slot(s) on the leading discarded axis (expver). The coordinate may be a
    # scalar or a vector; repeated values are collapsed so each slot is
    # written only once.
    selVar = []
    for c in discardVar:
        discard_values = np.unique(np.atleast_1d(dc[c].values))
        selVar.append(np.searchsorted(coords[c], discard_values))
    assert len(selVar)==1

    for k in tqdm(dc.data_vars):
        # Reorder by dimension NAME to the layout of the merged arrays instead
        # of relying on a fixed (time, lat, lon) axis order.
        block = dc[k].transpose(*dims).values
        for v in selVar[0]:
            # data[k][v] is a view (basic integer index), so this writes
            # straight into the merged array.
            data[k][v][target_cells] = block


Processing datacube n°0 / 2


  0%|          | 0/1 [00:00<?, ?it/s]

Processing datacube n°1 / 2


  0%|          | 0/14 [00:00<?, ?it/s]

In [9]:
# Wrap the merged arrays in a Dataset and write it next to the raw files.
# Axis order of every array: the discarded coordinate(s) first, then `dims`.
_merged_dims = tuple(discardVar) + dims
ds = xr.Dataset(
    data_vars={name: (_merged_dims, array) for name, array in data.items()},
    # NOTE(review): `coords` still holds coordinates for which no values were
    # collected; the printed dataset shows a zero-length `number` dimension -
    # confirm whether those entries should be passed on.
    coords=coords,
)
ds.to_netcdf(f"{path_ERA5_raw}era5_monthly_averaged_data.nc")

In [10]:
# Text summary of the merged dataset (dimensions, coordinates, variables).
print(f"{ds}")

<xarray.Dataset> Size: 61GB
Dimensions:     (expver: 1, latitude: 760, longitude: 1489, valid_time: 901,
                 number: 0)
Coordinates:
  * expver      (expver) <U4 16B '0001'
  * latitude    (latitude) float64 6kB -90.0 -89.75 -89.5 ... 89.5 89.75 90.0
  * longitude   (longitude) float64 12kB 0.0 0.25 0.5 0.75 ... 359.2 359.5 359.8
  * number      (number) float64 0B 
  * valid_time  (valid_time) datetime64[ns] 7kB 1950-01-01 ... 2024-12-01
Data variables: (12/15)
    fal         (expver, latitude, longitude, valid_time) float32 4GB 0.0 ......
    rsn         (expver, latitude, longitude, valid_time) float32 4GB 0.0 ......
    sd          (expver, latitude, longitude, valid_time) float32 4GB 0.0 ......
    sf          (expver, latitude, longitude, valid_time) float32 4GB 0.0 ......
    slhf        (expver, latitude, longitude, valid_time) float32 4GB 0.0 ......
    smlt        (expver, latitude, longitude, valid_time) float32 4GB 0.0 ......
    ...          ...
    str      