## This notebook converts GRIB files to Excel files

In [33]:
import os
import re
import xarray as xr
import pandas as pd

In [34]:
def convert_ds_to_df_wide(ds, downsample_every=6):
    """
    Convert an xarray dataset with time (and optionally step) dimensions to a
    wide-format DataFrame.

    Only the first data variable found in ``ds`` is converted.

    Parameters:
    - ds: xarray.Dataset with 'latitude', 'longitude' and 'time' dimensions.
      If the variable also has a 'step' dimension, ``ds`` must provide a
      'valid_time' variable/coordinate spanning 'time' and 'step'.
    - downsample_every: int >= 1, keep every N-th record along the time axis
      (e.g., 6 keeps records 0, 6, 12, ...)

    Returns:
    - df_wide: pd.DataFrame with one row per (latitude, longitude) grid point;
      the first two columns are 'latitude' and 'longitude', the remaining
      columns are the retained timestamps.

    Raises:
    - ValueError: if ``downsample_every`` is smaller than 1 or ``ds`` contains
      no data variables.
    """
    if downsample_every < 1:
        raise ValueError(f"downsample_every must be >= 1, got {downsample_every!r}")

    data_var_names = list(ds.data_vars)
    if not data_var_names:
        raise ValueError("Dataset contains no data variables to convert")
    var = data_var_names[0]
    da = ds[var]

    # Downsampling is positional ("every N-th record"), so use an integer
    # stride with isel instead of a label lookup, which would depend on the
    # time labels being unique.
    every_nth = slice(None, None, downsample_every)

    if "step" in da.dims:
        # Handle time+step combination (e.g., surface_runoff)
        # ERA5 forecast variables (e.g., runoff) have both 'time' and 'step' dimensions.
        # 'valid_time' combines them to give the actual timestamp for each data point.
        # Force (time, step) order before flattening so the flattened timestamps
        # line up with the time-major order produced by stack() below.
        valid_times = ds["valid_time"].transpose("time", "step").values.ravel()
        da_reshaped = da.stack(datetime=("time", "step"))
        # Remove the MultiIndex created by stack() before installing the real
        # timestamps; overwriting a MultiIndex coordinate in place is deprecated
        # in recent xarray releases. errors="ignore" keeps this working on older
        # releases where the levels are not separate variables.
        da_reshaped = da_reshaped.drop_vars(["datetime", "time", "step"], errors="ignore")
        da_reshaped = da_reshaped.assign_coords(datetime=("datetime", valid_times))
        da_reshaped = da_reshaped.transpose("latitude", "longitude", "datetime")
        da_downsampled = da_reshaped.isel(datetime=every_nth)
    else:
        da_downsampled = da.isel(time=every_nth).rename({"time": "datetime"})
        da_downsampled = da_downsampled.transpose("latitude", "longitude", "datetime")

    # Convert to wide DataFrame: long table first, then one column per timestamp.
    # Note: pivot_table aggregates duplicate (grid point, timestamp) entries
    # with its default aggregation (mean).
    df = da_downsampled.to_dataframe().reset_index()
    df_wide = df.pivot_table(index=["latitude", "longitude"], columns="datetime", values=var).reset_index()
    df_wide.columns.name = None

    # Keep the coordinate columns first, followed by the timestamp columns.
    coord_cols = ["latitude", "longitude"]
    time_cols = [col for col in df_wide.columns if col not in coord_cols]
    return df_wide[coord_cols + time_cols]


In [35]:
def process_variable_to_excel(variable_name, input_base, output_base, start_year, end_year):
    """
    Convert the yearly GRIB files of one variable into a single Excel workbook.

    For every year in [start_year, end_year] the file
    ``<input_base>/<variable_name>/<variable_name>_<year>.grib`` is opened with
    the cfgrib engine, converted with ``convert_ds_to_df_wide`` and written to
    a sheet named after the year. Missing files and files that fail to convert
    are reported and skipped.

    Parameters:
    - variable_name: str, name of the variable (sub-folder and file prefix)
    - input_base: str, folder containing one sub-folder per variable
    - output_base: str, folder for the workbook (created if necessary)
    - start_year: int, first year to process (inclusive)
    - end_year: int, last year to process (inclusive)

    Returns:
    - None. Progress is printed; the workbook is written to
      ``<output_base>/<variable_name>_6hour_<start_year>_<end_year>.xlsx``.
      No workbook is created when no year could be converted.
    """
    input_folder = os.path.join(input_base, variable_name)
    output_path = os.path.join(output_base, f"{variable_name}_6hour_{start_year}_{end_year}.xlsx")
    os.makedirs(output_base, exist_ok=True)

    print(f"\n📂 Processing variable: {variable_name}")
    print(f"📁 Input folder: {input_folder}")
    print(f"💾 Output Excel: {output_path}\n")

    # The writer is created lazily on the first converted year: closing an
    # openpyxl workbook that has no sheets raises, and an empty run should not
    # leave a stray output file behind.
    writer = None
    success_count = 0

    try:
        for year in range(start_year, end_year + 1):
            filename = f"{variable_name}_{year}.grib"
            file_path = os.path.join(input_folder, filename)

            if not os.path.exists(file_path):
                print(f"❌ Missing: {file_path}")
                continue

            print(f"📥 Loading: {file_path}")
            try:
                # Convert inside the context manager so the data is read
                # before the GRIB file handle is released.
                with xr.open_dataset(file_path, engine="cfgrib") as ds:
                    df_wide = convert_ds_to_df_wide(ds)

                if writer is None:
                    writer = pd.ExcelWriter(output_path, engine="openpyxl")

                df_wide.to_excel(writer, sheet_name=str(year), index=False)
                print(f"✅ Saved sheet '{year}' — shape: {df_wide.shape}")
                success_count += 1

            except Exception as e:
                # Best effort: one bad year must not abort the whole run.
                print(f"❌ Failed for {file_path}: {e}")
    finally:
        # Always flush and close the workbook, even if the loop is interrupted.
        if writer is not None:
            writer.close()

    print(f"\n✅ Done. {success_count} years saved for '{variable_name}'")
    
    
    

In [None]:
# === Convert every enabled ERA5 variable to an Excel workbook ===

# Locations of the raw GRIB input and the Excel output, relative to this notebook.
input_base = "../../era5_data_grib_raw"
output_base = "../../era5_data_excel"

# Variables to process; uncomment an entry to include it in the run.
variables = [
    # "total_precipitation",
    # "2m_temperature",
    # "10m_u_component_of_wind",
    # "10m_v_component_of_wind",
    "surface_runoff",
    # "sub_surface_runoff",
]

# One workbook per variable, covering 2000 through 2025 inclusive.
for var in variables:
    process_variable_to_excel(
        variable_name=var,
        input_base=input_base,
        output_base=output_base,
        start_year=2000,
        end_year=2025,
    )

Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2000.grib.5b7b6.idx' incompatible with GRIB file



📂 Processing variable: surface_runoff
📁 Input folder: ../../era5_data_grib_raw/surface_runoff
💾 Output Excel: ../../era5_data_excel/surface_runoff_6hour_2000_2025.xlsx

📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2000.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2001.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2000' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2001.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2002.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2001' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2002.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2003.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2002' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2003.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2004.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2003' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2004.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2005.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2004' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2005.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2006.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2005' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2006.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2007.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2006' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2007.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2008.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2007' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2008.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2009.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2008' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2009.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2010.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2009' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2010.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2011.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2010' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2011.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2012.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2011' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2012.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2013.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2012' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2013.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2014.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2013' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2014.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2015.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2014' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2015.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2016.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2015' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2016.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2017.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2016' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2017.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2018.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2017' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2018.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2019.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2018' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2019.grib
