### 📘 GRIB to Excel Converter with 6-Hour Downsampling

This script processes GRIB files containing ERA5 weather variables and converts them into Excel files for analysis and visualization.

#### ✅ Key Features:
- 📥 **Loads** `.grib` files for a specific variable and year range
- ⏬ **Downsamples** to every 6 hours to reduce file size and match GraphCast frequency
- 📊 **Reshapes** data to wide-format:
  - Rows: `(latitude, longitude)`
  - Columns: 6-hourly timestamps (one column per retained time step)
- 📁 **Saves output** as a multi-sheet Excel file (one sheet per year)

This is useful for long-term climate or weather modeling, especially for applications like flood prediction in Bhutan.


In [1]:
import os
import re
import xarray as xr
import pandas as pd

In [2]:
def convert_ds_to_df_wide(ds, downsample_every=6):
    """
    Convert an xarray dataset with time/step dimensions to a wide-format DataFrame.

    Only the first data variable of ``ds`` is converted. The time axis is thinned
    by keeping every ``downsample_every``-th step, and the resulting column
    timestamps are rounded to a grid of the same spacing.

    Parameters:
    - ds: xarray.Dataset with 'latitude' and 'longitude' dimensions plus either a
      'time' dimension or 'time' + 'step' dimensions (with a 'valid_time' coordinate)
    - downsample_every: int >= 1, how often to sample from time steps
      (e.g., 6 keeps one step every 6 hours — assumes hourly input, TODO confirm)

    Returns:
    - df_wide: pd.DataFrame, columns are 'latitude', 'longitude', then one column
      per retained datetime; rows are the latitude/longitude grid

    Raises:
    - ValueError: if ``downsample_every`` is smaller than 1 or ``ds`` has no data variables
    """
    # A stride of 0 fails deep inside slicing and a negative stride silently
    # reverses time, so reject both up front with a clear message.
    if downsample_every < 1:
        raise ValueError(
            f"downsample_every must be a positive integer, got {downsample_every!r}"
        )

    data_vars = list(ds.data_vars)
    if not data_vars:
        raise ValueError("Dataset contains no data variables to convert")
    var = data_vars[0]
    da = ds[var]

    # Positional stride; equivalent to selecting every N-th label but does not
    # depend on the labels being unique.
    keep_every_nth = slice(None, None, downsample_every)

    if "step" in da.dims:
        # Handle time+step combination (e.g., surface_runoff)
        # ERA5 forecast variables (e.g., runoff) have both 'time' and 'step' dimensions.
        # 'valid_time' combines them to give the actual timestamp for each data point.
        # We use 'valid_time' to flatten and downsample the data along real time.
        # flatten() is row-major, which matches the (time, step) stacking order below.
        valid_times = ds["valid_time"].values.flatten()
        da_reshaped = da.stack(datetime=("time", "step"))
        da_reshaped = da_reshaped.assign_coords(datetime=("datetime", valid_times))
        da_reshaped = da_reshaped.transpose("latitude", "longitude", "datetime")
        da_downsampled = da_reshaped.isel(datetime=keep_every_nth)
    else:
        da_downsampled = da.isel(time=keep_every_nth)
        da_downsampled = da_downsampled.rename({"time": "datetime"})
        da_downsampled = da_downsampled.transpose("latitude", "longitude", "datetime")

    # NOTE(review): every N-th step is picked as-is; for accumulated variables
    # (precipitation, runoff) this does not sum the skipped hours — confirm intent.

    # Convert temperature from Kelvin to Celsius if applicable
    if var in ["2m_temperature", "t2m"]:
        da_downsampled = da_downsampled - 273.15

    # Convert to wide DataFrame.
    # pivot_table uses its defaults here: duplicate (lat, lon, datetime) entries
    # are averaged and timestamps whose values are all NaN are dropped.
    df = da_downsampled.to_dataframe().reset_index()
    df_wide = df.pivot_table(index=["latitude", "longitude"], columns="datetime", values=var).reset_index()
    df_wide.columns.name = None
    df_wide = df_wide.rename_axis(None, axis=0)
    df_wide = df_wide[['latitude', 'longitude'] + [col for col in df_wide.columns if col not in ['latitude', 'longitude']]]

    # Round all datetime columns (after pivot) to a common grid whose spacing
    # follows the downsampling stride, so that with the default of 6,
    # 01:00, 07:00, etc. become 00:00, 06:00, etc. A fixed 6-hour grid would
    # mislabel or collapse columns for any other stride.
    round_freq = f"{downsample_every}h"
    df_wide.columns = [
        col.round(round_freq) if isinstance(col, pd.Timestamp) else col
        for col in df_wide.columns
    ]

    return df_wide


In [3]:
def process_variable_to_excel(variable_name, input_base, output_base, start_year, end_year):
    """
    Convert the yearly GRIB files of one variable into a multi-sheet Excel workbook.

    For every year in ``start_year..end_year`` (inclusive) the file
    ``<input_base>/<variable_name>/<variable_name>_<year>.grib`` is loaded with
    cfgrib, converted with ``convert_ds_to_df_wide`` and written to a sheet
    named after the year. Missing or unreadable years are reported and skipped.

    Parameters:
    - variable_name: str, variable name used for the input sub-folder and file names
    - input_base: str, root folder containing one sub-folder per variable
    - output_base: str, folder for the Excel output (created if needed)
    - start_year, end_year: int, inclusive year range to process

    Returns:
    - None. The workbook is written to
      ``<output_base>/<variable_name>_6hour_<start_year>_<end_year>.xlsx``.
      Nothing is written when the output already exists and is larger than
      10MB, or when no year could be converted.
    """
    input_folder = os.path.join(input_base, variable_name)
    output_path = os.path.join(output_base, f"{variable_name}_6hour_{start_year}_{end_year}.xlsx")
    os.makedirs(output_base, exist_ok=True)

    # Check if output file already exists and is larger than 10MB
    if os.path.exists(output_path) and os.path.getsize(output_path) > 10 * 1024 * 1024:
        print(f"⚠️ Skipping {variable_name} — Excel file already exists and is >10MB")
        return

    print(f"\n📂 Processing variable: {variable_name}")
    print(f"📁 Input folder: {input_folder}")
    print(f"💾 Output Excel: {output_path}\n")

    # The writer is created lazily, on the first successfully converted year:
    # opening it truncates the output file, and closing an openpyxl workbook
    # that has no sheets raises an IndexError.
    writer = None
    success_count = 0

    for year in range(start_year, end_year + 1):
        filename = f"{variable_name}_{year}.grib"
        file_path = os.path.join(input_folder, filename)

        if not os.path.exists(file_path):
            print(f"❌ Missing: {file_path}")
            continue

        print(f"📥 Loading: {file_path}")
        try:
            # The context manager releases the GRIB file handle once the data
            # has been materialised into a DataFrame.
            with xr.open_dataset(file_path, engine="cfgrib") as ds:
                df_wide = convert_ds_to_df_wide(ds)
        except Exception as e:
            print(f"❌ Failed for {file_path}: {e}")
            continue

        if writer is None:
            writer = pd.ExcelWriter(output_path, engine="openpyxl")

        try:
            df_wide.to_excel(writer, sheet_name=str(year), index=False)
        except Exception as e:
            print(f"❌ Failed for {file_path}: {e}")
            continue

        print(f"✅ Saved sheet '{year}' — shape: {df_wide.shape}")
        success_count += 1

    if writer is None:
        print(f"\n⚠️ No years could be converted for '{variable_name}' — no Excel file written")
        return

    # Closed only on normal completion, so an interrupted run does not save a
    # partial workbook that the >10MB skip check could later take for complete.
    writer.close()
    print(f"\n✅ Done. {success_count} years saved for '{variable_name}'")
    
    
    

In [4]:
# === Batch conversion: one Excel workbook per ERA5 variable ===

# Root folder of the raw GRIB input and root folder for the Excel output.
input_base = "../../era5_data_grib_raw"
output_base = "../../era5_data_excel"

# Variable names; each one is used as the input sub-folder and file-name prefix.
variables = [
    "total_precipitation",
    "2m_temperature",
    "10m_u_component_of_wind",
    "10m_v_component_of_wind",
    "surface_runoff",
    "sub_surface_runoff",
]

# Inclusive year range shared by every variable.
year_range = {"start_year": 2000, "end_year": 2025}

for var in variables:
    process_variable_to_excel(var, input_base, output_base, **year_range)


📂 Processing variable: total_precipitation
📁 Input folder: ../../era5_data_grib_raw/total_precipitation
💾 Output Excel: ../../era5_data_excel/total_precipitation_6hour_2000_2025.xlsx



Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2000.grib.5b7b6.idx' incompatible with GRIB file


📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2000.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2001.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2000' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2001.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2002.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2001' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2002.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2003.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2002' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2003.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2004.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2003' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2004.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2005.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2004' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2005.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2006.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2005' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2006.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2007.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2006' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2007.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2008.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2007' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2008.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2009.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2008' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2009.grib


Ignoring index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2010.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2009' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2010.grib


Can't read index file '../../era5_data_grib_raw/total_precipitation/total_precipitation_2011.grib.5b7b6.idx'
Traceback (most recent call last):
  File "/Users/qingfangliu/opt/anaconda3/lib/python3.9/site-packages/cfgrib/messages.py", line 551, in from_indexpath_or_filestream
    self = cls.from_indexpath(indexpath)
  File "/Users/qingfangliu/opt/anaconda3/lib/python3.9/site-packages/cfgrib/messages.py", line 430, in from_indexpath
    index = pickle.load(file)
EOFError: Ran out of input


✅ Saved sheet '2010' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2011.grib
✅ Saved sheet '2011' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2012.grib
✅ Saved sheet '2012' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2013.grib
✅ Saved sheet '2013' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2014.grib
✅ Saved sheet '2014' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2015.grib
✅ Saved sheet '2015' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2016.grib
✅ Saved sheet '2016' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_precipitation_2017.grib
✅ Saved sheet '2017' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/total_precipitation/total_

Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2000.grib.5b7b6.idx' incompatible with GRIB file



✅ Done. 26 years saved for 'total_precipitation'
⚠️ Skipping 2m_temperature — Excel file already exists and is >10MB
⚠️ Skipping 10m_u_component_of_wind — Excel file already exists and is >10MB
⚠️ Skipping 10m_v_component_of_wind — Excel file already exists and is >10MB

📂 Processing variable: surface_runoff
📁 Input folder: ../../era5_data_grib_raw/surface_runoff
💾 Output Excel: ../../era5_data_excel/surface_runoff_6hour_2000_2025.xlsx

📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2000.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2001.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2000' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2001.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2002.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2001' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2002.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2003.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2002' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2003.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2004.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2003' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2004.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2005.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2004' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2005.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2006.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2005' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2006.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2007.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2006' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2007.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2008.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2007' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2008.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2009.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2008' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2009.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2010.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2009' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2010.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2011.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2010' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2011.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2012.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2011' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2012.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2013.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2012' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2013.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2014.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2013' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2014.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2015.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2014' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2015.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2016.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2015' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2016.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2017.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2016' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2017.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2018.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2017' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2018.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2019.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2018' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2019.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2020.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2019' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2020.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2021.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2020' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2021.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2022.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2021' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2022.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2023.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2022' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2023.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2024.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2023' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2024.grib


Ignoring index file '../../era5_data_grib_raw/surface_runoff/surface_runoff_2025.grib.5b7b6.idx' incompatible with GRIB file


✅ Saved sheet '2024' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/surface_runoff/surface_runoff_2025.grib
✅ Saved sheet '2025' — shape: (135, 755)

✅ Done. 26 years saved for 'surface_runoff'

📂 Processing variable: sub_surface_runoff
📁 Input folder: ../../era5_data_grib_raw/sub_surface_runoff
💾 Output Excel: ../../era5_data_excel/sub_surface_runoff_6hour_2000_2025.xlsx

📥 Loading: ../../era5_data_grib_raw/sub_surface_runoff/sub_surface_runoff_2000.grib
✅ Saved sheet '2000' — shape: (135, 1466)
📥 Loading: ../../era5_data_grib_raw/sub_surface_runoff/sub_surface_runoff_2001.grib
✅ Saved sheet '2001' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/sub_surface_runoff/sub_surface_runoff_2002.grib
✅ Saved sheet '2002' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/sub_surface_runoff/sub_surface_runoff_2003.grib
✅ Saved sheet '2003' — shape: (135, 1462)
📥 Loading: ../../era5_data_grib_raw/sub_surface_runoff/sub_surface_runoff_2004.grib
✅ Saved sheet '2004' — sha