## This code converts GRIB files to Excel files

In [1]:
import os
import re
import xarray as xr
import pandas as pd

In [None]:
def _to_wide_frame(data_array, value_column):
    """Reshape a (latitude, longitude, time) DataArray into a wide DataFrame.

    Args:
        data_array: DataArray with ``latitude``, ``longitude`` and ``time``
            dimensions.
        value_column: Name of the column holding the data values after
            ``to_dataframe()`` (the DataArray's variable name).

    Returns:
        DataFrame with one row per (latitude, longitude) pair, the two
        coordinate columns first, followed by one column per time stamp.
    """
    df = data_array.transpose("latitude", "longitude", "time").to_dataframe().reset_index()
    df_wide = df.pivot_table(
        index=["latitude", "longitude"], columns="time", values=value_column
    ).reset_index()
    df_wide.columns.name = None
    df_wide = df_wide.rename_axis(None, axis=0)
    # Keep the coordinate columns in front of the per-time-stamp columns.
    time_cols = [col for col in df_wide.columns if col not in ("latitude", "longitude")]
    return df_wide[["latitude", "longitude"] + time_cols]


def process_variable_to_excel(variable_name, input_base, output_base, start_year, end_year):
    """Convert yearly GRIB files of one variable into a multi-sheet Excel workbook.

    For every year in ``start_year..end_year`` (inclusive) the file
    ``<input_base>/<variable_name>/<variable_name>_<year>.grib`` is opened,
    every 6th time step of its first data variable is kept, and the result is
    written in wide format to a sheet named after the year. Missing files and
    files that fail to convert are reported and skipped.

    Args:
        variable_name: Variable name; used as input sub-folder and file prefix.
        input_base: Directory containing one sub-folder per variable.
        output_base: Directory for the workbook (created if absent).
        start_year: First year to process.
        end_year: Last year to process (inclusive).

    Returns:
        The number of yearly sheets successfully written. When it is 0, no
        workbook is created.
    """
    input_folder = os.path.join(input_base, variable_name)
    output_path = os.path.join(output_base, f"{variable_name}_6hour_{start_year}_{end_year}.xlsx")
    os.makedirs(output_base, exist_ok=True)

    print(f"\n📂 Processing variable: {variable_name}")
    print(f"📁 Input folder: {input_folder}")
    print(f"💾 Output Excel: {output_path}\n")

    # The writer is created lazily: saving a workbook without any sheet makes
    # openpyxl raise, which used to crash runs where no year could be loaded.
    writer = None
    success_count = 0

    try:
        for year in range(start_year, end_year + 1):
            filename = f"{variable_name}_{year}.grib"
            file_path = os.path.join(input_folder, filename)

            if not os.path.exists(file_path):
                print(f"❌ Missing: {file_path}")
                continue

            print(f"📥 Loading: {file_path}")
            try:
                # The context manager releases the GRIB file handle even when
                # the conversion below fails.
                with xr.open_dataset(file_path, engine="cfgrib") as ds:
                    var = list(ds.data_vars)[0]

                    # Keep every 6th time step (6-hourly if the input is
                    # hourly -- assumed, not checked here).
                    data_6h = ds[var].sel(time=ds.time[::6])

                    # Data is read lazily, so reshape while the file is open.
                    df_wide = _to_wide_frame(data_6h, var)

                if writer is None:
                    writer = pd.ExcelWriter(output_path, engine="openpyxl")

                df_wide.to_excel(writer, sheet_name=str(year), index=False)
                print(f"✅ Saved sheet '{year}' — shape: {df_wide.shape}")
                success_count += 1

            except Exception as e:
                # Best effort per year: report the failure and move on.
                print(f"❌ Failed for {file_path}: {e}")
    finally:
        # Flush and close the workbook even if the loop is interrupted.
        if writer is not None:
            writer.close()

    if writer is None:
        print(f"⚠️ No data converted for '{variable_name}'; no workbook written.")
    print(f"\n✅ Done. {success_count} years saved for '{variable_name}'")
    return success_count

# === Run for all target variables ===

# Inputs are read from one sub-folder per variable under input_base;
# one workbook per variable is written into output_base.
input_base = "../../era5_data"
output_base = "../../era5_data_output"

# Variables to convert ("total_precipitation" is currently disabled).
variables = [
    # "total_precipitation",
    "2m_temperature",
    "10m_u_component_of_wind",
    "10m_v_component_of_wind",
    "surface_runoff",
    "sub_surface_runoff",
]

for var in variables:
    process_variable_to_excel(
        variable_name=var,
        input_base=input_base,
        output_base=output_base,
        start_year=2000,
        end_year=2025,
    )



📂 Processing variable: 10m_u_component_of_wind
📁 Input folder: ../../era5_data/10m_u_component_of_wind
💾 Output Excel: ../../era5_data_output/10m_u_component_of_wind_6hour_2000_2025.xlsx

📥 Loading: ../../era5_data/10m_u_component_of_wind/10m_u_component_of_wind_2000.grib
✅ Saved sheet '2000' — shape: (135, 1466)
📥 Loading: ../../era5_data/10m_u_component_of_wind/10m_u_component_of_wind_2001.grib
✅ Saved sheet '2001' — shape: (135, 1462)
📥 Loading: ../../era5_data/10m_u_component_of_wind/10m_u_component_of_wind_2002.grib
✅ Saved sheet '2002' — shape: (135, 1462)
📥 Loading: ../../era5_data/10m_u_component_of_wind/10m_u_component_of_wind_2003.grib
✅ Saved sheet '2003' — shape: (135, 1462)
📥 Loading: ../../era5_data/10m_u_component_of_wind/10m_u_component_of_wind_2004.grib
✅ Saved sheet '2004' — shape: (135, 1466)
📥 Loading: ../../era5_data/10m_u_component_of_wind/10m_u_component_of_wind_2005.grib
✅ Saved sheet '2005' — shape: (135, 1462)
📥 Loading: ../../era5_data/10m_u_component_of_win