This script loads ERA5 monthly NetCDF data, extracts `ssrd` (surface solar radiation downwards), averages it over all grid points covering Ireland, and saves the cleaned output as a CSV file suitable for ML workflows.

In [None]:
#for working with NetCDF
import xarray as xr
import pandas as pd
import os

In [2]:
def extract_solar_radiation_to_csv(input_path, output_path):
    """
    Extract area-averaged ERA5 solar radiation (ssrd) and save it as CSV.

    The ``ssrd`` variable is averaged over every latitude/longitude grid
    point present in the file (plain unweighted mean, no masking), leaving
    one value per time step.

    Parameters
    ----------
    input_path : str or path-like
        NetCDF file containing an ``ssrd`` variable with ``latitude`` and
        ``longitude`` dimensions.
    output_path : str or path-like
        Destination CSV file. Missing parent directories are created.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Date`` and ``Solar_Radiation_MJ_per_m2``, sorted by
        date with a fresh 0..n-1 index (the same data written to the CSV).

    Raises
    ------
    KeyError
        If the file has no ``ssrd`` variable or no recognised time column.
    """
    # Step 1-2: Load the dataset and average over all lat/lon points.
    # The context manager releases the file handle even if a later step
    # fails; the DataFrame is fully materialised before the file is closed.
    with xr.open_dataset(input_path) as ds:
        df = (
            ds["ssrd"]
            .mean(dim=["latitude", "longitude"])
            .to_dataframe()
            .reset_index()
        )

    # Step 3: Locate the time column. "valid_time" is what the original code
    # expected; fall back to "time" for files that use that coordinate name.
    time_col = next((c for c in ("valid_time", "time") if c in df.columns), None)
    if time_col is None:
        raise KeyError(
            "No time column ('valid_time' or 'time') found; "
            f"available columns: {list(df.columns)}"
        )

    # NOTE(review): values are written exactly as stored in the file. ERA5
    # ssrd is normally delivered in J m**-2, so the "MJ" in the column name
    # may not match the data -- confirm units before relying on the label.
    df = (
        df[[time_col, "ssrd"]]
        .rename(columns={time_col: "Date", "ssrd": "Solar_Radiation_MJ_per_m2"})
        .sort_values("Date")
        .reset_index(drop=True)
    )

    # Step 4: Ensure the output folder exists. dirname() is "" for a bare
    # filename, and os.makedirs("") raises, so only create when non-empty.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Step 5: Save to CSV
    df.to_csv(output_path, index=False)
    print(f"✅ Solar radiation data saved to: {output_path}")

    return df


In [3]:
if __name__ == "__main__":
    # Raw ERA5 NetCDF file in, processed CSV out. Both paths are relative,
    # so they resolve against the current working directory.
    input_file = "../data/raw/data_stream-moda_stepType-avgad.nc"
    output_file = "../data/processed/Solar_Radiation_Monthly_Ireland.csv"
    extract_solar_radiation_to_csv(
        input_path=input_file,
        output_path=output_file,
    )


✅ Solar radiation data saved to: ../data/processed/Solar_Radiation_Monthly_Ireland.csv
