This notebook loads ERA5 monthly NetCDF data, extracts `ssrd` (surface solar radiation downwards), averages it over all grid points covering Ireland, and saves the cleaned time series as a .csv file — optimized for ML workflows.

In [1]:
pip install netCDF4

Note: you may need to restart the kernel to use updated packages.


In [None]:
#for working with NetCDF
import xarray as xr
import pandas as pd
import os

In [2]:
def extract_solar_radiation_to_csv(input_path, output_path):
    """
    Export the area-averaged ERA5 solar radiation (ssrd) time series to CSV.

    The NetCDF file is opened, ``ssrd`` is averaged over every
    latitude/longitude grid point present in the file (the file is assumed to
    be already cropped to Ireland), converted from J/m² to MJ/m², sorted by
    date and written to ``output_path``.

    Parameters
    ----------
    input_path : str or path-like
        ERA5 NetCDF file holding an ``ssrd`` variable on ``latitude`` and
        ``longitude`` dimensions.
    output_path : str or path-like
        Destination CSV file. Missing parent folders are created; a bare
        filename (no folder part) is written to the current directory.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Date`` and ``Solar_Radiation_MJ_per_m2``, sorted by
        ``Date`` with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If the file has no ``ssrd`` variable, or the averaged series carries
        neither a ``valid_time`` nor a ``time`` coordinate.
    """
    # Step 1: Load the dataset and average over space. The context manager
    # guarantees the file handle is released even if a later step fails.
    # NOTE: this is a plain (unweighted) mean over grid cells.
    with xr.open_dataset(input_path) as ds:
        df = ds["ssrd"].mean(dim=["latitude", "longitude"]).to_dataframe().reset_index()

    # Step 2: Locate the time column. Current CDS downloads name it
    # "valid_time"; legacy downloads use "time".
    time_col = next((name for name in ("valid_time", "time") if name in df.columns), None)
    if time_col is None:
        raise KeyError("No 'valid_time' or 'time' coordinate found for 'ssrd'")

    # Step 3: Keep time + value, rename, convert units, sort.
    # ERA5 delivers ssrd in J/m²; the output column is labelled MJ/m², so
    # divide by 1e6 to make the values match the label.
    df = (
        df[[time_col, "ssrd"]]
        .rename(columns={time_col: "Date", "ssrd": "Solar_Radiation_MJ_per_m2"})
        .assign(Solar_Radiation_MJ_per_m2=lambda frame: frame["Solar_Radiation_MJ_per_m2"] / 1_000_000.0)
        .sort_values("Date")
        .reset_index(drop=True)
    )

    # Step 4: Ensure the output folder exists. dirname() is "" for a bare
    # filename and os.makedirs("") raises, so only create a real folder.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Step 5: Save to CSV
    df.to_csv(output_path, index=False)
    print(f"✅ Solar radiation data saved to: {output_path}")

    return df


In [3]:
if __name__ == "__main__":
    # Source NetCDF file and destination CSV, both relative to the
    # notebook's working directory.
    input_file = "../data/raw/data_stream-moda_stepType-avgad.nc"
    output_file = "../data/processed/Solar_Radiation_Monthly_Ireland.csv"

    # Run the NetCDF -> CSV export.
    extract_solar_radiation_to_csv(
        input_path=input_file,
        output_path=output_file,
    )


✅ Solar radiation data saved to: ../data/processed/Solar_Radiation_Monthly_Ireland.csv


Clean & Export ERA5 NetCDF to Excel

This section processes ERA5 monthly .nc files by:
- Extracting `ssrd` (surface solar radiation downwards)
- Averaging it over all Ireland grid points
- Saving a clean monthly time series to Excel (.xlsx)

In [None]:
#xarray to load .nc files
import xarray as xr
#pandas to clean and export the data
import pandas as pd
#to handle folders and paths
import os

In [None]:
def extract_solar_radiation_to_excel(input_path, output_path):
    """
    Export the area-averaged ERA5 solar radiation (ssrd) time series to Excel.

    The NetCDF file is opened, ``ssrd`` is averaged over every
    latitude/longitude grid point present in the file (the file is assumed to
    be already cropped to Ireland), converted from J/m² to MJ/m², sorted by
    date and written to ``output_path`` as an .xlsx workbook.

    Parameters
    ----------
    input_path : str or path-like
        ERA5 NetCDF file holding an ``ssrd`` variable on ``latitude`` and
        ``longitude`` dimensions.
    output_path : str or path-like
        Destination Excel file. Missing parent folders are created; a bare
        filename (no folder part) is written to the current directory.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Date`` and ``Solar_Radiation_MJ_per_m2``, sorted by
        ``Date`` with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If the file has no ``ssrd`` variable, or the averaged series carries
        neither a ``valid_time`` nor a ``time`` coordinate.
    """
    # Load the dataset and average over space (lat/lon). The context manager
    # releases the file handle even if a later step fails.
    # NOTE: this is a plain (unweighted) mean over grid cells.
    with xr.open_dataset(input_path) as ds:
        df = ds["ssrd"].mean(dim=["latitude", "longitude"]).to_dataframe().reset_index()

    # Current CDS downloads name the time axis "valid_time"; legacy
    # downloads use "time".
    time_col = next((name for name in ("valid_time", "time") if name in df.columns), None)
    if time_col is None:
        raise KeyError("No 'valid_time' or 'time' coordinate found for 'ssrd'")

    # Keep only the needed columns, rename, convert units and sort by date.
    # ERA5 delivers ssrd in J/m²; the output column is labelled MJ/m², so
    # divide by 1e6 to make the values match the label.
    df = (
        df[[time_col, "ssrd"]]
        .rename(columns={time_col: "Date", "ssrd": "Solar_Radiation_MJ_per_m2"})
        .assign(Solar_Radiation_MJ_per_m2=lambda frame: frame["Solar_Radiation_MJ_per_m2"] / 1_000_000.0)
        .sort_values("Date")
        .reset_index(drop=True)
    )

    # Ensure the output directory exists. dirname() is "" for a bare
    # filename and os.makedirs("") raises, so only create a real folder.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Export to Excel
    df.to_excel(output_path, index=False)
    print(f"✅ Monthly solar radiation exported to: {output_path}")

    return df


In [None]:
if __name__ == "__main__":
    # Source NetCDF file and destination workbook, both relative to the
    # notebook's working directory.
    input_file = "../data/raw/data_stream-moda_stepType-avgad.nc"
    output_file = "../data/processed/Solar_Radiation_Monthly_Ireland.xlsx"

    # Run the NetCDF -> Excel export.
    extract_solar_radiation_to_excel(
        input_path=input_file,
        output_path=output_file,
    )