This script takes the Republic of Ireland's annual solar PV capacity data (2008–2023), checks it for consistency, expands it into monthly values by repeating each year's figure across its twelve months, and prepares it for modeling by saving a clean CSV file.

In [2]:
#To load and manipulate data
import pandas as pd
#To handle safe saving of output files
import os

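Cleans and converts annual solar capacity data into monthly values:
- Coerces the year and capacity columns to numeric and drops rows with missing values
- Repeats each annual value across 12 monthly entries (step-wise; values are not interpolated between years)
- Creates a proper datetime column (YYYY-MM-01)
- Saves the result to the processed folder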

In [3]:
def clean_solar_capacity_data(input_path, output_path):
    """Expand annual solar capacity figures into a monthly table and save it as CSV.

    The Excel file at ``input_path`` must contain the columns ``Year`` and
    ``Solar_Capacity_MW``. Both are coerced to numeric; rows where either of
    the two cannot be parsed are dropped. Every remaining annual value is
    repeated for each of the 12 months of its year (a step-wise expansion,
    no interpolation between years), dated on the first day of the month.

    Parameters
    ----------
    input_path : str or path-like
        Location of the Excel workbook, passed to ``pd.read_excel``.
    output_path : str or path-like
        Destination CSV file. Its parent directory is created if it does
        not exist yet; a bare file name writes to the current directory.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` and ``Solar_Capacity_MW``, sorted by ``Date`` with a
        fresh 0..n-1 index. Empty (but with both columns) when no valid
        input rows remain.

    Raises
    ------
    KeyError
        If ``Year`` or ``Solar_Capacity_MW`` is missing from the workbook.
    """
    required_columns = ["Year", "Solar_Capacity_MW"]

    # Step 1: Load Excel
    df_raw = pd.read_excel(input_path)

    missing_columns = [col for col in required_columns if col not in df_raw.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s): {missing_columns}")

    # Step 2: Ensure numeric format and drop missing.
    # Only the two required columns are kept, so blanks in unrelated columns
    # (notes, sources, ...) cannot cause a valid year to be discarded.
    df_annual = pd.DataFrame({
        col: pd.to_numeric(df_raw[col], errors="coerce") for col in required_columns
    }).dropna()

    # Step 3: Spread each year into 12 monthly values.
    # Iterating the columns directly (rather than iterrows) keeps the capacity
    # values in their own dtype instead of upcasting them together with Year.
    monthly_data = [
        {
            "Date": pd.Timestamp(year=int(year), month=month, day=1),
            "Solar_Capacity_MW": value,
        }
        for year, value in zip(df_annual["Year"], df_annual["Solar_Capacity_MW"])
        for month in range(1, 13)
    ]

    # Step 4: Create and clean new DataFrame.
    # Passing `columns` guarantees both columns exist even when no rows survive,
    # so the sort below cannot fail with a KeyError on empty input.
    df_monthly = (
        pd.DataFrame(monthly_data, columns=["Date", "Solar_Capacity_MW"])
        .sort_values("Date", kind="stable")
        .reset_index(drop=True)
    )

    # Step 5: Save to processed CSV.
    # dirname() is "" for a bare file name, and os.makedirs("") raises, so the
    # directory is only created when the path actually has one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df_monthly.to_csv(output_path, index=False)
    print(f"✅ Monthly solar capacity data saved to: {output_path}")

    return df_monthly


Execute the cleaning when script is run

In [4]:
# Entry point: run the cleaning step with the project's default raw/processed paths.
if __name__ == "__main__":
    # Paths are relative to the notebook's working directory.
    input_file, output_file = (
        "../data/raw/Solar_Capacity_Ireland_2008_2023.xlsx",
        "../data/processed/Cleaned_Solar_Capacity.csv",
    )
    clean_solar_capacity_data(input_path=input_file, output_path=output_file)

✅ Monthly solar capacity data saved to: ../data/processed/Cleaned_Solar_Capacity.csv
