Final Solar Merge + Zone Code

In [9]:
import pandas as pd

# === 1️⃣ File paths ===
# Inputs: the two cleaned solar capacity CSVs that get combined below.
tso_file = "../data/processed/TSO_Solar_Capacity_Clean.csv"
dso_file = "../data/processed/DSO_Solar_Capacity_Clean.csv"
# Outputs: the full merged table and the per-zone aggregate.
merged_output = "../data/processed/Solar_Capacity_Master.csv"
regional_output = "../data/processed/Regional_Solar_Capacity.csv"

In [10]:
# === 2️⃣ Load both cleaned CSVs ===
# Read the TSO file first, then the DSO file, each into its own DataFrame.
df_tso, df_dso = (pd.read_csv(csv_path) for csv_path in (tso_file, dso_file))

In [11]:
# === 3️⃣ Merge them ===
# Stack the TSO rows on top of the DSO rows and renumber the index from 0.
df_merged = pd.concat(objs=[df_tso, df_dso], axis=0, ignore_index=True)

In [12]:
# === 4️⃣ Define Republic-only zones ===
# Each list names the counties that belong to one zone; county_to_zone()
# checks membership in these lists to label every row.

east_counties = [
    "Dublin",
    "Meath",
    "Louth",
    "Offaly",
    "Wexford",
    "Waterford",
    "Kilkenny",
    "Tipperary",
    "Kildare",
]

west_counties = [
    "Galway",
]

# Intentionally empty: all Republic-only stations are East, West, or South.
north_counties = []

south_counties = [
    "Cork",
    "Kerry",
    "Limerick",
]

# Northern Ireland counties: labelled 'NI' and skipped for the final
# Regional table.
ni_counties = [
    "Derry",
    "Tyrone",
    "Antrim",
    "Down",
    "Armagh",
    "Fermanagh",
]

In [13]:
# === 5️⃣ Map County → Zone ===
def county_to_zone(county):
    """Return the zone label for a county name.

    The county is tested against the module-level county lists in a fixed
    order: NI first, then East, West, North and South. The first list that
    contains the county decides the label.

    Leading and trailing whitespace on string inputs is ignored, so a value
    such as ``"Dublin "`` is still recognised instead of silently falling
    through to ``'Other'``. Matching remains case-sensitive.

    Parameters
    ----------
    county : object
        The county value to classify. Non-string values (for example a
        missing cell read as NaN) are compared as-is and end up as
        ``'Other'`` when they match no list.

    Returns
    -------
    str
        One of ``'NI'``, ``'East'``, ``'West'``, ``'North'``, ``'South'``
        or ``'Other'``.
    """
    # Only strings can carry stray whitespace; leave everything else untouched.
    if isinstance(county, str):
        county = county.strip()

    # Ordered (label, members) pairs; order preserves the original precedence.
    zone_lookup = (
        ('NI', ni_counties),
        ('East', east_counties),
        ('West', west_counties),
        ('North', north_counties),
        ('South', south_counties),
    )
    for zone_label, members in zone_lookup:
        if county in members:
            return zone_label
    return 'Other'

In [14]:
# Label every row with its zone by running county_to_zone over the County column.
df_merged['Zone'] = df_merged['County'].map(county_to_zone)

In [15]:
# === 6️⃣ Print unmapped ===
# Lists the distinct County values whose Zone came out as 'Other', i.e. the
# ones that matched none of the county lists.
print("\n✅ Solar Counties mapped as 'Other':")
print(df_merged.loc[df_merged['Zone'].eq('Other'), 'County'].unique())


✅ Solar Counties mapped as 'Other':
[]


In [16]:
# === 7️⃣ Save the full merged Solar table ===
# No rows are filtered before this write, so NI and 'Other' rows are kept.
df_merged.to_csv(path_or_buf=merged_output, index=False)
print(f"✅ Solar capacity master saved to: {merged_output}")

✅ Solar capacity master saved to: ../data/processed/Solar_Capacity_Master.csv


In [17]:
# === 8️⃣ Exclude NI before the regional sum ===
# Only rows labelled 'NI' are dropped; every other zone label is kept.
df_regional = df_merged.loc[df_merged['Zone'].ne('NI')]

In [18]:
# === 9️⃣ Total Installed_MW per Zone / Type / Connection_Type ===
# The result has one row per key combination, with the keys restored as
# ordinary columns by reset_index().
# NOTE(review): groupby skips rows whose grouping keys are missing (pandas
# default) — confirm Type and Connection_Type are always populated.
df_regional = (
    df_regional
    .groupby(by=['Zone', 'Type', 'Connection_Type'])[['Installed_MW']]
    .sum()
    .reset_index()
)

In [19]:
# === 🔟 Save the regional grouped Solar table ===
# Writes the per-zone totals without the DataFrame index column.
df_regional.to_csv(path_or_buf=regional_output, index=False)
print(f"✅ Regional solar capacity saved to: {regional_output}")

✅ Regional solar capacity saved to: ../data/processed/Regional_Solar_Capacity.csv
