In [1]:
import pandas as pd

In [6]:
# Define input/output paths
raw_file = "../data/raw/Transmission_Connected_Wind_Gen_in_Ireland.xlsx"
output_file = "../data/processed/TSO_Wind_Capacity_Clean.csv"

In [7]:
# === 2️⃣ Load the Excel file ===
xls = pd.ExcelFile(raw_file)
sheet_names = xls.sheet_names  # Check sheet names if needed
df = pd.read_excel(xls, sheet_name=sheet_names[0])  # Use first sheet

In [8]:
# === 3️⃣ Rename and select relevant columns ===
df_clean = df.rename(columns={
    'Generator': 'Project',
    'Type': 'Type',
    'Category': 'Category',
    'Installed \nCapacity \n(MW)': 'Installed_MW',
    'Full / Latest Connection\nDate': 'Connection_Date',
    'County': 'County'
})

In [9]:
# === 4️⃣ Keep only required columns ===
df_clean = df_clean[['Project', 'Type', 'Category', 'Installed_MW', 'Connection_Date', 'County']]

In [10]:
# === 5️⃣ Add Connection_Type ===
df_clean['Connection_Type'] = 'TSO'

In [11]:
# === 6️⃣ Drop rows with missing Installed_MW or County (optional but recommended) ===
df_clean = df_clean.dropna(subset=['Installed_MW', 'County'])

In [13]:
# === 7️⃣ Save to processed folder ===
df_clean.to_csv(output_file, index=False)
print(f"✅ Cleaned TSO Wind capacity saved to: {output_file}")

✅ Cleaned TSO Wind capacity saved to: ../data/processed/TSO_Wind_Capacity_Clean.csv


 DSO Wind Cleaning + Mapping Code

In [14]:
# === 1️⃣ File paths ===
raw_file = '../data/raw/Distribution_Connected_Wind_Gen_in_Ireland.xlsx'
output_file = '../data/processed/DSO_Wind_Capacity_Clean.csv'

In [15]:
# === 2️⃣ Load Excel ===
xls = pd.ExcelFile(raw_file)
sheet_names = xls.sheet_names
df = pd.read_excel(xls, sheet_name=sheet_names[0])

In [16]:
# === 3️⃣ Rename and select ===
df_clean = df.rename(columns={
    'DSO Windfarm': 'Project',
    'Type': 'Type',
    'MEC (MW)': 'Installed_MW',
    'Connection Date': 'Connection_Date',
    '110kV Station': 'Station'
})

In [17]:
df_clean = df_clean[['Project', 'Type', 'Installed_MW', 'Connection_Date', 'Station']]

In [18]:
# === 4️⃣ Add Connection_Type ===
df_clean['Connection_Type'] = 'DSO'

In [19]:
# === 5️⃣ Manual station → county mapping ===
station_to_county = {
    "Arigna": "Roscommon",
    "Cauteen T144": "Tipperary",
    "Coomataggart": "Cork",
    "Reamore T142": "Kerry",
    "Salthill": "Galway",
    "Dalton T141": "Tipperary",
    "Lisheen": "Tipperary",
    "BOOLTIAGH T143": "Clare",
    "Cauteen T141": "Tipperary",
    "Cordal": "Kerry",
    "Arklow": "Wicklow",
    "Glenree": "Donegal",
    "GARVAGH": "Derry",
    "Charleville T141": "Cork",
    "Cauteen T142": "Tipperary",
    "Cauteen T142": "Tipperary",
    "Ikerrin": "Tipperary",
    "Barrymore": "Cork",
    "SORNE HILL": "Donegal",
    "Glenree": "Donegal",
    "Glenlara T142": "Cork",
    "Arklow": "Wicklow",
    "BOOLTIAGH T141": "Clare",
    "Macroom": "Cork",
    "Meath Hill": "Meath",
    "Trien T143": "Roscommon",
    "Carlow": "Carlow",
    "Wexford": "Wexford",
    "Cloghboola": "Cork",
    "BOGGERAGH": "Cork",
    "Ballylickey": "Cork",
    "TAWNAGHMORE": "Mayo",
    "Castlebar": "Mayo",
    "Cordal": "Kerry",
    "Charleville T141": "Cork",
    "Arklow": "Wicklow",
    "Kilpaddoge": "Kerry",
    "Ardnagappary": "Mayo",
    "Cathaleens Fall": "Donegal",
    "Trien T143": "Roscommon",
    "Dunmanway": "Cork",
    "Glenlara T141": "Cork",
    "Dundalk": "Louth",
    "Reamore T141": "Kerry",
    "Tralee": "Kerry",
    "Rathkeale": "Limerick",
    "Letterkenny": "Donegal",
    "Tralee": "Kerry",
    "Crory T121": "Wexford",
    "Crory T121": "Wexford",
    "Trillick": "Tyrone",
    "Thurles": "Tipperary",
    "Glenlara T141": "Cork",
    "Tullabrack": "Clare",
    "BOOLTIAGH T143": "Clare",
    "Cauteen T141": "Tipperary",
    "Tullabrack": "Clare",
    "Cloghboola": "Cork",
    "Glenree 110-MV": "Donegal",
    "Rathkeale": "Limerick",
    "Cloghboola": "Cork",
    "BOGGERAGH": "Cork",
    "Corderry T122": "Monaghan",
    "Wexford": "Wexford",
    "Binbane": "Donegal",
    "Letterkenny": "Donegal",
    "Thurles": "Tipperary",
    "Ballylickey": "Cork",
    "Meath Hill": "Meath",
    "Binbane T143": "Donegal",
    "Glenlara T141": "Cork",
    "Meath Hill": "Meath",
    "Corderry T122": "Monaghan",
    "Garrow": "Donegal",
    "Cloghboola": "Cork",
    "Dallow": "Cork",
    "Binbane T143": "Donegal",
    "Trillick": "Tyrone",
    "Crory T121": "Wexford",
    "Thurles": "Tipperary",
    "Knockearagh": "Kerry",
    "SORNE HILL": "Donegal",
    "Ballyragget": "Kilkenny",
    "Binbane T143": "Donegal",
    "Meath Hill": "Meath",
    "Cauteen T141": "Tipperary",
    "Corderry T122": "Monaghan",
    "Oughtragh": "Leitrim",
    "Bandon": "Cork",
    "Ballylickey": "Cork",
    "Castlebar": "Mayo",
    "Ballylickey": "Cork",
    "Waterford": "Waterford",
    "BOOLTIAGH T141": "Clare",
    "Dunmanway": "Cork",
    "Sligo": "Sligo",
    "Corderry T122": "Monaghan",
    "Binbane": "Donegal",
    "Somerset": "Galway",
    "Meath Hill": "Meath",
    "Ardnacrusha": "Clare",
    "Trien": "Roscommon",
    "SORNE HILL": "Donegal",
    "Trillick": "Tyrone",
    "Castlebar": "Mayo",
    "Corderry T122": "Monaghan",
    "Tralee": "Kerry",
    "Wexford": "Wexford",
    "Trien T143": "Roscommon",
    "Bellacorick 110-MV": "Mayo",
    "Sligo": "Sligo",
    "BOOLTIAGH T143": "Clare",
    "Waterford": "Waterford",
    "Binbane T143": "Donegal",
    "Moy": "Mayo",
    "BOGGERAGH": "Cork",
    "Dunmanway": "Cork",
    "Dunmanway": "Cork",
    "Tonroe": "Mayo",
    "BOGGERAGH": "Cork",
    "Tralee": "Kerry",
    "Crory T121": "Wexford",
    "Arigna": "Roscommon",
    "Trillick": "Tyrone",
    "Letterkenny": "Donegal",
    "Rathkeale": "Limerick",
    "Crane": "Galway",
    "Garrow": "Donegal",
    "Dungarvan": "Waterford",
    "Carlow": "Carlow",
    "Ardnagappary": "Mayo",
    "Corderry T121": "Monaghan",
    "Trillick": "Tyrone",
    "Arigna": "Roscommon",
    "Charleville": "Cork",
    "Charleville": "Cork",
    "BOOLTIAGH T143": "Clare",
    "Castlebar": "Mayo",
    "Dunmanway": "Cork",
    "Thurles": "Tipperary",
    "Arigna": "Roscommon",
    "Lanesborough": "Longford",
    "Tullabrack": "Clare",
    "Trien": "Roscommon",
    "Dunmanway": "Cork",
    "Trien": "Roscommon",
    "Carlow": "Carlow",
    "Bandon": "Cork",
    "Meath Hill": "Meath",
    "Knockearagh": "Kerry",
    "Charleville": "Cork",
    "Charleville": "Cork",
    "Ballylickey": "Cork",
    "Trien": "Roscommon",
    "Dunmanway": "Cork",
    "Waterford": "Waterford",
    "SORNE HILL": "Donegal",
    "Dallow": "Cork",
    "Cloon": "Mayo",
    "Dallow": "Cork",
    "SORNE HILL": "Donegal",
    "Crory T121": "Wexford",
    "Crory T121": "Wexford",
    "Trillick": "Tyrone",
    "Corderry T122": "Monaghan",
    "Tralee": "Kerry",
    "Crory T121": "Wexford",
    "NENAGH": "Tipperary",
    "Cordal": "Kerry",
    "Tonroe": "Mayo",
    "Binbane T143": "Donegal",
    "Crory T121": "Wexford",
    "Corderry T121": "Monaghan",
    "Corderry T121": "Monaghan",
    "Letterkenny": "Donegal",
    "Cathaleens Fall": "Donegal",
    "Ikerrin": "Tipperary",
    "Castlebar": "Mayo",
    "Gortawee": "Cavan",
    "Tipperary": "Tipperary",
    "Shankill": "Dublin",
    "Screeb": "Galway",
    "Cauteen T141": "Tipperary",
    "Glenree": "Donegal",
    "Screeb": "Galway",
    "Meath Hill": "Meath",
    "Binbane": "Donegal",
    "Letterkenny": "Donegal",
    "Dallow": "Cork",
    "NENAGH": "Tipperary",
    "Carlow": "Carlow",
    "NENAGH": "Tipperary",
    "Dalton": "Tipperary",
    "Bellacorick": "Mayo",
    "Trien": "Roscommon",
    "NENAGH": "Tipperary",
    "Binbane": "Donegal",
    "Barnahely 110-MV": "Cork",
    "Crane": "Galway",
    "Tonroe": "Mayo",
    "Letterkenny": "Donegal",
    "NENAGH": "Tipperary",
    "SORNE HILL": "Donegal",
    "Drybridge": "Wexford",
    "Thurles": "Tipperary",
    "SORNE HILL": "Donegal",
    "Barnahely 110-MV": "Cork",
    "Cathaleens Fall": "Donegal",
    "Drybridge": "Wexford",
    "Reamore T142": "Kerry",
    "Drybridge": "Wexford",
    "Carlow": "Carlow",
    "BUTLERSTOWN": "Waterford",
    "Tralee": "Kerry",
    "Midleton": "Cork",
    "Trien": "Roscommon",
    "Tipperary": "Tipperary",
    "Shankill": "Dublin",
    "Corderry T122": "Monaghan",
    "Castlebar": "Mayo",
    "Ballylickey": "Cork",
    "Shankill": "Dublin",
    "SORNE HILL": "Donegal",
    "Trien": "Roscommon",
    "Cauteen T141": "Tipperary",
    "Kilbarry": "Waterford",
    "Drybridge": "Wexford",
    "Screeb": "Galway",
    "ARDNAGAPPARY": "Mayo",
    "Ardnacrusha": "Clare",
    "Crory T121": "Wexford",
    "SORNE HILL": "Donegal",
    "Carlow": "Carlow",
    "Glasmore": "Dublin",
    "Dundalk": "Louth",
    "Cathaleens Fall": "Donegal",
    "SORNE HILL": "Donegal",
    "Waterford": "Waterford",
    "Carlow": "Carlow",
    "Carlow": "Carlow",
    "Thurles": "Tipperary",
    "Trien": "Roscommon",
    "Glasmore": "Dublin",
    "Bandon 110-MV": "Cork",
    "Cahir": "Tipperary",
    "Dungarvan": "Waterford",
    "Sorne Hill": "Donegal",
    "SORNE HILL": "Donegal",
    "Drybridge": "Wexford",
    "Corderry T122": "Monaghan",
    "Grange (DR)": "Sligo",
    "Ardnacrusha": "Clare",
    "Crane": "Galway",
    "Drybridge": "Wexford",
    "Drybridge": "Wexford",
    "Dungarvan": "Waterford",
    "Carlow": "Carlow",
    "Cahir": "Tipperary",
    "Stratford": "Wicklow",
    "Binbane": "Donegal",
    "ATHLONE": "Westmeath",
    "WEXFORD": "Wexford",
    "COOKSTOWN": "Tyrone",
    "Cathaleens Fall": "Donegal",
    "Griffinrath": "Kildare",
    "Shankill": "Dublin",
    "Bellacorick": "Mayo",
    "Shankill": "Dublin",
    "Meath Hill": "Meath",
    "CAHIR": "Tipperary",
}


In [20]:
# === 6️⃣ Map to County ===
df_clean['County'] = df_clean['Station'].map(station_to_county)

In [21]:
# === 7️⃣ Drop rows without valid County (unmapped stations)
missing_county = df_clean[df_clean['County'].isna()]
if not missing_county.empty:
    print("⚠️ Unmapped stations found:\n", missing_county['Station'].unique())

⚠️ Unmapped stations found:
 ['CORDERRY T122' 'CRANE' nan]


In [22]:
df_clean = df_clean.dropna(subset=['County'])

In [23]:
# === 8️⃣ Drop Station column — not needed after mapping
df_clean = df_clean[['Project', 'Type', 'Installed_MW', 'Connection_Date', 'County', 'Connection_Type']]

In [24]:
# === 9️⃣ Save cleaned file ===
df_clean.to_csv(output_file, index=False)

In [25]:
print(f"✅ Cleaned DSO Wind capacity saved to: {output_file}")

✅ Cleaned DSO Wind capacity saved to: ../data/processed/DSO_Wind_Capacity_Clean.csv


TSO Solar Cleaning Code

In [3]:
import pandas as pd

In [4]:
# === 1️⃣ File paths ===
raw_file = '../data/raw/Transmission_Connected_Solar_Ge_in_ireland.xlsx'
output_file = '../data/processed/TSO_Solar_Capacity_Clean.csv'


In [5]:
# === 2️⃣ Load Excel ===
xls = pd.ExcelFile(raw_file)
sheet_names = xls.sheet_names
df = pd.read_excel(xls, sheet_name=sheet_names[0])

In [6]:
# === 3️⃣ Rename & select ===
df_clean = df.rename(columns={
    'Generator': 'Project',
    'Type': 'Type',
    'Installed \nCapacity \n(MW)': 'Installed_MW',
    'Full / Latest Connection\nDate': 'Connection_Date',
    'County': 'County'
})

In [7]:
df_clean = df_clean[['Project', 'Type', 'Installed_MW', 'Connection_Date', 'County']]

In [8]:
# === 4️⃣ Add Category & Connection_Type ===
df_clean['Category'] = 'Onshore'
df_clean['Connection_Type'] = 'TSO'

In [9]:
# === 5️⃣ Drop rows with missing Installed_MW or County (if any)
df_clean = df_clean.dropna(subset=['Installed_MW', 'County'])

In [10]:
# === 6️⃣ Save cleaned CSV ===
df_clean.to_csv(output_file, index=False)

print(f"✅ TSO Solar cleaned capacity saved to: {output_file}")

✅ TSO Solar cleaned capacity saved to: ../data/processed/TSO_Solar_Capacity_Clean.csv


DSO Solar Cleaning + Mapping Code

In [37]:
import pandas as pd

# === 1️⃣ File paths ===
raw_file = '../data/raw/Distribution_Connected_Solar_Ge_in_Ireland.xlsx'
output_file = '../data/processed/DSO_Solar_Capacity_Clean.csv'

In [38]:
# === 2️⃣ Load Excel ===
xls = pd.ExcelFile(raw_file)
sheet_names = xls.sheet_names
df = pd.read_excel(xls, sheet_name=sheet_names[0])

In [39]:
# === 3️⃣ Rename & select relevant columns ===
df_clean = df.rename(columns={
    'Project': 'Project',
    'Type': 'Type',
    'MEC (MW)': 'Installed_MW',
    'Connection Date': 'Connection_Date',
    'Feeding 110kV Station': 'Station',
    'Status': 'Status'
})

In [40]:
df_clean = df_clean[['Project', 'Type', 'Installed_MW', 'Connection_Date', 'Station', 'Status']]

In [41]:
# === 4️⃣ Keep only Energised projects ===
df_clean = df_clean[df_clean['Status'] == 'Energised']

In [42]:
# === 5️⃣ Drop the Status column now that it's filtered ===
df_clean = df_clean.drop(columns=['Status'])

In [43]:
# === 6️⃣ Manual station → county mapping ===
station_to_county = {
    "Dunfirth": "Meath",
    "BALTRASNA": "Meath",
    "Crane 110-MV": "Galway",
    "Ballybeg": "Wexford",
    "Wexford": "Wexford",
    "Great Island": "Wexford",
    "Kilbarry": "Waterford",
    "Wexford": "Wexford",
    "BANOGE": "Offaly",
    "Macroom": "Cork",
    "Waterford": "Waterford",
    "Midleton": "Cork",
    "Navan": "Meath",
    "Midleton": "Cork",
    "Dalton": "Tipperary",
    "Cahir": "Tipperary",
    "Ikerrin": "Tipperary",
    "Thurles": "Tipperary",
    "BARRYMORE": "Cork",
    "Castleview": "Cork",
    "LIMERICK": "Limerick",
    "DOON": "Limerick",
    "Dundalk": "Louth",
    "Thornsberry": "Offaly",
    "Shankill": "Dublin",
    "Trabeg": "Kerry",
    "Glenlara": "Cork",
    "Kilbarry": "Waterford",
    "BALTRASNA": "Meath",
    "Galway 110kV": "Galway",
    "Dundalk": "Louth",
    "Mallow 110kV": "Cork",
    "Drybridge": "Wexford",
    "Cahir 110kV": "Tipperary",
    "Kilkenny 110kV/38kV": "Kilkenny",
    "Banoge 110kV": "Offaly"
}

In [44]:
# === 7️⃣ Map to County ===
df_clean['County'] = df_clean['Station'].map(station_to_county)

In [45]:
# === 8️⃣ Drop rows with no valid County
df_clean = df_clean.dropna(subset=['County'])

In [46]:
# === 9️⃣ Add Category & Connection_Type
df_clean['Category'] = 'Onshore'
df_clean['Connection_Type'] = 'DSO'

In [47]:
# === 🔟 Final columns order
df_clean = df_clean[['Project', 'Type', 'Category', 'Installed_MW', 'Connection_Date', 'County', 'Connection_Type']]

In [48]:
# === ✅ Save to processed folder
df_clean.to_csv(output_file, index=False)

print(f"✅ DSO Solar cleaned capacity saved to: {output_file}")

✅ DSO Solar cleaned capacity saved to: ../data/processed/DSO_Solar_Capacity_Clean.csv
