In [1]:
import pandas as pd
import os

def read_excel_files(folder_path):
    """
    Reads all Excel workbooks in the specified folder into pandas DataFrames.
    The DataFrames are stored in a dictionary, keyed by the year or year range.

    Only names ending in '.xlsx' are loaded. Names starting with '~$' are skipped:
    Excel creates such lock files next to a workbook that is currently open, and
    they are not readable workbooks. Files are processed in sorted filename order
    so the dictionary order is the same on every run (os.listdir makes no ordering
    guarantee).

    Args:
    - folder_path (str): The path to the folder containing the Excel files.

    Returns:
    - dict: A dictionary where each key is a string representing the year or year range of the data
            (the filename with the 'System-Data-Qtr-Hourly-' prefix and '.xlsx' extension removed),
            and each value is a pandas DataFrame of the data from the corresponding file.
            Empty if the folder holds no matching files.

    Raises:
    - OSError: Propagated from os.listdir when folder_path cannot be listed
               (for example FileNotFoundError if it does not exist).
    """
    dataframes = {}

    # sorted() gives a stable, platform-independent load order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.xlsx'):
            continue
        if filename.startswith('~$'):
            # Excel lock file for an open workbook, not real data.
            continue

        # Extract the year or year range from the filename
        year_range = filename.replace('System-Data-Qtr-Hourly-', '').replace('.xlsx', '')
        file_path = os.path.join(folder_path, filename)
        dataframes[year_range] = pd.read_excel(file_path)

    return dataframes


In [8]:
# Folder holding the 'System-Data-Qtr-Hourly-<period>.xlsx' workbooks (relative path).
folder_path = 'data'  # Update this to the correct path if needed
# Load every workbook once; each key is the period taken from the filename.
dataframes_dict = read_excel_files(folder_path)

# Access each DataFrame using its year or year range as the key, e.g.:
# df_2014_2015 = dataframes_dict['2014-2015']


In [10]:
# Store each period's dataset in a separate DataFrame for checking and processing.
# Each key must match the period part of a loaded filename; a missing file raises KeyError here.

df_2014_2015 = dataframes_dict['2014-2015']
df_2016_2017 = dataframes_dict['2016-2017']
df_2018_2019 = dataframes_dict['2018-2019']
df_2020_2021 = dataframes_dict['2020-2021']
df_2022_2023 = dataframes_dict['2022-2023']
df_2024_up_to_march = dataframes_dict['2024-Mar']

In [23]:
# Preview 2014-2015: quarter-hourly NI/IE generation, demand, wind and SNSP (no solar columns),
# plus an empty 'Unnamed: 11' column and a free-text notes column carried over from the spreadsheet.
df_2014_2015.head()

Unnamed: 0,DateTime,GMT Offset,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,IE Generation,IE Demand,IE Wind Availability,IE Wind Generation,SNSP,Unnamed: 11,NI: Northern Ireland\nIE: Ireland
0,2014-01-01 00:00:00,0,637.98,859.36,367.58,365.57,2752.48,2898.72,1053.11,1020.23,0.4579,,Notes: \nThe system data in this report is is ...
1,2014-01-01 00:15:00,0,625.68,855.46,345.94,352.82,2733.59,2868.97,1021.59,995.07,0.4505,,
2,2014-01-01 00:30:00,0,614.72,840.0,333.22,339.6,2686.17,2826.42,972.06,932.71,0.4383,,
3,2014-01-01 00:45:00,0,588.73,824.25,307.44,313.66,2657.56,2786.94,985.81,959.06,0.4447,,
4,2014-01-01 01:00:00,0,593.06,818.84,306.87,315.17,2584.65,2723.94,958.63,920.82,0.4429,,


In [13]:
# Preview 2016-2017: the output shows the same column layout as the 2014-2015 preview (no solar columns).
df_2016_2017.head()

Unnamed: 0,DateTime,GMT Offset,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,IE Generation,IE Demand,IE Wind Availability,IE Wind Generation,SNSP,Unnamed: 11,NI: Northern Ireland\nIE: Ireland
0,2016-01-01 00:00:00,0,983.74,809.19,386.45,377.76,2970.38,2970.79,714.84,673.64,0.2521,,Notes: \nThe system data in this report is is ...
1,2016-01-01 00:15:00,0,924.47,801.97,364.28,359.51,3076.98,2966.94,749.74,704.85,0.2528,,
2,2016-01-01 00:30:00,0,930.77,786.43,379.74,376.38,2935.55,2933.97,737.41,692.85,0.2623,,
3,2016-01-01 00:45:00,0,902.89,779.32,342.0,336.94,2964.88,2876.85,722.9,683.34,0.2507,,
4,2016-01-01 01:00:00,0,876.46,768.91,322.94,317.16,2953.5,2817.73,726.04,684.65,0.2481,,


In [14]:
# Preview 2018-2019: this file adds 'NI Solar Availability' and 'NI Solar Generation',
# so the empty spreadsheet column is now named 'Unnamed: 13'.
df_2018_2019.head()

Unnamed: 0,DateTime,GMT Offset,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,NI Solar Availability,NI Solar Generation,IE Generation,IE Demand,IE Wind Availability,IE Wind Generation,SNSP,Unnamed: 13,NI: Northern Ireland\nIE: Ireland
0,2018-01-01 00:00:00,0,960.779,719.573,588.375,519.465,0.46,0.484,3486.41,3049.03,2191.94,2069.44,0.5655,,Notes: \nThe system data in this report is is ...
1,2018-01-01 00:15:00,0,950.704,711.551,591.868,511.766,0.46,0.485,3460.88,3015.61,2123.39,1994.07,0.5523,,
2,2018-01-01 00:30:00,0,942.374,699.66,567.388,501.927,0.46,0.487,3391.46,2958.13,2023.78,1894.12,0.5369,,
3,2018-01-01 00:45:00,0,949.722,692.245,560.438,500.021,0.46,0.486,3328.62,2910.69,2003.35,1866.58,0.5369,,
4,2018-01-01 01:00:00,0,956.849,698.968,564.892,501.535,0.46,0.485,3293.7,2863.56,2014.68,1884.76,0.5449,,


In [15]:
# Preview 2020-2021: the output shows the same column layout as the 2018-2019 preview (includes NI solar columns).
df_2020_2021.head()

Unnamed: 0,DateTime,GMT Offset,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,NI Solar Availability,NI Solar Generation,IE Generation,IE Demand,IE Wind Availability,IE Wind Generation,SNSP,Unnamed: 13,NI: Northern Ireland\nIE: Ireland
0,2020-01-01 00:00:00,0,805.808,736.418,268.222,267.818,0.0,0.0,2708.45,3035.95,331.08,324.1,0.2132,,Notes: \nThe system data in this report is is ...
1,2020-01-01 00:15:00,0,808.93,727.636,271.798,272.509,0.0,0.0,2757.59,3001.06,332.35,324.88,0.1981,,
2,2020-01-01 00:30:00,0,799.635,715.448,264.655,264.816,0.0,0.0,2765.93,2956.65,326.64,318.97,0.1921,,
3,2020-01-01 00:45:00,0,781.243,704.161,256.279,255.498,0.0,0.0,2741.16,2912.98,319.0,311.37,0.1871,,
4,2020-01-01 01:00:00,0,828.025,714.902,256.845,257.602,0.0,0.0,2727.65,2868.51,328.5,320.47,0.1742,,


In [27]:
# Check the filtered 2020-2021 frame: 'GMT Offset', 'Unnamed: 13' and the notes column should be gone.
# NOTE(review): df_test_20_21 is only assigned in a cell further down the notebook (In [26]),
# so on a top-to-bottom run this cell raises NameError; move it below that cell.
df_test_20_21.head()

Unnamed: 0,DateTime,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,NI Solar Availability,NI Solar Generation,IE Generation,IE Demand,IE Wind Availability,IE Wind Generation,SNSP
0,2020-01-01 00:00:00,805.808,736.418,268.222,267.818,0.0,0.0,2708.45,3035.95,331.08,324.1,0.2132
1,2020-01-01 00:15:00,808.93,727.636,271.798,272.509,0.0,0.0,2757.59,3001.06,332.35,324.88,0.1981
2,2020-01-01 00:30:00,799.635,715.448,264.655,264.816,0.0,0.0,2765.93,2956.65,326.64,318.97,0.1921
3,2020-01-01 00:45:00,781.243,704.161,256.279,255.498,0.0,0.0,2741.16,2912.98,319.0,311.37,0.1871
4,2020-01-01 01:00:00,828.025,714.902,256.845,257.602,0.0,0.0,2727.65,2868.51,328.5,320.47,0.1742


In [19]:
# List the 2020-2021 column names to decide which ones to keep.
df_2020_2021.columns

Index(['DateTime', 'GMT Offset', 'NI Generation', 'NI Demand',
       'NI Wind Availability', 'NI Wind Generation', 'NI Solar Availability',
       'NI Solar Generation', 'IE Generation', 'IE Demand',
       'IE Wind Availability', 'IE Wind Generation', 'SNSP', 'Unnamed: 13',
       'NI: Northern Ireland\nIE: Ireland'],
      dtype='object')

In [24]:
# Measurement columns to retain; 'GMT Offset', the 'Unnamed: N' columns and the notes column are left out.
columns_to_keep = [
    'DateTime',
    'NI Generation',
    'NI Demand',
    'NI Wind Availability',
    'NI Wind Generation',
    'NI Solar Availability',
    'NI Solar Generation',
    'IE Generation',
    'IE Demand',
    'IE Wind Availability',
    'IE Wind Generation',
    'SNSP',
]


In [26]:
# Trial run of the column filter on the 2020-2021 data: keep the names listed in columns_to_keep,
# in that order, skipping any name the frame does not have.
df_test_20_21 = df_2020_2021[[col for col in columns_to_keep if col in df_2020_2021.columns]]


In [28]:
def clean_df_columns_before_2022(df: pd.DataFrame) -> pd.DataFrame:
    """Filters DataFrames for years before 2022 to keep only selected relevant columns.

    Only the columns present in both the DataFrame and the predefined list below are
    retained, in the order of that list. Columns from the list that are missing in the
    input (for example the NI solar columns, which the 2014-2017 previews do not show)
    are silently ignored.

    The result is an independent copy, so adding or overwriting columns on it later
    neither touches ``df`` nor triggers pandas' SettingWithCopyWarning.

    Args:
        df (pd.DataFrame): The input DataFrame for years before 2022.

    Returns:
        pd.DataFrame: A new DataFrame containing only the selected columns that exist
        in the input DataFrame.
    """
    columns_to_keep = ['DateTime', 'NI Generation', 'NI Demand', 'NI Wind Availability', 'NI Wind Generation',
                       'NI Solar Availability', 'NI Solar Generation', 'IE Generation', 'IE Demand',
                       'IE Wind Availability', 'IE Wind Generation', 'SNSP']

    # Filter the wanted names by what this frame actually has; order follows columns_to_keep.
    present_columns = [col for col in columns_to_keep if col in df.columns]

    # Explicit copy: a bare df[[...]] selection is flagged by pandas as a potential
    # slice of df, and assigning to it later emits SettingWithCopyWarning.
    return df[present_columns].copy()
    

In [25]:
# Display the module-level keep-list to confirm its contents.
columns_to_keep

['DateTime',
 'NI Generation',
 'NI Demand',
 'NI Wind Availability',
 'NI Wind Generation',
 'NI Solar Availability',
 'NI Solar Generation',
 'IE Generation',
 'IE Demand',
 'IE Wind Availability',
 'IE Wind Generation',
 'SNSP']

In [18]:
# List the 2022-2023 column names: this file has a much wider schema than the earlier ones, adding
# penetration ratios, 'Moyle I/C' / 'EWIC I/C', IE solar and hydro, and All-Island ('AI ...') columns.
df_2022_2023.columns

Index(['DateTime', 'GMT Offset', 'NI Generation', 'NI Demand',
       'NI Wind Availability', 'NI Wind Generation', 'NI Solar Availability',
       'NI Solar Generation', 'Moyle I/C', 'NI Wind Penetration',
       'NI Solar Penetration', 'IE Generation', 'IE Demand',
       'IE Wind Availability', 'IE Wind Generation', 'IE Solar Availability',
       'IE Solar Generation', 'IE Hydro', 'EWIC I/C', 'IE Wind Penetration',
       'IE Solar Penetration', 'AI Generation', 'AI Demand',
       'AI Wind Availability', 'AI Wind Generation', 'AI Solar Availability',
       'AI Solar Generation', 'AI Hydro', 'Inter-Jurisdictional Flow',
       'AI Wind Penetration', 'AI Solar Penetration', 'AI Oversupply',
       'AI Oversupply Percentage', 'SNSP', 'Unnamed: 34', 'Unnamed: 35',
       'NI: Northern Ireland \nIE: Ireland \nAI: All Island '],
      dtype='object')

In [21]:
# Spot check on the first 2022-2023 row: 'NI Wind Generation' (596.153) / 'NI Demand' (598.227)
# should reproduce that row's 'NI Wind Penetration' value (0.996533).
596.153/598.227

0.9965330886101765

In [None]:
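# Working notes on derived columns (kept as comments so this cell is valid Python):
# NI Wind Penetration = NI Wind Generation / NI Demand   (first 2022 row: 596.153 / 598.227 = 0.996533, matches the column)
# IE Wind Penetration = IE Wind Generation / IE Demand   (assumed by analogy with NI; not yet checked)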



In [16]:
# Preview 2022-2023: the display is truncated ('...') because of the wide schema; the trailing
# 'Unnamed: 34', 'Unnamed: 35' and notes columns are spreadsheet leftovers, as in the earlier files.
df_2022_2023.head()

Unnamed: 0,DateTime,GMT Offset,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,NI Solar Availability,NI Solar Generation,Moyle I/C,NI Wind Penetration,...,AI Hydro,Inter-Jurisdictional Flow,AI Wind Penetration,AI Solar Penetration,AI Oversupply,AI Oversupply Percentage,SNSP,Unnamed: 34,Unnamed: 35,NI: Northern Ireland \nIE: Ireland \nAI: All Island
0,2022-01-01 00:00:00,0,959.377,598.227,886.598,596.153,0.0,0.0,-321.933,0.996533,...,155.84,-13.63,0.835174,0.0,102.131,0.022518,0.6935,,,Notes: \nThe system data in this report is is ...
1,2022-01-01 00:15:00,0,961.808,581.954,893.691,596.546,0.0,0.0,-322.933,1.025074,...,156.36,-32.6,0.847422,0.0,98.833,0.021709,0.6925,,,
2,2022-01-01 00:30:00,0,958.665,566.308,911.394,599.373,0.0,0.0,-323.0,1.058387,...,155.03,-43.63,0.850698,0.0,176.799,0.038751,0.6945,,,
3,2022-01-01 00:45:00,0,963.776,555.972,927.896,600.444,0.0,0.0,-323.133,1.07999,...,155.99,-59.19,0.874023,0.0,153.25,0.033574,0.696,,,
4,2022-01-01 01:00:00,0,973.501,569.451,915.996,605.222,0.0,0.0,-322.267,1.062817,...,156.5,-56.39,0.910848,0.0,28.675,0.006265,0.6997,,,


In [17]:
# Preview 2024 (file keyed '2024-Mar'): the visible, truncated columns match the 2022-2023 preview.
df_2024_up_to_march.head()

Unnamed: 0,DateTime,GMT Offset,NI Generation,NI Demand,NI Wind Availability,NI Wind Generation,NI Solar Availability,NI Solar Generation,Moyle I/C,NI Wind Penetration,...,AI Hydro,Inter-Jurisdictional Flow,AI Wind Penetration,AI Solar Penetration,AI Oversupply,AI Oversupply Percentage,SNSP,Unnamed: 34,Unnamed: 35,NI: Northern Ireland \nIE: Ireland \nAI: All Island
0,2024-01-01 00:00:00,0,715.613,584.635,541.716,249.907,0.0,0.0,189.187,0.427458,...,179.15,-308.06,0.616345,2.5e-05,0.0,0.0,0.6632,,,Notes: \nThe system data in this report is is ...
1,2024-01-01 00:15:00,0,702.913,570.701,528.684,216.946,0.0,0.0,159.985,0.38014,...,178.21,-279.58,0.605382,2.5e-05,0.0,0.0,0.6442,,,
2,2024-01-01 00:30:00,0,799.801,563.575,506.628,301.445,0.0,0.0,84.05,0.53488,...,177.41,-306.58,0.630409,2.5e-05,0.0,0.0,0.6327,,,
3,2024-01-01 00:45:00,0,877.653,562.83,512.047,376.547,0.0,0.0,25.885,0.669024,...,177.37,-326.14,0.646677,2.3e-05,0.0,0.0,0.6277,,,
4,2024-01-01 01:00:00,0,872.636,577.349,501.778,378.713,0.0,0.0,80.209,0.655952,...,177.72,-363.06,0.636304,2.6e-05,0.0,0.0,0.6316,,,
