In [1]:
# pip install pdfplumber
import pandas as pd
from pathlib import Path

In [2]:
# Folder with the Ofgem archetype inputs; it sits next to the current working directory
data_folder = Path.cwd().parent.joinpath("Ofgem_Archetype")

# Load the archetype workbook, then discard every row that has at least one missing cell
data = pd.read_excel(data_folder.joinpath("Ofgem energy consumer archetypes2024_Tables1.xlsx"))
data = data.dropna()

# Preview the first rows of the cleaned table
data.head()


Unnamed: 0,Archetype,Number of\nhouseholds,ratio of households number,Average Annual Elec consumption (kWh),ratio of Average Annual Elec consumption,Average Annual Gas consumption (kWh),ratio of Average Annual Gas consumption,Total consumption (Elec+Gas) in KWh,Main heating Fuel,Gross\nannual\nhousehold\nincome,Attributes (key words)
0,A1,578333,0.021434,2742,0.025666,10933,0.054528,13675.0,Mains gas,15643.0,Lowest income; mains gas; retired; 75+ years o...
1,A2,868191,0.032176,2849,0.026668,9464,0.047202,12313.0,Mains gas,17327.0,Low income; housing association; single adults...
2,A3,883413,0.03274,3519,0.032939,10622,0.052977,14141.0,Mains gas,18195.0,Low income; mains gas; retired/unoccupied < 65...
3,B4,731318,0.027103,4811,0.045033,0,0.0,4811.0,Electricity,18776.0,Low income; electric heating; retired/unoccupi...
4,B5,465288,0.017244,6597,0.061751,0,0.0,6597.0,Electricity/Other (Solid fuel/LPG),22423.0,Low income; electric/solid fuel/LPG heating; 4...


### Calculate agents' share based on their fuel type and usage

In [4]:
def calculate_main_fuel_ratios(data, heating_fuel, consumption_column):
    """Return each archetype's share of a heating-fuel group's consumption.

    Rows are selected when their 'Main heating Fuel' text contains
    ``heating_fuel`` as a literal, case-insensitive substring, so combined
    labels such as "Electricity/Other (Solid fuel/LPG)" are matched by either
    part. Rows with a missing fuel label are never selected.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with 'Archetype', 'Main heating Fuel' and ``consumption_column``.
    heating_fuel : str
        Text to look for inside 'Main heating Fuel'.
    consumption_column : str
        Column whose values are divided by the group's total.

    Returns
    -------
    pandas.DataFrame
        Columns 'Archetype', ``consumption_column`` and
        'Fuel Consumption Ratio' for the selected rows; ``data`` itself is
        left unchanged.
    """
    # Literal (non-regex) substring test, so "(" and "/" in fuel names are safe
    uses_fuel = data['Main heating Fuel'].str.contains(
        heating_fuel, case=False, na=False, regex=False
    )
    # Work on a copy so the new column never touches the caller's frame
    group = data.loc[uses_fuel].copy()

    # Denominator: consumption summed over every archetype in the group
    group_total = group[consumption_column].sum()
    group['Fuel Consumption Ratio'] = group[consumption_column].div(group_total)

    return group.loc[:, ['Archetype', consumption_column, 'Fuel Consumption Ratio']]

# Pair every fuel group with the consumption column used to weight its archetypes.
# Mains gas is weighted by gas consumption. The table has no oil or solid-fuel/LPG
# consumption column, so those groups (like electricity) are weighted by electricity use.
(
    mains_gas_fuel_ratios,
    oil_fuel_ratios,
    other_fuel_ratios,
    electricity_fuel_ratios,
) = (
    calculate_main_fuel_ratios(data, fuel_label, usage_column)
    for fuel_label, usage_column in (
        ('Mains gas', 'Average Annual Gas consumption (kWh)'),
        ('Oil', 'Average Annual Elec consumption (kWh)'),
        ('Other (solid fuel/LPG)', 'Average Annual Elec consumption (kWh)'),
        ('Electricity', 'Average Annual Elec consumption (kWh)'),
    )
)



In [9]:
# Row labels of the agent-share table: one row per heating-fuel group
fuel_types = [
    'Mains gas',
    'Oil',
    'Other (solid fuel/LPG)',
    'Electricity',
]

# Start with the fuel rows only; archetype columns are added as shares are written in
ofgem_agent_df = pd.DataFrame(index=pd.Index(fuel_types))

# Add values for each fuel type into the DataFrame
def add_to_ofgem_agent_df(fuel_ratios, fuel_type, ofgem_agent_df):
    """Write one fuel group's archetype shares into its row of the agent table.

    Parameters
    ----------
    fuel_ratios : pandas.DataFrame
        Frame with 'Archetype' and 'Fuel Consumption Ratio' columns.
    fuel_type : str
        Row label of ``ofgem_agent_df`` that receives the shares.
    ofgem_agent_df : pandas.DataFrame
        Table updated in place; an archetype that has no column yet gets one.

    Returns
    -------
    None
    """
    shares = zip(fuel_ratios['Archetype'], fuel_ratios['Fuel Consumption Ratio'])
    for archetype, share in shares:
        # Label-based assignment creates the archetype column on first use
        ofgem_agent_df.loc[fuel_type, archetype] = share

# Write every fuel group's archetype shares into its row of the table
for _ratios, _fuel in [
    (mains_gas_fuel_ratios, 'Mains gas'),
    (oil_fuel_ratios, 'Oil'),
    (other_fuel_ratios, 'Other (solid fuel/LPG)'),
    (electricity_fuel_ratios, 'Electricity'),
]:
    add_to_ofgem_agent_df(_ratios, _fuel, ofgem_agent_df)

# An archetype that is not part of a fuel group has a zero share of it
ofgem_agent_df = ofgem_agent_df.fillna(0)


# One extra row holding the string "new" in every archetype column.
# It keeps the default row label 0, which is the key map_fuel_to_type returns for "-".
new_row = pd.DataFrame(
    [["new" for _ in ofgem_agent_df.columns]],
    columns=ofgem_agent_df.columns,
)

# Stack the "new" row above the fuel rows
ofgem_agent_df_with_unit_row = pd.concat([new_row, ofgem_agent_df], axis=0)

ofgem_agent_df_with_unit_row



Unnamed: 0,A1,A2,A3,B6,C7,C9,D10,D11,D12,E13,...,J23,G17,H19,J24,B5,G18,B4,C8,F15,F16
0,new,new,new,new,new,new,new,new,new,new,...,new,new,new,new,new,new,new,new,new,new
Mains gas,0.054528,0.047202,0.052977,0.052493,0.065431,0.068254,0.06973,0.0438,0.080124,0.083401,...,0.081446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Oil,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.321914,0.267689,0.410398,0.0,0.0,0.0,0.0,0.0,0.0
Other (solid fuel/LPG),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.331666,0.0,0.0,0.370785,0.297549,0.0,0.0,0.0,0.0
Electricity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.233978,0.0,0.170633,0.198156,0.244121,0.153112


In [10]:
# Define a function to map fuel types to ofgem_agent_df index
def map_fuel_to_type(fuel):
    """Translate a technodata fuel code into a row key of the agent-share table.

    Returns
    -------
    str, int or None
        * "Mains gas" for "NGA", "Oil" for "OIL"/"HCO", "Electricity" for "ELC".
        * ``None`` for "HYDROGEN" and "SOLAR", which are excluded (solar shows
          up as a fuel for some hot-water technologies).
        * ``0`` for "-", the fuel value of the unit row.
        * "Other (solid fuel/LPG)" for every other value.
    """
    row_key_by_code = {
        "NGA": "Mains gas",
        "OIL": "Oil",
        "HCO": "Oil",
        "ELC": "Electricity",
        "HYDROGEN": None,
        "SOLAR": None,
        "-": 0,
    }
    # Codes missing from the table fall back to the catch-all group
    return row_key_by_code.get(fuel, "Other (solid fuel/LPG)")

In [11]:
def add_agents_to_technodata(techno_data, ofgem_agent_df, fuel_mapper=None):
    """Append Ofgem archetype share columns to a technodata table.

    Every row's ``Fuel`` value is translated to a row key of the share table
    and the matching shares are joined on, one column per archetype.

    Parameters
    ----------
    techno_data : pandas.DataFrame
        Technology table with a ``Fuel`` column. It is not modified.
    ofgem_agent_df : pandas.DataFrame
        Share table with fuel-type labels as index and one column per
        archetype.
    fuel_mapper : callable, optional
        Maps a ``Fuel`` value to an index label of ``ofgem_agent_df``, to ``0``
        for the unit row, or to ``None`` for fuels that get no share.
        Defaults to ``map_fuel_to_type``.

    Returns
    -------
    pandas.DataFrame
        A new frame: the rows of ``techno_data`` plus the archetype columns.
        The unit row holds "new" in each archetype column, rows mapped to
        ``None`` hold 0, and any column whose name contains "Agent"/"agent"
        is removed together with the temporary ``MappedFuelType`` key.

    Raises
    ------
    KeyError
        If ``techno_data`` has no ``Fuel`` column.
    """
    if fuel_mapper is None:
        fuel_mapper = map_fuel_to_type

    # Build the lookup from the argument rather than from a notebook-level
    # variable, so the result depends only on what the caller passes in.
    # The extra row keeps the default label 0 and carries "new" in every column.
    unit_row = pd.DataFrame(
        [["new"] * ofgem_agent_df.shape[1]], columns=ofgem_agent_df.columns
    )
    share_lookup = pd.concat([unit_row, ofgem_agent_df])

    # assign() returns a new frame, so the caller's table never gains the key column
    keyed = techno_data.assign(MappedFuelType=techno_data['Fuel'].apply(fuel_mapper))

    # Left join keeps every technodata row, matched or not
    merged = keyed.merge(
        share_lookup,
        left_on='MappedFuelType',
        right_index=True,
        how='left',
    )

    # Rows mapped to None (HYDROGEN/SOLAR with the default mapper) match no
    # lookup row, so all of their archetype cells are NaN after the join.
    # Writing 0 directly is equivalent to filling those NaNs, and avoids the
    # object-dtype downcasting FutureWarning that DataFrame.fillna emits.
    archetype_columns = [col for col in merged.columns if col in ofgem_agent_df.columns]
    excluded = merged['MappedFuelType'].isna().to_numpy()
    if archetype_columns and excluded.any():
        merged.loc[excluded, archetype_columns] = 0

    # Drop the original agent columns and the temporary join key; the
    # isinstance guard keeps non-string column labels from raising on "in"
    helper_columns = [
        col for col in merged.columns
        if col == "MappedFuelType"
        or (isinstance(col, str) and ("Agent" in col or "agent" in col))
    ]
    return merged.drop(columns=helper_columns)

In [15]:
# Read the heating technodata from the user's Desktop
technodata = pd.read_csv(Path.home().joinpath("Desktop", "df_heating.csv"))

# Attach the archetype share columns
technodata_add_agents = add_agents_to_technodata(technodata, ofgem_agent_df)

# Save the result next to the input, without writing the row index
technodata_add_agents.to_csv(
    Path.home().joinpath("Desktop", "df_heating_agents.csv"),
    index=False,
)


  .fillna(0)


In [14]:
# Display the current technodata_add_agents frame (the value returned by add_agents_to_technodata)
technodata_add_agents

Unnamed: 0,ProcessName,RegionName,Time,cap_par,cap_exp,fix_par,fix_exp,var_par,var_exp,MaxCapacityAddition,...,J23,G17,H19,J24,B5,G18,B4,C8,F15,F16
0,RCHPEA-CCG00 [RES.CHP.EXISTING-AVERAGE: .00.CO...,UK,2010,423.684211,1,16.842105,1,0,1,2.0,...,0.081446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,RCHPEA-CCG01 [RES.CHP.EXISTING-AVERAGE: .01.CO...,UK,2010,423.684211,1,16.842105,1,0,1,100000000.0,...,0.081446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,RCHPEA-CCH01 [RES.CHP.EXISTING-AVERAGE: .01.CO...,UK,2010,423.684211,1,16.842105,1,0,1,100000000.0,...,0.0,0.0,0.0,0.0,0.233978,0.0,0.170633,0.198156,0.244121,0.153112
3,RCHPEA-FCH01 [RES.CHP.EXISTING-AVERAGE: .01.FU...,UK,2010,6456.049312,1,531.651429,1,0,1,100000000.0,...,0.0,0.0,0.0,0.0,0.233978,0.0,0.170633,0.198156,0.244121,0.153112
4,RCHPEA-STW01 [RES.CHP.EXISTING-AVERAGE: .01.ST...,UK,2010,534.534535,1,24.024024,1,0,1,100000000.0,...,0.0,0.0,0.0,0.0,0.233978,0.0,0.170633,0.198156,0.244121,0.153112
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,RWEAWHTRG00 [RES.WATER.EXISTING-AVERAGE: .00.N...,UK,2010,6.108552,1,0.610855,1,0,1,2.0,...,0.081446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,RWEAWHTRG01 [RES.WATER.EXISTING-AVERAGE: .01.N...,UK,2010,6.108552,1,0.610855,1,0,1,100000000.0,...,0.081446,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
98,RWNASA01 [RES.WATER.NEW-AVERAGE: .01.STANDALON...,UK,2010,0.000593,1,0.000593,1,0,1,100000000.0,...,0.0,0.0,0.0,0.0,0.233978,0.0,0.170633,0.198156,0.244121,0.153112
99,RWNAWHTRE01 [RES.WATER.NEW-AVERAGE: .01.ELC.RE...,UK,2010,2.097148,1,0.209715,1,0,1,100000000.0,...,0.0,0.0,0.0,0.0,0.233978,0.0,0.170633,0.198156,0.244121,0.153112


In [216]:
# Name of the folder, next to the working directory, that holds the MUSE input files
MUSE_Files = "MUSE_Files"
# Sub-sectors to process; each one has its own sub-folder and technodata file
subsectors = ["space_heat", "hot_water"]

for subsector in subsectors:
    # Input and output live in the same sub-sector folder
    subsector_dir = Path.cwd().parent / MUSE_Files / subsector

    # Rows with any missing value are discarded before the archetype shares are attached
    technodata = pd.read_csv(subsector_dir / f"technodata_{subsector}.csv").dropna()
    technodata_add_agents = add_agents_to_technodata(technodata, ofgem_agent_df)

    technodata_add_agents.to_csv(
        subsector_dir / f"technodata_{subsector}_add_Ofgem_agents.csv",
        index=False,
    )


  .fillna(0)
  .fillna(0)
