### This file creates 'AgentShare' parameters, which is used in 'Technodata.csv'

In [197]:
# pip install pdfplumber
import pandas as pd
from pathlib import Path
import numpy as np


In [None]:
# Define the logic to categorize heating type
# This is based in the survey data in the PDF file
def categorize_heating(fuel):
    if fuel == "Mains gas":
        return "without_elc_heating"

    elif fuel == "Electricity":
        return "with_primary_elc_heating"

    else:
        return "with_addition_elc_heating"


In [199]:
enduses_mapping = {'RES.COOKING':'Cooking'
           , 'RES.COOLING': 'Cold appliances'
           , 'RES.CONSUMER-ELECTRONICS.TV' : 'Audiovisual'
           , 'RES.LIGHTING' : 'Lighting'
           , 'RES.HOT-WATER' : 'Water heating'
           , 'RES.SPACE-HEAT': 'Heating'
           , 'RES.REFRIGERATORS': 'Cold appliances'
           , 'RES.FREEZERS': 'Cold appliances'
           , 'RES.COMPUTERS': 'ICT'
           , 'RES.WET.APPLIANCES': 'Washing/Drying'
           , 'RES.OTHER': 'Other'
}

In [200]:

# Define the data as a list of lists
data = [
	["Enduse", "without_elc_heating", "with_addition_elc_heating", "with_primary_elc_heating"],
	["Cold appliances", "16.20%", "13.40%", "4.70%"],
	["Cooking", "13.80%", "11.70%", "7.20%"],
	["Lighting", "15.40%", "10.00%", "5.80%"],
	["Audiovisual", "14.40%", "10.40%", "3.40%"],
	["ICT", "6.10%", "3.60%", "2.60%"],
	["Washing/Drying", "13.60%", "10.70%", "3.10%"],
	["Heating", "0", "22.50%", "64.20%"],
	["Water heating", "7.10%", "4.00%", "6.30%"],
	["Other", "3.70%", "5.80%", "1.50%"],
	["Not_known", "9.70%", "7.90%", "1.20%"]
]

# Convert the data into a pandas DataFrame
enduse_multipliers = pd.DataFrame(data[1:], columns=data[0])
enduse_multipliers.set_index('Enduse', inplace=True)

# Convert percentage strings to numerical values
enduse_multipliers = enduse_multipliers.map(lambda x: float(x.strip('%')) / 100 if isinstance(x, str) and '%' in x else x)


# Display the DataFrame
enduse_multipliers


Unnamed: 0_level_0,without_elc_heating,with_addition_elc_heating,with_primary_elc_heating
Enduse,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cold appliances,0.162,0.134,0.047
Cooking,0.138,0.117,0.072
Lighting,0.154,0.1,0.058
Audiovisual,0.144,0.104,0.034
ICT,0.061,0.036,0.026
Washing/Drying,0.136,0.107,0.031
Heating,0.0,0.225,0.642
Water heating,0.071,0.04,0.063
Other,0.037,0.058,0.015
Not_known,0.097,0.079,0.012


In [201]:
# Function to get the appropriate multiplier
def get_multiplier(enduse,heating_type):
    try:
        return enduse_multipliers.loc[enduse,heating_type]
    except KeyError:
        raise KeyError(f"Invalid heating type or enduse: {heating_type}, {enduse}")
    
# get_multiplier( "Cooking", "without_elc_heating")

In [202]:
#### Ofgem data folder
data_folder = Path.cwd().parent / "Ofgem_Archetype"

# read the tables in the excel file
df = pd.read_excel(data_folder / "Ofgem energy consumer archetypes2024_Tables1.xlsx", sheet_name="cleaned").dropna()

# Apply the logic to create the "heating type" column
df['heating type'] = df['Main heating Fuel'].apply(categorize_heating)

# display the data
# df.head()

In [203]:
for item in list(enduses_mapping.keys()):
    enduse = enduses_mapping[item]
    if enduse == 'Heating' or enduse == 'Water heating':
        continue
    else:
        # Calculate electricity consumption from cooking 
        consumption = df['Average Annual Elec consumption (kWh)'] * df['heating type'].apply(lambda x: get_multiplier(enduse,x))

        # Calculate the sum of the 'heating_ratio' column
        total_cooking_con = consumption.sum()

        # Normalize by dividing each value by the total sum
        df[item] = consumption / total_cooking_con

        if item == 'REFRIGERATORS' or item == 'FREEZERS':
            df[item] /= 2 # divide by 2 to account for the fact that the data is for both refrigerators and freezers

In [204]:
df

Unnamed: 0,Archetype,Average Annual Elec consumption (kWh),Average Annual Gas consumption (kWh),Main heating Fuel,heating type,RES.COOKING,RES.COOLING,RES.CONSUMER-ELECTRONICS.TV,RES.LIGHTING,RES.REFRIGERATORS,RES.FREEZERS,RES.COMPUTERS,RES.WET.APPLIANCES,RES.OTHER
0,A1,2742,10933,Mains gas,without_elc_heating,0.029124,0.030475,0.032151,0.032393,0.030475,0.030475,0.032963,0.031316,0.023014
1,A2,2849,9464,Mains gas,without_elc_heating,0.030261,0.031665,0.033405,0.033657,0.031665,0.031665,0.03425,0.032538,0.023912
2,A3,3519,10622,Mains gas,without_elc_heating,0.037377,0.039111,0.041261,0.041572,0.039111,0.039111,0.042304,0.04019,0.029536
3,B4,4811,0,Electricity,with_primary_elc_heating,0.026661,0.015513,0.013319,0.021405,0.015513,0.015513,0.024652,0.012524,0.01637
4,B5,6597,0,Electricity/Other (Solid fuel/LPG),with_addition_elc_heating,0.059407,0.060648,0.055865,0.050606,0.060648,0.060648,0.046804,0.059277,0.086797
5,B6,3028,10525,Mains gas,without_elc_heating,0.032162,0.033654,0.035504,0.035771,0.033654,0.033654,0.036402,0.034582,0.025415
6,C7,3649,13119,Mains gas,without_elc_heating,0.038758,0.040556,0.042786,0.043107,0.040556,0.040556,0.043867,0.041674,0.030627
7,C8,5587,0,Electricity,with_primary_elc_heating,0.030961,0.018015,0.015468,0.024858,0.018015,0.018015,0.028628,0.014544,0.019011
8,C9,3337,13685,Mains gas,without_elc_heating,0.035444,0.037089,0.039127,0.039422,0.037089,0.037089,0.040116,0.038111,0.028008
9,D10,3881,13981,Mains gas,without_elc_heating,0.041222,0.043135,0.045506,0.045848,0.043135,0.043135,0.046656,0.044324,0.032574


In [205]:
col_to_ignore = ['Average Annual Elec consumption (kWh)','Average Annual Gas consumption (kWh)', 'Main heating Fuel', 'heating type']
# Exclude columns "Col2" and "Col4"
columns_to_keep = [col for col in df.columns if col not in col_to_ignore]
enduse_shares_df = df[columns_to_keep]

In [206]:
enduse_shares_df.set_index('Archetype', inplace=True)

In [207]:
enduse_shares_df = enduse_shares_df.T
enduse_shares_df.to_csv(data_folder / "enduse_shares.csv")

print("Enduse shares extracted successfully saved to " + str(data_folder / "enduse_shares.csv"))

Enduse shares extracted successfully saved to c:\Users\jyang8\MUSE_models\MUSE_buildings\Ofgem_Archetype\enduse_shares.csv


In [208]:
technodata_df = pd.read_csv( Path.cwd().parent /"Buildings/MUSE_Files" / "Technodata.csv")

# Step 1: Identify columns with "new" in the "Unit" row, a.k.a the Agent columns
columns_to_drop = technodata_df.loc[0][technodata_df.loc[0] == "new"].index

# Step 2: Drop these Agent columns from the DataFrame
technodata_df.drop(columns=columns_to_drop, inplace=True)

# Step 3: Extract the "Unit" row
unit_row = technodata_df.iloc[[0]]

# Step 4: Remove the "Unit" row from technodata_df for merging
technodata_df_no_unit = technodata_df.iloc[1:].reset_index(drop=True)

In [209]:
# Extract unique index categories
enduse_shares_categories = enduse_shares_df.index.unique()

# Define a function to find the first matching index in enduse_shares_df based on "contain" logic
def find_containing_category(enduse_value, categories):
    for category in categories:
        if category in str(enduse_value):
            return category
    return None

# # Apply the function to create a mapping column in technodata_df
# technodata_df_no_unit['Mapped_EndUse'] = technodata_df_no_unit['EndUse'].apply(
#     lambda x: find_containing_category(x, enduse_shares_categories)
# )
# Step 3: Perform the merge with inline mapping
merged_no_unit_df = pd.merge(
    technodata_df_no_unit,
    enduse_shares_df,
    left_on=technodata_df_no_unit['EndUse'].apply(
        lambda x: find_containing_category(x, enduse_shares_categories)
    ),
    right_index=True,
    how='left'
)

# # Perform the merge with the new logic-based mapping
# merged_no_unit_df = technodata_df_no_unit.join(enduse_shares_df, how='left', on='Mapped_EndUse')




In [210]:
# Step 1: Identify newly added columns from enduse_shares_df
new_columns = set(merged_no_unit_df.columns) - set(technodata_df_no_unit.columns)

# Step 2: Update the Unit row
unit_row = unit_row.copy()
for col in new_columns:
    unit_row[col] = 'new'

# Step 3: Add the updated Unit row back to the top
final_merged_df = pd.concat([unit_row, merged_no_unit_df], ignore_index=True)



In [211]:
final_merged_df.to_csv(data_folder / "tech.csv")