### This notebook creates the 'AgentShare' parameters and integrates them into 'Technodata.csv'

In [None]:
# Enable the IPython autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel
# (mode 2: reload all modules before executing each cell).
%load_ext autoreload
%autoreload 2

In [None]:
# pip install pdfplumber
import re
from pathlib import Path

import pandas as pd
# import numpy as np


In [None]:
# Keys are the consumption/demand (end-use) names used in MUSE; values are the
# corresponding names used in HouseholdElectricitySurveyFinalReportissue4.pdf.
# Several MUSE end uses share the same survey category ("Cold appliances").
non_heat_enduses_mapping = {
    "RES.COOKING": "Cooking",
    "RES.COOLING": "Cold appliances",
    "RES.CONSUMER-ELECTRONICS.TV": "Audiovisual",
    "RES.LIGHTING": "Lighting",
    "RES.REFRIGERATORS": "Cold appliances",
    "RES.FREEZERS": "Cold appliances",
    "RES.COMPUTERS": "ICT",
    "RES.WET.APPLIANCES": "Washing/Drying",
    "RES.OTHER": "Other",
}

# Same key/value convention as above, for the heating-related end uses.
heating_enduses_mapping = {
    "RES.HOT-WATER": "Water heating",
    "RES.SPACE-HEAT": "Heating",
}

# MUSE end-use names only, in the insertion order of the mappings above.
non_heat_enduses = list(non_heat_enduses_mapping)
heating_enduses = list(heating_enduses_mapping)

In [None]:
# Folder holding the Ofgem archetype workbook (sibling of the working directory)
data_folder = Path.cwd().parent / "Ofgem_Archetype"

# Folder where the generated MUSE input files are written
output_folder = Path.cwd().parent / "Residential" / "MUSE_files" / "Ofgem_agents"

# Only these columns of the workbook are used downstream
col_to_keep = [
    'Archetype',
    'Average Annual Elec consumption (kWh)',
    'Average Annual Gas consumption (kWh)',
    'Main heating Fuel',
]

# Read the "original" sheet, drop every row containing a missing value
# (checked across all sheet columns, before the selection), then keep
# only the columns listed above.
df_ofgem = (
    pd.read_excel(
        data_folder / "Ofgem energy consumer archetypes2024_Tables1.xlsx",
        sheet_name="original",
    )
    .dropna()[col_to_keep]
)

# Preview the first rows
df_ofgem.head()

In [None]:
# Read in the "Technodata.csv" with single agents (created in an earlier version).
technodata_df = pd.read_csv(Path.cwd().parent / "Buildings/MUSE_Files" / "Technodata.csv")

# Step 1: Identify columns whose value in the first data row (the "Unit" row)
# is "new", a.k.a. the Agent columns.
unit_values = technodata_df.loc[0]
columns_to_drop = unit_values[unit_values == "new"].index

# Step 2: Drop these old Agent columns from the DataFrame.
technodata_df.drop(columns=columns_to_drop, inplace=True)

# Step 3: Extract the "Unit" row (as a one-row DataFrame); it is added back
# on top of the combined output later.
unit_row = technodata_df.iloc[[0]]

# Step 4 (intentionally skipped): the "Unit" row is not removed explicitly
# because, per the original note, it is not selected by the masks in step 5.

# Step 5: Split the DataFrame into two based on the "EndUse" column.
# The end-use names contain "." (e.g. "RES.COOKING"), which is a wildcard in a
# regular expression, so each name is escaped before being joined into an
# alternation pattern; otherwise "." would match any character.
non_heat_pattern = '|'.join(re.escape(name) for name in non_heat_enduses)
heating_pattern = '|'.join(re.escape(name) for name in heating_enduses)

# Case-insensitive substring match; rows with a missing EndUse are excluded.
non_heat_use = technodata_df['EndUse'].str.contains(non_heat_pattern, case=False, na=False)
heating_use = technodata_df['EndUse'].str.contains(heating_pattern, case=False, na=False)

# Take explicit copies so the subsets are independent of technodata_df and
# later column assignments on them do not raise SettingWithCopyWarning.
df_non_heat = technodata_df[non_heat_use].copy()
df_heating = technodata_df[heating_use].copy()


(1) Non-heat end uses

In [None]:
# Display the non-heat subset of the technodata for inspection
df_non_heat

In [None]:
# non_heat_capacity_share is a project helper defined in
# calculate_non_heat_share.py (not shown in this notebook).
from calculate_non_heat_share import non_heat_capacity_share

# Inputs: the non-heat technodata rows, the Ofgem archetype table, and the
# MUSE -> survey end-use name mapping.
# NOTE(review): presumably returns df_non_heat extended with per-agent share
# columns -- confirm against the helper's implementation.
df1 = non_heat_capacity_share(df_non_heat, df_ofgem,non_heat_enduses_mapping)
# Preview the first rows of the result
df1.head()


(2) Heating end uses

In [None]:
# heating_capacity_share is a project helper defined in
# calculate_heating_share.py (not shown in this notebook).
from calculate_heating_share import heating_capacity_share
# Inputs: the heating technodata rows and the Ofgem archetype table.
# NOTE(review): presumably returns df_heating extended with per-agent share
# columns -- confirm against the helper's implementation.
df2 = heating_capacity_share(df_heating, df_ofgem)


In [None]:
# Display the heating result for inspection
df2

(3) Combine the DataFrames

In [None]:
# Step 1: Identify the columns that df1 has but the Unit row lacks.
# Built as a list in df1's column order (rather than a set difference) so the
# result is reproducible: iteration order of a set of strings can change
# between interpreter runs.
new_columns = [col for col in df1.columns if col not in unit_row.columns]


In [None]:
# Step 1: Identify the columns that df1 has but the Unit row lacks.
# A list in df1's column order is used instead of a set difference: the Unit
# row is concatenated first, so the order in which these columns are added to
# it determines their order in the saved CSV, and set iteration order over
# strings is not stable between interpreter runs.
# NOTE(review): only df1 is inspected; this assumes df2 carries the same new
# columns -- confirm against heating_capacity_share.
new_columns = [col for col in df1.columns if col not in unit_row.columns]

# Step 2: Update the Unit row. Work on a copy because unit_row was taken as a
# slice of technodata_df; every new column is marked 'new'.
unit_row = unit_row.copy()
for col in new_columns:
    unit_row[col] = 'new'

# Step 3: Put the updated Unit row on top, followed by the non-heat and
# heating rows, and renumber the index.
final_merged_df = pd.concat([unit_row, df1, df2], ignore_index=True)

# Save the final DataFrame, creating the output folder first so that to_csv
# does not fail on a fresh checkout where the folder does not exist yet.
output_folder.mkdir(parents=True, exist_ok=True)
output_path = output_folder / "Technodata.csv"
final_merged_df.to_csv(output_path, index=False)

print(f"Technodata.csv is successfully saved to {output_path}")

In [None]:
# Display the combined table that was written to Technodata.csv
final_merged_df