### This file creates the 'AgentShare' parameters, which are used in 'Technodata.csv'

In [None]:
# autoreload
%load_ext autoreload
%autoreload 2

In [40]:
# pip install pdfplumber
import pandas as pd
from pathlib import Path
# import numpy as np


In [41]:
# Each key is the consumption/demand (end-use) name in MUSE.
# Each value is the matching category name used in
# HouseholdElectricitySurveyFinalReportissue4.pdf.
# Note that several MUSE end uses share the same survey category.
non_heat_enduses_mapping = {
    'RES.COOKING': 'Cooking',
    'RES.COOLING': 'Cold appliances',
    'RES.CONSUMER-ELECTRONICS.TV': 'Audiovisual',
    'RES.LIGHTING': 'Lighting',
    'RES.REFRIGERATORS': 'Cold appliances',
    'RES.FREEZERS': 'Cold appliances',
    'RES.COMPUTERS': 'ICT',
    'RES.WET.APPLIANCES': 'Washing/Drying',
    'RES.OTHER': 'Other',
}

# Same key/value convention as above, for the heating-related end uses.
heating_enduses_mapping = {
    'RES.HOT-WATER': 'Water heating',
    'RES.SPACE-HEAT': 'Heating',
}

# MUSE end-use names only, in the insertion order of the mappings above.
non_heat_enduses = list(non_heat_enduses_mapping)
heating_enduses = list(heating_enduses_mapping)

In [42]:
# Classification of the main heating fuel into a heating type.
# This is based on the survey data in HouseholdElectricitySurveyFinalReportissue4.pdf
def categorize_heating(fuel):
    """Return the heating-type label for a main heating fuel.

    "Mains gas" maps to "without_elc_heating", "Electricity" maps to
    "with_primary_elc_heating", and every other value falls back to
    "with_addition_elc_heating".
    """
    known_fuels = (
        ("Mains gas", "without_elc_heating"),
        ("Electricity", "with_primary_elc_heating"),
    )
    for fuel_name, heating_label in known_fuels:
        if fuel == fuel_name:
            return heating_label
    return "with_addition_elc_heating"


In [43]:
# Share of annual electricity consumption per end use, for each heating type.
# The first row is the header; every following row is
# [end use, share without electric heating, share with additional electric
# heating, share with primary electric heating].
# reference: HouseholdElectricitySurveyFinalReportissue4.pdf
data = [
    ["Enduse", "without_elc_heating", "with_addition_elc_heating", "with_primary_elc_heating"],
    ["Cold appliances", "16.20%", "13.40%", "4.70%"],
    ["Cooking", "13.80%", "11.70%", "7.20%"],
    ["Lighting", "15.40%", "10.00%", "5.80%"],
    ["Audiovisual", "14.40%", "10.40%", "3.40%"],
    ["ICT", "6.10%", "3.60%", "2.60%"],
    ["Washing/Drying", "13.60%", "10.70%", "3.10%"],
    ["Heating", "0", "22.50%", "64.20%"],
    ["Water heating", "7.10%", "4.00%", "6.30%"],
    ["Other", "3.70%", "5.80%", "1.50%"],
    ["Not_known", "9.70%", "7.90%", "1.20%"],
]

# Convert the data into a pandas DataFrame indexed by end-use name
enduse_multipliers = pd.DataFrame(data[1:], columns=data[0])
enduse_multipliers.set_index('Enduse', inplace=True)

# Convert every cell to a numeric fraction (percentage / 100).
# Cells without a '%' sign (the "0" in the Heating row) must be converted too;
# otherwise that column stays a mix of floats and the string "0", and any
# arithmetic using the Heating multiplier fails.
enduse_multipliers = enduse_multipliers.apply(
    lambda column: column.str.strip('%').astype(float) / 100
)


# Display the DataFrame
# enduse_multipliers


In [None]:
# NOTE(review): `df = pass` is not valid Python; the statement that builds the
# input DataFrame appears to be missing here. Code in this file reads the
# columns 'Main heating Fuel', 'Average Annual Elec consumption (kWh)' and
# 'Archetype' from df, so whatever is loaded must provide them.
df = pass
# Add a "heating type" column by classifying each row's main heating fuel
df['heating type'] = df['Main heating Fuel'].apply(categorize_heating)

# display the data
# df.head()

In [45]:
def get_multiplier(enduse, heating_type):
    """Return the multiplier for an end use under a given heating type.

    Looks up row ``enduse`` and column ``heating_type`` in the module-level
    ``enduse_multipliers`` table.

    Raises:
        KeyError: if the lookup fails because a label is not in the table.
            The original lookup error is chained as the cause.
    """
    try:
        return enduse_multipliers.loc[enduse, heating_type]
    except KeyError as err:
        # Chain explicitly so the traceback shows which label lookup failed.
        raise KeyError(
            f"Invalid heating type or enduse: {heating_type}, {enduse}"
        ) from err
    
# get_multiplier( "Cooking", "without_elc_heating")

In [None]:
# For every non-heating end use, add a column to df holding each row's share
# of the total electricity consumption attributed to that end use.
for item, enduse in non_heat_enduses_mapping.items():
    # Electricity consumption attributed to this end use: annual consumption
    # scaled by the multiplier for the row's heating type.
    consumption = df['Average Annual Elec consumption (kWh)'] * df['heating type'].apply(
        lambda heating_type: get_multiplier(enduse, heating_type)
    )

    # Total over all rows for this end use
    total_consumption = consumption.sum()

    # Normalize by dividing each value by the total, so the column sums to 1
    df[item] = consumption / total_consumption

    # The mapping keys carry the 'RES.' prefix, so the comparison must use the
    # full names; the bare 'REFRIGERATORS' / 'FREEZERS' never matched.
    # NOTE(review): 'RES.COOLING' is also mapped to 'Cold appliances' but is
    # not halved here - confirm whether that is intended.
    if item in ('RES.REFRIGERATORS', 'RES.FREEZERS'):
        df[item] /= 2  # divide by 2 to account for the fact that the data is for both refrigerators and freezers

=============================================================================================

In [49]:
# Input columns of df that are not end-use share columns
col_to_ignore = [
    'Average Annual Elec consumption (kWh)',
    'Average Annual Gas consumption (kWh)',
    'Main heating Fuel',
    'heating type',
]
# Keep every other column of df, in its existing order
columns_to_keep = df.columns[~df.columns.isin(col_to_ignore)].tolist()
enduse_shares_df = df[columns_to_keep]

In [None]:
# Index by 'Archetype', then transpose so that the remaining columns become
# the rows and each archetype becomes a column.
enduse_shares_df = enduse_shares_df.set_index('Archetype').T
# enduse_shares_df.to_csv(data_folder / "enduse_shares.csv")

# print("Enduse shares extracted successfully saved to " + str(data_folder / "enduse_shares.csv"))

In [None]:
# Unique row labels of enduse_shares_df; used below as the candidate
# categories for substring matching against 'EndUse' values.
enduse_shares_categories = enduse_shares_df.index.unique()

def find_containing_category(enduse_value, categories):
    """Return the first category that occurs as a substring of ``enduse_value``.

    ``enduse_value`` is converted with ``str()`` before matching, and
    ``categories`` is scanned in iteration order. Returns ``None`` when no
    category is contained in the value.
    """
    text = str(enduse_value)
    return next((candidate for candidate in categories if candidate in text), None)

# # Apply the function to create a mapping column in technodata_df
# technodata_df_no_unit['Mapped_EndUse'] = technodata_df_no_unit['EndUse'].apply(
#     lambda x: find_containing_category(x, enduse_shares_categories)
# )
# Step 3: left-join the end-use shares onto df_non_heat.
# The join key is computed per row: the first enduse_shares_df row label that
# is contained in the row's 'EndUse' value (None when nothing matches).
_enduse_category_key = df_non_heat['EndUse'].apply(
    lambda value: find_containing_category(value, enduse_shares_categories)
)
merged_no_unit_df = df_non_heat.merge(
    enduse_shares_df,
    how='left',
    left_on=_enduse_category_key,
    right_index=True,
)
