In [None]:
# pip install pdfplumber
import pandas as pd

In [None]:

# Alias the Ofgem archetype table under the generic name used by the cells below.
# NOTE(review): `df_ofgem` is not defined in the cells visible here — confirm it is
# loaded by an earlier cell, otherwise this fails on a fresh kernel.
data = df_ofgem

### Calculate agents' share based on their fuel type and usage

In [None]:
# Add a function to account for partial matches in heating fuel names
def calculate_main_fuel_ratios(data, heating_fuel, consumption_column):
    """Return each archetype's share of a heating-fuel group's total consumption.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns 'Archetype', 'Main heating Fuel'
        and ``consumption_column``.
    heating_fuel : str
        Text looked for inside 'Main heating Fuel'. It is matched as a
        literal substring (not a regex), ignoring case; rows whose fuel is
        missing never match.
    consumption_column : str
        Name of the column whose values are summed and shared out.

    Returns
    -------
    pandas.DataFrame
        The matching rows (a copy, original index kept) restricted to
        'Archetype', ``consumption_column`` and 'Fuel Consumption Ratio',
        where the ratio is the row's consumption divided by the group total.
    """
    # Boolean mask of the rows belonging to the requested fuel group.
    in_fuel_group = data['Main heating Fuel'].str.contains(
        heating_fuel, case=False, na=False, regex=False
    )
    fuel_group = data.loc[in_fuel_group].copy()

    # Each row's consumption expressed as a fraction of the group's total.
    group_total = fuel_group[consumption_column].sum()
    fuel_group['Fuel Consumption Ratio'] = fuel_group[consumption_column].div(group_total)

    output_columns = ['Archetype', consumption_column, 'Fuel Consumption Ratio']
    return fuel_group[output_columns]

# Build one ratio table per main heating fuel.
# "Mains gas" is weighted by gas consumption. "Oil" and "Other (solid fuel/LPG)"
# are weighted by electricity consumption because, per the original notes, the
# dataset has no dedicated consumption column for them. "Electricity" is
# weighted by electricity consumption.
(
    mains_gas_fuel_ratios,
    oil_fuel_ratios,
    other_fuel_ratios,
    electricity_fuel_ratios,
) = (
    calculate_main_fuel_ratios(data, heating_fuel, consumption_column)
    for heating_fuel, consumption_column in (
        ('Mains gas', 'Average Annual Gas consumption (kWh)'),
        ('Oil', 'Average Annual Elec consumption (kWh)'),
        ('Other (solid fuel/LPG)', 'Average Annual Elec consumption (kWh)'),
        ('Electricity', 'Average Annual Elec consumption (kWh)'),
    )
)



In [None]:
# Row labels of the agent-share table: one row per main heating fuel.
fuel_types = [
    'Mains gas',
    'Oil',
    'Other (solid fuel/LPG)',
    'Electricity',
]

# Start with no columns; one column per archetype is added when the ratios are filled in.
ofgem_agent_df = pd.DataFrame(index=fuel_types)

# Add values for each fuel type into the DataFrame
def add_to_ofgem_agent_df(fuel_ratios, fuel_type, ofgem_agent_df):
    """Write one fuel group's archetype ratios into a row of ``ofgem_agent_df``.

    Parameters
    ----------
    fuel_ratios : pandas.DataFrame
        Table with 'Archetype' and 'Fuel Consumption Ratio' columns.
    fuel_type : str
        Row label of ``ofgem_agent_df`` that receives the ratios.
    ofgem_agent_df : pandas.DataFrame
        Table modified in place: for every archetype, the cell
        ``[fuel_type, archetype]`` is set to that archetype's ratio. A column
        is created for an archetype that does not have one yet; its other
        rows are left as NaN.

    Returns
    -------
    None
    """
    # Iterate the two columns directly instead of using iterrows(): iterrows()
    # coerces every row to a single dtype, which would turn integer archetype
    # labels into floats whenever the other columns are numeric.
    archetype_ratio_pairs = zip(
        fuel_ratios['Archetype'], fuel_ratios['Fuel Consumption Ratio']
    )
    for archetype, ratio in archetype_ratio_pairs:
        ofgem_agent_df.loc[fuel_type, archetype] = ratio

# Fill one row of the agent table per fuel group.
add_to_ofgem_agent_df(
    fuel_ratios=mains_gas_fuel_ratios, fuel_type='Mains gas', ofgem_agent_df=ofgem_agent_df
)
add_to_ofgem_agent_df(
    fuel_ratios=oil_fuel_ratios, fuel_type='Oil', ofgem_agent_df=ofgem_agent_df
)
add_to_ofgem_agent_df(
    fuel_ratios=other_fuel_ratios, fuel_type='Other (solid fuel/LPG)', ofgem_agent_df=ofgem_agent_df
)
add_to_ofgem_agent_df(
    fuel_ratios=electricity_fuel_ratios, fuel_type='Electricity', ofgem_agent_df=ofgem_agent_df
)

# An archetype that does not use a given fuel has no ratio for it; record that as 0.
ofgem_agent_df = ofgem_agent_df.fillna(0)

# One extra row holding "new" in every archetype column. It keeps the default
# integer index 0, which is the key map_fuel_to_type returns for the "-" fuel.
new_row = pd.DataFrame(
    [["new"] * len(ofgem_agent_df.columns)],
    columns=ofgem_agent_df.columns,
)

# Stack that row above the per-fuel ratio rows.
ofgem_agent_df_with_unit_row = pd.concat([new_row, ofgem_agent_df], axis=0)

ofgem_agent_df_with_unit_row



In [None]:
# Define a function to map fuel types to ofgem_agent_df index
def map_fuel_to_type(fuel):
    """Translate a technodata fuel code into a row key of the agent-share table.

    Returns
    -------
    str, int or None
        'Mains gas' for "NGA"; 'Oil' for "OIL" or "HCO"; 'Electricity' for
        "ELC"; ``None`` for "HYDROGEN" and "SOLAR" (excluded fuels — solar
        appears as a fuel for some hot-water technologies); ``0`` for "-"
        (the unit row); 'Other (solid fuel/LPG)' for every other value.
    """
    # Ordered (fuel codes, table key) pairs; the first group containing the code wins.
    code_groups = (
        (("NGA",), "Mains gas"),
        (("OIL", "HCO"), "Oil"),
        (("ELC",), "Electricity"),
        (("HYDROGEN", "SOLAR"), None),
        (("-",), 0),
    )
    for codes, table_key in code_groups:
        if fuel in codes:
            return table_key

    # Anything unrecognised is treated as solid fuel / LPG.
    return "Other (solid fuel/LPG)"

In [None]:
def add_agents_to_technodata(techno_data, ofgem_agent_df):
    """Attach per-archetype fuel-share columns to a technodata table.

    Each row's 'Fuel' code is translated with ``map_fuel_to_type`` and the
    result is used as the key of a left merge against the agent-share table,
    so every row of ``techno_data`` is kept.

    NOTE: the merge reads the module-level ``ofgem_agent_df_with_unit_row``
    (the share table with the extra unit row), not the ``ofgem_agent_df``
    argument. The argument is only used to identify which merged columns are
    archetype columns.

    Parameters
    ----------
    techno_data : pandas.DataFrame
        Table with a 'Fuel' column. It is not modified.
    ofgem_agent_df : pandas.DataFrame
        Agent-share table whose column labels are the archetype names.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``techno_data`` plus the archetype columns, without the
        helper 'MappedFuelType' column and without any column whose name
        contains "Agent" or "agent".
    """
    # Add the merge key on a copy so the caller's frame does not gain a
    # 'MappedFuelType' column as a side effect.
    techno_with_key = techno_data.assign(
        MappedFuelType=techno_data['Fuel'].apply(map_fuel_to_type)
    )

    technodata_add_agents = techno_with_key.merge(
        ofgem_agent_df_with_unit_row,  # share table including the unit row
        left_on='MappedFuelType',      # mapped fuel type of each technodata row
        right_index=True,              # fuel types are the share table's index
        how='left',                    # keep every technodata row
    )

    # HYDROGEN and SOLAR map to no fuel type, so the merge leaves NaN in their
    # archetype columns; give those rows a share of 0.
    excluded_fuel_rows = technodata_add_agents['Fuel'].isin(['HYDROGEN', 'SOLAR'])
    archetype_columns = [
        col for col in technodata_add_agents.columns if col in ofgem_agent_df.columns
    ]
    technodata_add_agents.loc[excluded_fuel_rows, archetype_columns] = (
        technodata_add_agents.loc[excluded_fuel_rows, archetype_columns]
        .fillna(0)
        .infer_objects(copy=False)
    )

    # Drop the helper key and every column whose name contains "Agent" or "agent".
    columns_to_drop = [
        col for col in technodata_add_agents.columns
        if "Agent" in col or "agent" in col or col == "MappedFuelType"
    ]
    return technodata_add_agents.drop(columns=columns_to_drop)

In [None]:
# TODO: assign the technodata table here. add_agents_to_technodata reads its
# 'Fuel' column, so the table must provide one.
# `pass` is a statement and cannot be used as a value, so the placeholder is
# None and the cell stops with an explicit message until real data is supplied.
technodata = None
if technodata is None:
    raise NotImplementedError(
        "Assign the technodata DataFrame to `technodata` before adding agents."
    )
technodata_add_agents = add_agents_to_technodata(technodata, ofgem_agent_df)
