In [36]:
# pip install pdfplumber
import pandas as pd
from pathlib import Path
import numpy as np

from sklearn.linear_model import LinearRegression

In [37]:
# Folder holding the Ofgem archetype workbook: a sibling of the current working directory
data_folder = Path.cwd().parent / "Ofgem_Archetype"

# Read the "Mains gas" sheet of the workbook as-is
workbook_path = data_folder / "Ofgem energy consumer archetypes2024_Tables1.xlsx"
raw_df = pd.read_excel(workbook_path, sheet_name="Mains gas")

# Keep only complete rows: any row with a missing value in any column is discarded
df = raw_df.dropna()

# display the data
df


Unnamed: 0,Archetype,Average Annual Elec consumption (kWh),Average Annual Gas consumption (kWh),Main heating Fuel
0,A1,2742,10933,Mains gas
1,A2,2849,9464,Mains gas
2,A3,3519,10622,Mains gas
3,B6,3028,10525,Mains gas
4,C7,3649,13119,Mains gas
5,C9,3337,13685,Mains gas
6,D10,3881,13981,Mains gas
7,D11,2482,8782,Mains gas
8,D12,3952,16065,Mains gas
9,E13,5075,16722,Mains gas


### Predict the consumption (kWh) of archetypes that use Oil/Other/Solid fuel/LPG as their main heating fuel

In [38]:
# Column labels used repeatedly in this cell
elec_col = 'Average Annual Elec consumption (kWh)'
gas_col = 'Average Annual Gas consumption (kWh)'
fuel_col = 'Main heating Fuel'

# Training data: mains-gas archetypes that have both consumption figures
is_mains_gas = df[fuel_col] == 'Mains gas'
filtered_df = df[is_mains_gas].dropna(subset=[elec_col, gas_col])

# Single-feature regression: gas consumption explained by electricity consumption.
# X stays a one-column DataFrame so the fitted model keeps the feature name.
X = filtered_df[[elec_col]]
y = filtered_df[gas_col].values

# fit() returns the estimator itself, so construction and training can be chained
model = LinearRegression().fit(X, y)

# Rows to predict for: every archetype whose main heating fuel is none of these
excluded_fuels = ['Mains gas', 'Electricity', 'Electricity/Other (Solid fuel/LPG)']
is_excluded = df[fuel_col].isin(excluded_fuels)
non_main_fuel_rows = df[~is_excluded].copy()  # independent copy, so adding a column is safe

# Apply the mains-gas relationship to these rows' electricity consumption
non_main_fuel_rows['Predicted Consumption (kWh)'] = model.predict(non_main_fuel_rows[[elec_col]])

In [39]:
# Show the rows selected for prediction, including the new 'Predicted Consumption (kWh)' column
non_main_fuel_rows

Unnamed: 0,Archetype,Average Annual Elec consumption (kWh),Average Annual Gas consumption (kWh),Main heating Fuel,Predicted Consumption (kWh)
15,G17,5901,0,Oil/Other (solid fuel/LPG),21410.232219
16,G18,5294,0,Other (solid fuel/LPG),19224.22988
17,H19,4907,0,Oil,17830.51834
18,J24,7523,0,Oil,27251.576195
19,F15,6883,0,Other/Electricity,24946.730237


In [40]:
# Bring the predictions back onto the full table.
# Reindexing onto df's index keeps each prediction on its own row label and
# leaves NaN on every row that was not part of non_main_fuel_rows.
df['Predicted Consumption (kWh)'] = (
    non_main_fuel_rows['Predicted Consumption (kWh)'].reindex(df.index)
)

In [41]:
# Total annual consumption per archetype in kWh: electricity plus heating fuel.
# The result is stored under 'demand_share' although at this point it is still
# an absolute total, not yet a fraction.
elec_kwh = df['Average Annual Elec consumption (kWh)']
gas_kwh = df['Average Annual Gas consumption (kWh)']
predicted_kwh = df['Predicted Consumption (kWh)']

# Heating part: the actual gas figure when no prediction exists, otherwise the prediction
heating_kwh = np.where(predicted_kwh.isna(), gas_kwh, predicted_kwh)

df['demand_share'] = heating_kwh + elec_kwh

In [42]:
# Grand total of the per-archetype consumption currently held in 'demand_share'
consumption_totals = df['demand_share']
total_consumption_sum = consumption_totals.sum()

# Replace each total by its fraction of the grand total.
# (This fraction is what each Agent later receives as its Quantity.)
df['demand_share'] = consumption_totals / total_consumption_sum

# display the df
df

Unnamed: 0,Archetype,Average Annual Elec consumption (kWh),Average Annual Gas consumption (kWh),Main heating Fuel,Predicted Consumption (kWh),demand_share
0,A1,2742,10933,Mains gas,,0.032715
1,A2,2849,9464,Mains gas,,0.029457
2,A3,3519,10622,Mains gas,,0.03383
3,B6,3028,10525,Mains gas,,0.032424
4,C7,3649,13119,Mains gas,,0.040115
5,C9,3337,13685,Mains gas,,0.040723
6,D10,3881,13981,Mains gas,,0.042732
7,D11,2482,8782,Mains gas,,0.026947
8,D12,3952,16065,Mains gas,,0.047888
9,E13,5075,16722,Mains gas,,0.052146


## Create the Agent class and create Agents.csv for MUSE input

In [43]:
# define the Agent class
class Agent:
    """
    A single MUSE agent record that registers itself when it is created.

    Every constructor argument is stored unchanged on the instance under the
    same name, in signature order, and the new instance is then appended to
    the class-level ``instances`` list.

    Attributes:
        Name (str): The name of the agent.
        AgentShare: The share of the agent.
            NOTE(review): previously documented as a float, but this notebook
            passes the archetype label (a string) - confirm what MUSE expects.
        RegionName (str): The name of the region. Defaults to "UK".
        Objective1 (str): The first objective of the agent. Defaults to "LCOE".
        Objective2 (str): The second objective of the agent. Defaults to "".
        Objective3 (str): The third objective of the agent. Defaults to "".
        ObjData1 (float): Weight associated with the first objective. Defaults to 1.
        ObjData2 (float): Weight associated with the second objective. Defaults to "".
        ObjData3 (float): Weight associated with the third objective. Defaults to "".
        Objsort1 (bool): Whether the first objective is minimised or maximised.
            For both the "adhoc" and "scipy" solvers this should be True for
            minimisation and False for maximisation. Defaults to True.
        Objsort2 (bool): The sorting criteria for the second objective. Defaults to False.
        Objsort3 (bool): The sorting criteria for the third objective. Defaults to False.
        SearchRule (str): The search rule for the agent. Defaults to "all".
        Quantity (float): The fraction of new demand assigned to the agent. Defaults to 1.
        MaturityThreshold (float): The maturity threshold of the agent. Only
            applies when using the maturity search rule. Defaults to 0.
        DecisionMethod (str): Defaults to "singleObj".
            NOTE(review): presumably selects how the objectives are combined -
            confirm against the MUSE documentation.
        SpendLimit (int): Only applies when using the spend_limit search rule.
            Defaults to 99999999999.
        AgentType (str): The type of the agent. Defaults to 'New'.
            (In MUSE this parameter is called "Type".)

    Not implemented: an ``InitialShare`` dict (agent number -> share between
    0 and 1 for each technology) was planned but is not accepted or stored.

    Methods:
        method1(): Placeholder, does nothing.
        method2(): Placeholder, does nothing.
    """

    # Registry shared by the whole class: every constructed Agent ends up here
    instances = []

    def __init__(
        self,
        Name,
        AgentShare,
        RegionName="UK",
        Objective1="LCOE",
        Objective2="",
        Objective3="",
        ObjData1=1,
        ObjData2="",
        ObjData3="",
        Objsort1=True,
        Objsort2=False,
        Objsort3=False,
        SearchRule="all",
        Quantity=1,
        MaturityThreshold=0,
        DecisionMethod="singleObj",
        SpendLimit=99999999999,
        AgentType='New',
    ):
        # Store each argument under its own name. The tuple follows the
        # signature order, which fixes the key order of vars(self).
        for attribute, value in (
            ("Name", Name),
            ("AgentShare", AgentShare),
            ("RegionName", RegionName),
            ("Objective1", Objective1),
            ("Objective2", Objective2),
            ("Objective3", Objective3),
            ("ObjData1", ObjData1),
            ("ObjData2", ObjData2),
            ("ObjData3", ObjData3),
            ("Objsort1", Objsort1),
            ("Objsort2", Objsort2),
            ("Objsort3", Objsort3),
            ("SearchRule", SearchRule),
            ("Quantity", Quantity),
            ("MaturityThreshold", MaturityThreshold),
            ("DecisionMethod", DecisionMethod),
            ("SpendLimit", SpendLimit),
            ("AgentType", AgentType),
        ):
            setattr(self, attribute, value)

        # Register the freshly built agent on the shared class-level list
        Agent.instances.append(self)

    def method1(self):
        """Placeholder with no behaviour yet; returns None."""

    def method2(self):
        """Placeholder with no behaviour yet; returns None."""




### Create instances of the Agents (based on the demand share data calculated above)

In [44]:
# Build one Agent per archetype row of df.
#
# Agent.__init__ appends every new instance to the class-level list
# 'Agent.instances', and that list is never emptied anywhere else. Clear it
# first so that re-running this cell rebuilds the agents instead of appending
# a second copy of each one (which would end up as duplicate rows in the
# exported agents table).
Agent.instances.clear()

# Walk the two needed columns in parallel rather than materialising each row
for archetype, demand_share in zip(df['Archetype'], df['demand_share']):
    # The archetype label is used both as the agent's Name and as its
    # AgentShare; the normalised demand share becomes the agent's Quantity.
    # The instance registers itself, so the return value is not kept.
    Agent(Name=archetype, AgentShare=archetype, Quantity=demand_share)

# Now, 'Agent.instances' holds exactly one Agent per row of df


In [45]:
# One dict of attributes per registered Agent (vars() exposes the instance __dict__)
data = list(map(vars, Agent.instances))

# Tabulate the agents; the column order follows the order in which the attributes were set.
# MUSE expects the column 'Type', so 'AgentType' is renamed on the way.
agents_df = pd.DataFrame(data).rename(columns={'AgentType': 'Type'})

# Display the agents_df
agents_df


Unnamed: 0,Name,AgentShare,RegionName,Objective1,Objective2,Objective3,ObjData1,ObjData2,ObjData3,Objsort1,Objsort2,Objsort3,SearchRule,Quantity,MaturityThreshold,DecisionMethod,SpendLimit,Type
0,A1,A1,UK,LCOE,,,1,,,True,False,False,all,0.032715,0,singleObj,99999999999,New
1,A2,A2,UK,LCOE,,,1,,,True,False,False,all,0.029457,0,singleObj,99999999999,New
2,A3,A3,UK,LCOE,,,1,,,True,False,False,all,0.03383,0,singleObj,99999999999,New
3,B6,B6,UK,LCOE,,,1,,,True,False,False,all,0.032424,0,singleObj,99999999999,New
4,C7,C7,UK,LCOE,,,1,,,True,False,False,all,0.040115,0,singleObj,99999999999,New
5,C9,C9,UK,LCOE,,,1,,,True,False,False,all,0.040723,0,singleObj,99999999999,New
6,D10,D10,UK,LCOE,,,1,,,True,False,False,all,0.042732,0,singleObj,99999999999,New
7,D11,D11,UK,LCOE,,,1,,,True,False,False,all,0.026947,0,singleObj,99999999999,New
8,D12,D12,UK,LCOE,,,1,,,True,False,False,all,0.047888,0,singleObj,99999999999,New
9,E13,E13,UK,LCOE,,,1,,,True,False,False,all,0.052146,0,singleObj,99999999999,New


In [46]:
# Write the agents table next to the source workbook, without the DataFrame index column
agents_csv_path = data_folder / "Agents.csv"
agents_df.to_csv(agents_csv_path, index=False)