* Create MUSE input files, which include:

    1.  MUSE input file: "Consumption*.csv"
    2. MUSE input file: "TechnodataTimeslices.csv" 
    3. MUSE input file: "Technodata.csv"
    4. MUSE input file: "GlobalCommodities.csv"
    5. MUSE input file: "CommIn.csv and CommOut.csv"
    6. MUSE input file: "Projections.csv"
    7. MUSE input file: "ExistingCapacity.csv" 
    8. MUSE input file: "Agent.csv" 


In [1]:
# autoreload
%load_ext autoreload
%autoreload 2
# save the notebook every 10 second
%autosave 10

Autosaving every 10 seconds


In [2]:
## Import libraries
import pandas as pd
import numpy as np
from pathlib import Path
import re
from difflib import get_close_matches


In [3]:
#### TIMES data folder
data_folder =Path.cwd().parent / 'TIMES_data_Service/'

# folder to save output
output_folder = Path.cwd().parent / 'MUSE_Files'


In [4]:
# set the base year
base_year = 2010

# set region
Region = 'UK'

## Create 1. "Consumptions*.csv"

In [5]:
# select the scenario that we are interested in (for comsuption projection)
scenario = "REFScenario_DECC-Central" #There is no data for "BASE" scenario after 2010, so we select "REFScenario_DECC-Central" scenario or any other scenarios.

# define the years that we are interested in
years = list(range(2010, 2051, 10)) #[2010, 2020, 2030, 2040, 2050]

In [6]:
# (1) Read-in the Demand values.

# First we need the following file from TIMES data set
data_file1 = 'Services sector end use demands - all scenarios - PJ.xlsx'

# Read the data into a DataFrame
df1 = pd.read_excel(data_folder / data_file1, skiprows=[0]) # skip the first row


# find all the demand/service commodity types
demand_types = df1['commodity'].unique()
# print(demand_types)

# use the value from BASE scenario for the year 2010 in the selected scenario
for demand_name in demand_types:
    # print("\n",demand_name)
    df1.loc[(df1['commodity'] == demand_name) & (df1['scenario'] == scenario), base_year] = df1.loc[(df1['commodity'] == demand_name) & (df1['scenario'] == 'BASE'), base_year].values[0]

# select the rows that we are interested in, meaning dropping the BASE scenario in df1.
df1 = df1[df1['scenario']==scenario]

# we do not the attribute and scenario coloumns
df1.drop(columns=['attribute','scenario'], inplace=True) 


## set the commodity column as the index, so that we can combine the DataFrames based on the commodity column
df1.set_index('commodity', inplace=True)

# Display the DataFrame
df1.head()

Unnamed: 0_level_0,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,...,2041,2042,2043,2044,2045,2046,2047,2048,2049,2050
commodity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SCH [SER.DEMAND.COOLING.HIGH-CONSUMPTION.],94.622359,95.870658,97.118957,98.367255,99.615554,100.863853,102.005951,103.148048,104.290145,105.432242,...,142.498295,144.560682,146.623069,148.685456,150.747843,152.994913,155.241984,157.489054,159.736125,161.983195
SCK [SER.DEMAND.COOKING.],40.786064,41.324131,41.862199,42.400266,42.938333,43.476401,43.968691,44.460981,44.953271,45.445561,...,61.422529,62.311501,63.200473,64.089446,64.978418,65.946996,66.915574,67.884152,68.852731,69.821309
SCP [SER.DEMAND.COMPUTING.],12.756241,12.924527,13.092813,13.261099,13.429384,13.59767,13.751639,13.905607,14.059576,14.213545,...,19.210498,19.488533,19.766567,20.044602,20.322637,20.625569,20.928502,21.231434,21.534366,21.837299
SHH [SER.DEMAND.SPACE-HEAT.HIGH-CONSUMPTION.],174.106824,176.403716,178.700608,180.9975,183.294393,185.591285,187.692764,189.794244,191.895723,193.997202,...,262.199399,265.994227,269.789056,273.583885,277.378713,281.513363,285.648013,289.782662,293.917312,298.051961
SHL [SER.DEMAND.SPACE-HEAT.LOW-CONSUMPTION.],82.695773,83.786731,84.877689,85.968648,87.059606,88.150564,89.148707,90.146849,91.144992,92.143135,...,124.537232,126.339667,128.142103,129.944538,131.746973,133.710814,135.674654,137.638495,139.602336,141.566176


In [7]:
# (2) Read-in the "Time-slice" data.

# Then we need the following file from TIMES data set
data_file4 = 'Services Sector COM_FR - fraction of each demand in each time slice.xlsx'
              
# Read the data into a DataFrame df2
df2 = pd.read_excel(data_folder / data_file4, skiprows=[0,1]) # skip the first 2 rows
df2.drop(columns=['attribute','scenario','time_slice'], inplace=True) # we do not the attribute coloumn

# transform the DataFrame
df2 = df2.T

# # display the DataFrame
df2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
SCH [SER.DEMAND.COOLING.HIGH-CONSUMPTION.],0.1422,0.0108,0.0108,0.0162,0.1422,0.0108,0.0108,0.0162,0.474,0.036,0.036,0.054,0.0316,0.0024,0.0024,0.0036
SCK [SER.DEMAND.COOKING.],0.15,0.025,0.0,0.075,0.15,0.025,0.0,0.075,0.15,0.025,0.0,0.075,0.15,0.025,0.0,0.075
SCP [SER.DEMAND.COMPUTING.],0.1625,0.025,0.0375,0.025,0.1625,0.025,0.0375,0.025,0.1625,0.025,0.0375,0.025,0.1625,0.025,0.0375,0.025
SHH [SER.DEMAND.SPACE-HEAT.HIGH-CONSUMPTION.],0.094499,0.023822,0.026632,0.049735,0.11467,0.030332,0.034593,0.063001,0.018626,0.00056,0.000918,0.0,0.251344,0.072097,0.073322,0.14585
SHL [SER.DEMAND.SPACE-HEAT.LOW-CONSUMPTION.],0.100379,0.035887,0.037748,0.053389,0.124782,0.043784,0.045784,0.065858,0.010204,0.003944,0.006619,0.00462,0.226215,0.076532,0.062134,0.102121


In [8]:
# In the next step, we will rename the column names to match the EndUse column in Technodata.csv, as this will be used in the "GlobalCommodities.csv" file in MUSE.
## Function to transform each column name
def rename_demand_name(col_name):
    # Use regex to find the "DEMAND" part and capture the subsequent parts
    match = re.search(r'SER\.DEMAND\.([A-Z-]+(?:\.[A-Z-]+)?)', col_name)
    if match:
        # Split by '.' to get the parts after "DEMAND"
        parts = match.group(1).split('.')
        # Limit to at most 2 parts after "SER" and join with '.'
        transformed = "SER." + ".".join(parts[:2])
        return transformed
    return col_name  # Return original name if it doesn't match the pattern

In [9]:
# (3) Use df1 and df2 to create Comsuption*.csv

# Ensure the indices are identical and aligned
df1 = df1.sort_index()
df2 = df2.sort_index()


# Multiply each column in df1 with each column in df2
for year in years:
    comsuption = {}
    for demand_name in df2.columns:
        comsuption[f'{demand_name}'] = df1[year] * df2[demand_name]
    
    comsuption_df = pd.DataFrame.from_dict(comsuption, orient='index')

    # Apply transformation to each column name
    comsuption_df.columns = [rename_demand_name(col) for col in comsuption_df.columns]

    # add additional columns as required by MUSE
    comsuption_df['RegionName'] = Region # 'UK'
    #comsuption_df['ProcessName'] = 'service' # deprecated from MUSE 
    comsuption_df['Timeslice'] = list(range(1, len(comsuption_df.index) + 1))


    # Define the desired column order
    column_order = ['RegionName', 'Timeslice'] + [col for col in comsuption_df.columns if col not in ['RegionName', 'Timeslice']]
    # Reorder the DataFrame columns
    comsuption_df = comsuption_df[column_order]


    # Save the DataFrame to a CSV file
    comsuption_df.to_csv(output_folder / f'Consumption{year}.csv', index=False)

comsuption_df.head()

Unnamed: 0,RegionName,Timeslice,SER.COOLING.HIGH-CONSUMPTION,SER.COOKING,SER.COMPUTING,SER.SPACE-HEAT.HIGH-CONSUMPTION,SER.SPACE-HEAT.LOW-CONSUMPTION,SER.LIGHTING.OFFICE,SER.LIGHTING.OTHER,SER.OTHER,SER.REFRIGERATION,SER.HOT-WATER.HIGH-CONSUMPTION,SER.HOT-WATER.LOW-CONSUMPTION
0,UK,1,23.03401,10.473196,3.548561,28.165639,14.210282,37.930162,17.123942,3.285895,8.473862,6.820312,2.902032
1,UK,2,1.749419,1.745533,0.545932,7.100288,5.08036,12.643387,5.707981,1.314358,3.389545,1.262066,0.580488
2,UK,3,1.749419,0.0,0.818899,7.937729,5.343836,16.85785,7.610641,2.300127,5.931703,4.427548,1.922766
3,UK,4,2.624128,5.236598,0.545932,14.823503,7.558101,16.85785,7.610641,0.985769,2.542159,3.294975,1.389932
4,UK,5,23.03401,10.473196,3.548561,34.177632,17.664974,37.930162,17.123942,3.285895,8.473862,7.143829,2.906259


## Create 2."Technodata.csv"

#### Read-in the technology data (e.g. cost,lifetime, efficiency)

In [10]:
data_file = 'Services sector - process techno-economic parameters.xlsx'
TechnoData = pd.read_excel(data_folder / data_file)

In [11]:
# All the process/technology names
TechList_all = TechnoData['process'].unique()

# For this analysis, we do not need technologies that are related to district heating or infrastructure.
# Define the substrings to filter out.
tech_remove = [".DIST", "INFRASTRUCTURE"] 

# Use a list comprehension to filter out strings containing any of the substrings
TechList = [s for s in TechList_all if not any(sub in s for sub in tech_remove)]


# print(len(TechList))

In [12]:
## need manually to assign the fuel type for some technologies.
fuel_mapping = {
    'ELC': ['.ELC','COMPUTERS', 'AIR-CONDITIONER', 'FREEZERS', 'REFRIGERATORS', 'LIGHTING', 'WET-APPLIANCES', 'CONSUMER-ELECTRONICS', 'ELECTRIC','ELECTRICITY','REFRIGERATION]'],
    'NGA': ['.GAS.','.NATURAL-GAS','.NGA.'],
    'OIL': ['.OIL'],
    'SOLAR': ['.SOL', '.SOLAR'],
    'HCO': ['.HCO'],
    'WOOD': ['.WOD'],
    'LTH': ['.LTH'],
    'COAL': ['.COL','COAL','COA'],
    'HYDROGEN': ['.HYDROGEN','HDG'],
    'KEROSENE': ['KEROSENE'],
    'LOGS': ['.LOGS'],
    'LFO': ['LIGHT-FUEL-OIL','.LFO'],
    'LPG': ['LIQUEFIED-PETROLEUM-GAS','.LPG'],
    'NGA-BOM-HYG': ['.NGA-BOM-HYG'],
    'COKE': ['.COKE'],
    'BOG': ['.BOG'], 
    'BIOMASS': ['.BIOMASS'],

}

In [13]:
# create a dictionary to store the techno-economic data for each technology
Technodata_all = {}


for tech in TechList:
    # print('\n',tech)

    # Create a dictionary to store the technology data
    techdata = {}

    # Add the technology name to the dictionary
    techdata['ProcessName'] = tech
    ##=============the cap_par parameter=================##
    # Filtering the dataframe where 'process' is tech and 'attribute' is a specific value [NCAP_COST]
    cap_par = TechnoData.loc[
        (TechnoData['process']== tech) & 
        (TechnoData['attribute'] == "NCAP_COST [Investment cost per unit of new capacity installed]"), 
        [base_year]
    ]
    # print(cap_par)

        # If no rows are found, assign 'NA'
    if cap_par.empty:
        techdata['cap_par'] = np.nan
    else:
        techdata['cap_par'] = cap_par[base_year].values[0]  # Extract the value

    

    ##===================the fix_par parameter===================##
    fix_par =  TechnoData.loc[
        (TechnoData['process']== tech) & 
        (TechnoData['attribute'] == "NCAP_FOM [Fixed operating and maintenance cost per unit of capacity according to the year initially installed.]"), 
        [base_year]
    ]

    # If no rows are found, assign 'NA'
    if fix_par.empty:
        techdata['fix_par'] = np.nan
    else:
        techdata['fix_par'] = fix_par[base_year].values[0]  # Extract the value



    ##===================the TechnicalLife parameter===================##
    TechnicalLife =  TechnoData.loc[(TechnoData['process']== tech) & 
        (TechnoData['attribute'] == "NCAP_TLIFE [Technical life-time of a Process;number of years.Default:G_TLIFE.]"), 
        [base_year]
    ]
    # If no rows are found, assign 'NA'
    if TechnicalLife.empty:
        techdata['TechnicalLife'] = np.nan
    else:
        techdata['TechnicalLife'] = TechnicalLife[base_year].values[0]  # Extract the value

    # print(techdata['TechnicalLife'])    


    ##===================the efficiency parameter===================##
    efficiency =  TechnoData.loc[(TechnoData['process']== tech) & 
        (TechnoData['attribute'] == "ACT_EFF [Generic process transformation parameter]"), 
        [base_year]
    ]
    # If no rows are found, assign 'NA'
    if efficiency.empty:
        techdata['efficiency'] = np.nan
    else:
        techdata['efficiency'] = efficiency[base_year].values[0]  # Extract the value

    #print(techdata['efficiency'])    
    

    ##===================the UtilizationFactor parameter===================##
    UtilizationFactor =  TechnoData.loc[
        (TechnoData['process'] == tech) & 
        (
            (TechnoData['attribute'] == "NCAP_AFA [Annual Availability factor relating a unit of production to the installed capacity according to the year initial installed.Fraction]") | 
            (TechnoData['attribute'] == "NCAP_AFC [Commodity-specific availability factor]")
        ), 
        [base_year]
    ]

    if UtilizationFactor.empty:
        techdata['UtilizationFactor'] = 1 # This will be overwritten by "TechnodataTimeslice.csv"
    else:
        techdata['UtilizationFactor'] = UtilizationFactor[base_year].values[0]  # Extract the value
    
    
    ##===================the Total Capacity Limit===================##
    # "TotalCapacityLimit" in MUSE

    TotalCapacityLimit =  TechnoData.loc[
        (TechnoData['process'] == tech) & 
        (TechnoData['attribute'] == "CAP_BND [Bound on the total installed capacity  in a time period]") , 
        [base_year]
    ]

    if TotalCapacityLimit.empty:
        techdata['TotalCapacityLimit'] = 100000000  # Default value
    else:
        techdata['TotalCapacityLimit'] = TotalCapacityLimit[base_year].values[0]  # Extract the value


    ##===================Max Capacity Addition===================##
    # "MaxCapacityAddition" in MUSE
    MaxCapacityAddition =  TechnoData.loc[
        (TechnoData['process'] == tech) & 
        (TechnoData['attribute'] == "NCAP_BND [Limit on investments in new capacity,Ignored if y-index prior to 1st MODLYEAR]") , 
        [0] # I don't know why the column name is called "0"
    ]

    if MaxCapacityAddition.empty:
        techdata['MaxCapacityAddition'] = 100000000  # Default value
    else:
        techdata['MaxCapacityAddition'] = MaxCapacityAddition[0].values[0]


    ##===================the Fuel parameter===================##
    # "Fuel" in MUSE
    techdata['Fuel'] = np.nan  # Default value

    for fuel, keywords in fuel_mapping.items():
        if any(keyword in techdata['ProcessName'] for keyword in keywords):
            techdata['Fuel'] = fuel
            break
    else:
        # Extract the matching text
        fuel_match = re.search(r"\.([^.]*)\.\s*$", tech)
        # Check if a match was found and print the result
        if fuel_match:
            techdata['Fuel'] = fuel_match.group(1)


    ##===================the Type parameter===================##
    ### skip the "Type" for now, as it is an optional parameter in MUSE
    # "Type" in MUSE
    # type_match = re.search(r": \.(.*)\.?$", tech)
    # if type_match:
    #     techdata['Type'] = type_match.group(1)
    # else:
    #     techdata['Type'] = np.nan
        
    
    # add the technology data to the Master dictionary
    Technodata_all[tech] = techdata
    # break

# Create a DataFrame from the dictionary of dictionaries
Technodata_df = pd.DataFrame.from_dict(Technodata_all, orient='index')

Technodata_df.head()


Unnamed: 0,ProcessName,cap_par,fix_par,TechnicalLife,efficiency,UtilizationFactor,TotalCapacityLimit,MaxCapacityAddition,Fuel
SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AIR.DELIVERY.],SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,,0.0,50.0,1.0,1.0,100000000.0,2.0,
SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.],SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.],,16.842105,25.0,0.545074,0.326629,100000000.0,2.0,NGA
SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.],SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.],,23.487154,15.0,0.573375,0.466917,100000000.0,2.0,BOG
SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.],SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.],,24.024024,25.0,0.881346,0.580757,100000000.0,2.0,BIOMASS
SCKELC000 [SER.COOKING: .00.ELC.],SCKELC000 [SER.COOKING: .00.ELC.],,5.850889,14.0,0.9,1.0,100000000.0,2.0,ELC


In [14]:
# EndUse column:
## We will generate a process's 'EndUse' by looking at its ProcessName and comparing it with the 'commodity_df' column in the Consumption.csv, and use the "get_close_matches" to name it.

services = [item for item in comsuption_df.columns if '.' in item] # get all the services name and remove the 'RegionName' and 'Timeslice'. 

processes = Technodata_df['ProcessName'].to_list() # get all the processes/technologies name

#first, create the dictionary with closest matches
service_to_enduse = {}
for process in processes:
    proc = re.search(r"\[(.*?)\]", process).group(1)
    # print(proc)

        
    # Get the closest matching service for the current process item
    closest_match = get_close_matches(proc, services, n=1)
    

    if not closest_match: # if no match is found, consider only the base part of the process
        if ':' in proc:
            proc_base = proc.split(":")[0]  # Consider only the base part before ':'
        else:
            proc_base = re.split(r'\d+', proc)[0]  # Consider only the base part before any number
        closest_match = get_close_matches(proc_base, services, n=1)
    
    # Assign the service with the closest match to the process
    if closest_match:
        service_name = closest_match[0]
    else:
        service_name = None  # or some default value if no match is found

    service_to_enduse[process] = service_name
    # print(service_to_enduse[process])

    # break

#Then, apply the mapping to create the 'EndUse' column
Technodata_df['EndUse'] = Technodata_df['ProcessName'].map(service_to_enduse)

In [15]:
# Define a generalized function that can fill missing values for any specified column based on similar "ProcessName"
def fill_missing_column_value(row, data, target_column):
    """
    This function takes a row, the complete data frame, and a target column name.
    It finds similar processes (based on ProcessName) with non-missing values in the target column
    and returns the value from the most similar process.
    """
    # Extract the process name for the current row
    process_name = row['ProcessName']
    
    # Filter out rows with non-missing values in the target column
    available_data = data.dropna(subset=[target_column])
    
    # Find similar process names using the difflib's get_close_matches function
    similar_names = get_close_matches(process_name, available_data['ProcessName'], n=1, cutoff=0.5)
    
    # If a similar name is found, return the corresponding value in the target column
    if similar_names:
        similar_row = available_data[available_data['ProcessName'] == similar_names[0]]
        return similar_row[target_column].values[0]
    return None

# Apply the function to fill missing values in "cap_par", "fix_par", and "Fuel"
for column in ['cap_par', 'fix_par', 'Fuel']:
    Technodata_df[column] = Technodata_df.apply(lambda row: fill_missing_column_value(row, Technodata_df, column) 
                              if pd.isna(row[column]) else row[column], axis=1)



In [16]:
# (4) further check for missing values
print(Technodata_df.isnull().any())

# check for missing values (in which rows)
print(Technodata_df[Technodata_df.isnull().any(axis=1)])


ProcessName            False
cap_par                False
fix_par                False
TechnicalLife          False
efficiency             False
UtilizationFactor      False
TotalCapacityLimit     False
MaxCapacityAddition    False
Fuel                   False
EndUse                 False
dtype: bool
Empty DataFrame
Columns: [ProcessName, cap_par, fix_par, TechnicalLife, efficiency, UtilizationFactor, TotalCapacityLimit, MaxCapacityAddition, Fuel, EndUse]
Index: []


In [17]:
# add other columns that is required by MUSE
Technodata_df['RegionName'] = Region
Technodata_df['Time'] = base_year
Technodata_df['cap_exp'] = 1
Technodata_df['fix_exp'] = 1
Technodata_df['var_exp'] = 1
Technodata_df['var_par'] = 0
Technodata_df['MaxCapacityGrowth'] = 100 #%
Technodata_df['ScalingSize'] = 1
Technodata_df['InterestRate'] = 0.1

#### Add and adjust columns based on MUSE requirement.

In [18]:
# add Agent columns as required by MUSE
from cls_Agent import Agent

# total number of agents
n_agents = len(Agent.instances)

for agent in Agent.instances:
    Technodata_df[agent.AgentShare] = 1/n_agents # equal share of the technology to each agent
    

In [19]:
# Add the "Unit" row to the DataFrame
# Step 1: Create the dictionary with the corresponding unit values
unit_row = {
    "ProcessName" : "Unit",
    "RegionName": "-",
    "Time" : "year",
    "cap_par" : "MGBP2020/PJ_a",
    "cap_exp" : "-",
    "fix_par" : "MGBP2020/PJ_a",
    "fix_exp" : "-",
    "var_par" : "MGBP2020/PJ",
    "var_exp" : "-",
    "MaxCapacityAddition" : "PJ",
    "MaxCapacityGrowth" : "%",
    "TotalCapacityLimit" : "PJ",
    "TechnicalLife" : "years",
    "UtilizationFactor" : "-",
    "ScalingSize" : "PJ",
    "efficiency" : "%",
    "InterestRate" : "-",
    # "Type" : "-", 
    "Fuel" : "-", 
    "EndUse" : "-" 
}

# Add the Agent unit to the dictionary
unit_row.update({agent.AgentShare: agent.AgentType for agent in Agent.instances})

# Convert the dictionary into a DataFrame
unit_df = pd.DataFrame([unit_row])

# Step 3: Concatenate the unit row DataFrame on top of Technodata_final
Technodata_final = pd.concat([unit_df, Technodata_df]).reset_index(drop=True)


# Rearrange the columns to ensure 'ProcessName' is the first column
columns = ['ProcessName'] + [col for col in Technodata_final.columns if col != 'ProcessName']
Technodata_final = Technodata_final[columns]

# Technodata_final

In [20]:
# Save the the df as 'Technodata.csv'. 
Technodata_final.to_csv(output_folder / 'Technodata.csv', index=False)


## Create 3. 'Technodata_Timeslice.csv'

In [21]:
# Sample data for seasons and periods
seasons = ['A', 'P', 'S', 'W']  # Four seasons: A (Autumn), P (Spring), S (Summer), W (Winter)
periods = ['Day', 'Evening', 'Night', 'Peak']  # Originally was called: Day, Late_evening, Night, Evening_peak
# seasons_periods = ["AD","AE","AN","AP","PD","PE","PN","PP","SD","SE","SN","SP","WD","WE","WN","WP" ] # 16 time slices in TIMES model

In [22]:

# List to store rows
data = []

# Loop over all the technologies and generate rows for each one
for tech in Technodata_final['ProcessName'][1:].tolist():
    for season in seasons:
        for period in periods:

            result = TechnoData.loc[
                (TechnoData['process'] == tech) & 
                (TechnoData['time_slice'] == str(season+period[0])) &
                (
                    (TechnoData['attribute'] == "NCAP_AF [Availability factor relating a unit of production to the installed capacity according to the year initial installed.]") | 
                    (TechnoData['attribute'] == "FLO_FR [Load curve describing the availability of a commodity in time-slices of the period;fraction.Default:none[MM-SRCENCP FR,but for any process/seasonal commodity not just SRCENCP/electricity]]")
                ), 
                ['lim_type', base_year]
            ]


            # Assign MinimumServiceFactor with a default value of 0 if not found
            if 'LO' in result['lim_type'].values: # Lower bound
                MinimumServiceFactor = result[result['lim_type'] == 'LO'][base_year].values[0]
            else:
                MinimumServiceFactor = 0

            # Assign UtilizationFactor with a default value of 1 if not found
            if 'UP' in result['lim_type'].values: # Upper bound
                utilization_factor = result[result['lim_type'] == 'UP'][base_year].values[0]
            
            else:
                utilization_factor = 1

            # Time column is the year
            time = base_year

            # region
            RegionName = Region

            # Append each row to the data list
            data.append([tech, RegionName, time, season, period, utilization_factor, MinimumServiceFactor])

# Create a Pandas DataFrame
Timeslice_df = pd.DataFrame(data, columns=['ProcessName', 'RegionName', 'Time', 'season', 'period', 'UtilizationFactor','MinimumServiceFactor'])

# Save the DataFrame to a CSV file
Timeslice_df.to_csv(output_folder / 'TechnodataTimeslices.csv', index=False, float_format='%.6f', encoding='utf-8')

print('{TechnodataTimeslices.csv} created successfully.')

{TechnodataTimeslices.csv} created successfully.


## Create 4. "GlobalCommodities.csv" 

#### * We need the list of technologies from 'Technodata.csv' . This is used for extracting the "Fuel" used in the technologies.
#### * Or this can be obtained from the "Technodata_final" dataframe, which we created earlier (we will use this approach here)

In [23]:
# Fuel_EndUse = Technodata_final.iloc[1:][['Fuel', 'EndUse']].copy()

fuels = Technodata_final.iloc[1:]['Fuel'].unique().tolist() #  get the 'fuels' from the "Fuel" column

services = [col for col in comsuption_df.columns if col not in ["RegionName", "Timeslice"]] # Get the name of the services from the column names in the comsuption_df

In [24]:
# Global commodities will be consistuted of the unique values of 'Fuel' column and 'services'.
energy_commodity = pd.DataFrame({'Commodity': fuels, 'CommodityType': 'Energy'}) # we name it as energy_commodity as MUSE uses this name.

service_commodity = pd.DataFrame({'Commodity': services, 'CommodityType': 'Services'}) # we name it as energy_commodity as MUSE uses this name.

In [25]:
# Concatenate both DataFrames to form the final result
GlobalCommodities_df = pd.concat([energy_commodity, service_commodity], ignore_index=True)

# add the CO2f to the Commodity
GlobalCommodities_df.loc[len(GlobalCommodities_df)] = ['CO2f', 'Environmental']

# Add a new column 'CommodityName' with the same values as 'Commodity'
GlobalCommodities_df['CommodityName'] = GlobalCommodities_df['Commodity']


# save the new DataFrame to a new csv file
GlobalCommodities_df.to_csv(output_folder / 'GlobalCommodities.csv', index=False)

# display the df
# GlobalCommodities_df

## Create 5. CommIn.csv and CommOut.csv

In [26]:
from cls_Commodity import fossil_fuels #  need to use the  emission factor from the fossil_fuels, which is not given in the TIMES data set.
# importlib.reload(cls_Commodity)

In [27]:
# Create a new DataFrame with ProcessName as the first column and fuel types as the other columns
df_CommIn = pd.DataFrame(columns=['ProcessName'] + fuels)
#  Create a new DataFrame with ProcessName as the first column and service as the other columns, and an additional column for CO2 emissions
df_CommOut = pd.DataFrame(columns=['ProcessName'] + services + ['CO2f'])

# # Step 3: Populate the df_CommIn and df_CommOut DataFrame
for _, row in Technodata_final.iterrows():
    if row['ProcessName'] == 'Unit': # Skip the 'Unit' row
        continue
    else:
        # print(row)
        process_name = row['ProcessName']
        fuel_type = row['Fuel']
        efficiency = float(row['efficiency'])
        end_use = row['EndUse']
        
        ## ==== CommIn ==== ##
        # Create a new row as a DataFrame with 0s for all fuel types
        new_row1 = pd.DataFrame([[process_name] + [0]*len(fuels)], columns=['ProcessName'] + fuels)
        new_row1[fuel_type] = 1 / efficiency

        if df_CommIn.empty:
        # If df_CommIn is empty, initialize it with new_row1 directly
            df_CommIn = new_row1
        else:
            # Otherwise, concatenate as usual
            df_CommIn = pd.concat([df_CommIn, new_row1])


        ## ==== CommOut ==== ##
        # Create a new row as a DataFrame with 0s for all end_use types
        new_row2 = pd.DataFrame([[process_name] + [0]*len(services)+ [0]], columns=['ProcessName'] + services + ['CO2f'])
        new_row2[end_use] = 1 # set the value to 1
        
        if fuel_type in fossil_fuels.keys():
            new_row2['CO2f'] = fossil_fuels[fuel_type].CommodityEmissionFactor_CO2
        
        if df_CommOut.empty:
        # If df_CommIn is empty, initialize it with new_row1 directly
            df_CommOut = new_row2
        else:
            # Otherwise, concatenate as usual
            df_CommOut = pd.concat([df_CommOut, new_row2])


        
# print(df_CommIn)

In [28]:
# add 'Level' to the df_CommIn as required by MUSE
df_CommIn['Level'] = 'fixed'  # or 'fixed'


for df in [df_CommIn, df_CommOut]:
    # add the RegionName as MUSE requires it.
    df['RegionName'] = Region

    #add Time column as MUSE requires it.
    df['Time'] = base_year

    # add the 'Unit' row. Define the new row values based on the column names
    new_row = ["Unit"] + [
        "year" if col == "Time" 
        else "kt/PJ" if col == "CO2f" 
        else "-" if col in ["RegionName", "Level"]
        else "PJ/PJ"
        for col in df.columns[1:]
    ]

    # Add the unit row (as the last row) to the DataFrame
    df.loc[-1] = new_row


In [29]:
# save the data to csv files
df_CommIn.to_csv(output_folder / 'CommIn.csv', index=False)
df_CommOut.to_csv(output_folder /  'CommOut.csv', index=False)

## Create the 6. "Projections.csv"

In [30]:
# set time range that we are interested in
years = [2010,2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]

In [31]:
proj_df = pd.DataFrame({'Time': years})

# Use values in df2['D'] as column names in df1
for tech in GlobalCommodities_df['CommodityName'].to_list():
    proj_df[tech] = 0  # Initialize with placeholder values (e.g., 0), to be replaced with actual values later

# Add the 'RegionName' and 'Attribute' columns as required by the MUSE format
proj_df.insert(0, 'RegionName', Region)  # Insert 'Region' as the first column
proj_df.insert(1, 'Attribute', 'CommodityPrice')  # Insert 'region' as the first column



In [32]:
# save the new DataFrame to a new csv file
proj_df.to_csv(output_folder / 'Projections.csv', index=False)

# display the df
proj_df

Unnamed: 0,RegionName,Attribute,Time,NGA,BOG,BIOMASS,ELC,OIL,HCO,LFO,...,SER.COMPUTING,SER.SPACE-HEAT.HIGH-CONSUMPTION,SER.SPACE-HEAT.LOW-CONSUMPTION,SER.LIGHTING.OFFICE,SER.LIGHTING.OTHER,SER.OTHER,SER.REFRIGERATION,SER.HOT-WATER.HIGH-CONSUMPTION,SER.HOT-WATER.LOW-CONSUMPTION,CO2f
0,UK,CommodityPrice,2010,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,UK,CommodityPrice,2015,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,UK,CommodityPrice,2020,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,UK,CommodityPrice,2025,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,UK,CommodityPrice,2030,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,UK,CommodityPrice,2035,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,UK,CommodityPrice,2040,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,UK,CommodityPrice,2045,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,UK,CommodityPrice,2050,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Create 7."ExistingCapacity.csv"

### Step 1. Read-in the "PRC_RESID [Residual Capacity]" parameters in the TIMES data set.

In [33]:
#### Read-in the list of technologies that will be used 
data_file = 'Services sector - process techno-economic parameters.xlsx'

# Read the data into a DataFrame
tech_econ_data = pd.read_excel(data_folder / data_file)

# Display the first few rows of the DataFrame
tech_econ_data.head()


Unnamed: 0,side,lim_type,time_slice,user_constraint,attribute,commodity,commodity_group,process,scenario,Unnamed: 9,...,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100
0,-,-,-,-,END [-],-,-,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,BASE,2010.0,...,,,,,,,,,,
1,-,-,-,-,END [-],-,-,SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.],BASE,2010.0,...,,,,,,,,,,
2,-,-,-,-,END [-],-,-,SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.],BASE,2010.0,...,,,,,,,,,,
3,-,-,-,-,END [-],-,-,SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.],BASE,2010.0,...,,,,,,,,,,
4,-,-,-,-,END [-],-,-,SCKELC000 [SER.COOKING: .00.ELC.],BASE,2010.0,...,,,,,,,,,,


In [34]:
Residual_Capacity = tech_econ_data[tech_econ_data['attribute'] == "PRC_RESID [Residual Capacity]"].copy()

# Using DataFrame.drop to remove the columns that are not needed
Residual_Capacity.drop(columns=['side','lim_type','time_slice','user_constraint','commodity','commodity_group','scenario','Unnamed: 9',0], inplace=True)

# Display the first few rows of the DataFrame
Residual_Capacity.head()


Unnamed: 0,attribute,process,2010,2011,2015,2017,2020,2024,2025,2030,...,2055,2060,2065,2070,2075,2080,2085,2090,2095,2100
977,PRC_RESID [Residual Capacity],SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,430.569581,,,,,,,,...,,0.0,,,,,,,,
978,PRC_RESID [Residual Capacity],SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.],1.370521,,,,,,,,...,,,,,,,,,,
979,PRC_RESID [Residual Capacity],SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.],0.338002,,,,,,0.0,,...,,,,,,,,,,
980,PRC_RESID [Residual Capacity],SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.],0.435777,,,,,,,,...,,,,,,,,,,
981,PRC_RESID [Residual Capacity],SCKELC000 [SER.COOKING: .00.ELC.],43.217479,,,,,0.0,,,...,,,,,,,,,,


In [35]:
# Define a function to explicitly calculate linear interpolation between start (year) and cutoff years (the year where the residual capacity reaches 0)
# because the data in TIMES data set is not complete. As you can see in the above table, the residual capacity is not available for all years (as indicated by NaN values).

def explicit_interpolation(row):
    # Identify year columns and convert them to numeric
    year_columns = [col for col in Residual_Capacity.columns if isinstance(col, int)]
    row[year_columns] = row[year_columns].apply(pd.to_numeric, errors='coerce')
    
    # Starting point
    start_year = year_columns[0]
    start_value = row[start_year]
    
    # Identify cutoff year
    cutoff_year = None
    for year in year_columns:
        if row[year] == 0:
            cutoff_year = year
            break
    
    # Interpolation between start and cutoff
    if cutoff_year:
        start_idx = year_columns.index(start_year)
        cutoff_idx = year_columns.index(cutoff_year)
        
        # Calculate interpolated values
        for i in range(start_idx + 1, cutoff_idx):
            year = year_columns[i]
            row[year] = start_value + (row[cutoff_year] - start_value) * (i - start_idx) / (cutoff_idx - start_idx)
        
        # Set values after cutoff to 0
        for year in year_columns[cutoff_idx + 1:]:
            row[year] = 0
    else:
        # If no cutoff, set all years after start to 0
        row.loc[year_columns[1:]] = 0

    return row

In [36]:
# Apply this interpolation method
Residual_Capacity_interpolation = Residual_Capacity.apply(explicit_interpolation, axis=1)

# List of columns to keep
years = list(range(2010,2051,5)) # years of interest
columns_to_keep = ["process"] + years

# Select the ones you want
Residual_Capacity_interpolation = Residual_Capacity_interpolation[columns_to_keep].reset_index(drop=True)

# In order to further processing(concatenating) in later step 3, we need to rephrase the name in the "process" column
# Residual_Capacity_interpolation['process'] = Residual_Capacity_interpolation['process'].str.extract(r'\[(.*?)\]')[0] # Extract the text between "[" and "]"


Residual_Capacity_interpolation.head()

Unnamed: 0,process,2010,2015,2020,2025,2030,2035,2040,2045,2050
0,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,430.569581,369.059641,307.549701,246.039761,215.28479,153.77485,123.01988,92.26491,61.50994
1,SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.],1.370521,1.065961,0.761401,0.45684,0.30456,0.0,0.0,0.0,0.0
2,SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.],0.338002,0.225334,0.112667,0.0,0.0,0.0,0.0,0.0,0.0
3,SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.],0.435777,0.338938,0.242098,0.145259,0.096839,0.0,0.0,0.0,0.0
4,SCKELC000 [SER.COOKING: .00.ELC.],43.217479,25.930487,8.643496,0.0,0.0,0.0,0.0,0.0,0.0


### Step 2. Then Read-in the list of technologies that will be used 
#### * We can retrive this information from the dataframe 'Technodata_final' that we created earlier in this Jupter Notebook or directly from 'Technodata.csv'
#### * Here we read directly from 'Technodata.csv'

In [37]:
# make sure you have created 'Technodata.csv' already.
data_file = 'Technodata.csv'

# read the csv file, we only need the column 'ProcessName'
df_tech_lst = pd.read_csv(output_folder / data_file, usecols=['ProcessName'],skiprows=[1]) # skip the 'Unit' row

df_tech_lst.head()

Unnamed: 0,ProcessName
0,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...
1,SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.]
2,SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.]
3,SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.]
4,SCKELC000 [SER.COOKING: .00.ELC.]


## Step 3. Concatenate/Merge the above two dataframes ( 'Residual_Capacity_interpolation' and 'df_tech_lst')

In [38]:
# merge the two DataFrames, while keeping all the rows in the left DataFrame (i.e. df_tech_lst)
df_merged = df_tech_lst.merge(Residual_Capacity_interpolation, left_on='ProcessName', right_on='process', how='left')

# Replace NaN values in year columns with 0 for non-matching rows
df_merged[years] = df_merged[years].fillna(0)

# drop the "Process" column
df_merged.drop('process', axis=1, inplace=True)


## Step 4.  Format the merged df according to MUSE requirement and save it as a .csv file

In [39]:
# set the region and unit.
region = 'UK'
unit = 'PJ/y'


# rename the columns
df_merged.rename(columns={'MemberDesc': 'ProcessName'}, inplace=True)

# add the region and unit columns
df_merged['RegionName'] = region
df_merged['Unit'] = unit

# save the DataFrame to a csv file
df_merged.to_csv(output_folder /  'ExistingCapacity.csv', index=False)

# display the DataFrame
df_merged.head()



Unnamed: 0,ProcessName,2010,2015,2020,2025,2030,2035,2040,2045,2050,RegionName,Unit
0,SCHDAIR00 [SER.COOLTH.HIGH-CONSUMPTION: .00.AI...,430.569581,369.059641,307.549701,246.039761,215.28479,153.77485,123.01988,92.26491,61.50994,UK,PJ/y
1,SCHP-CCG00 [SER.CHP: .00.COMBINED-CYCLE.NGA.],1.370521,1.065961,0.761401,0.45684,0.30456,0.0,0.0,0.0,0.0,UK,PJ/y
2,SCHP-GES00 [SER.CHP: .00.GAS-ENGINE.BOG.],0.338002,0.225334,0.112667,0.0,0.0,0.0,0.0,0.0,0.0,UK,PJ/y
3,SCHP-STW00 [SER.CHP: .00.STEAM-TURBINE.BIOMASS.],0.435777,0.338938,0.242098,0.145259,0.096839,0.0,0.0,0.0,0.0,UK,PJ/y
4,SCKELC000 [SER.COOKING: .00.ELC.],43.217479,25.930487,8.643496,0.0,0.0,0.0,0.0,0.0,0.0,UK,PJ/y


## Create 8. "Agent.csv"
#### * For this version of the model, we create a simple representation of agents - 2 agents.
#### * In a different version of the input file, we will create more types of agents.

In [40]:
# import Agent from another script
# import cls_Agent #(already imported above)

In [41]:
# Convert the list of Agent instances to a list of dictionaries
data = [vars(instance) for instance in Agent.instances]

# Convert the list of dictionaries to a pandas DataFrame
df = pd.DataFrame(data)

# Rename the 'AgentType' column to 'Type' to match the MUSE model requirements
df.rename(columns={'AgentType': 'Type'}, inplace=True) 


# Write the DataFrame to a CSV file
csv_file = output_folder / 'Agent.csv'
df.to_csv(csv_file, index=False, float_format='%.6f', encoding='utf-8')

print(f"CSV file '{csv_file}' created successfully.")

CSV file 'c:\Users\jyang8\MUSE_models\Service\MUSE_Files\Agent.csv' created successfully.
