# Output Preparation for the Nord_H2ub Spine Model

This Jupyter notebook contains all routines for preparing the results exported from the Spine model into a prepared output file, including the system cost overview and the levelized cost of energy (LCOE).

Authors: Johannes Giehl (jfg.eco@cbs.dk), Dana J. Hentschel (djh.eco@cbs.dk), Lucia Ciprian (luc.eco@cbs.dk)

## General settings

### Packages:

In [1]:
import pandas as pd
import os
import openpyxl
import numpy as np

In [2]:
def present_value_factor(n, r):
    """
    Present value factor of an annuity (Rentenbarwertfaktor).

    Parameters:
    n (int): Number of periods (time horizon).
    r (float): Discount rate per period (WACC).

    Returns:
    float: (1 - (1 + r) ** -n) / r for a non-zero rate; n itself when the
    rate is zero, where the closed form would divide by zero.
    """
    if r != 0:
        discount_base = 1 + r
        return (1 - discount_base ** -n) / r
    return n

### define parameters

In [3]:
#parameters for present_value_factor calculation
# time_horizon and wacc are passed to present_value_factor() when the LCOE is calculated
time_horizon = 25  # Number of periods (years)
wacc = 0.05        # Discount rate
# NOTE(review): starting_year is not read anywhere in the visible cells; the cost lookup uses the literal column "Value 2020" — confirm whether both should be linked
starting_year = "2020"
# NOTE(review): the disabled chdir below points to a user-specific absolute path; all paths in this notebook are relative to the notebook folder
#os.chdir('C:/Users/luc.eco/OneDrive - CBS - Copenhagen Business School/Documents/GitHub/Nord_H2ub/Spine_Projects/03_output_data')



### define variables

In [10]:
#TBA

### File paths:

In [71]:
#get path of latest spine results
#parent folder that contains one sub-folder per exporter run
parent_folder_results = '../02_basic_energy_model/.spinetoolbox/items/exporter/output'
folders = [f for f in os.listdir(parent_folder_results) if os.path.isdir(os.path.join(parent_folder_results, f))]
if not folders:
    # fail early with a clear message instead of a NameError further down
    raise FileNotFoundError(f"No result folders found in '{parent_folder_results}'.")

#the most recently modified sub-folder is taken as the latest run
latest_folder = max(folders, key=lambda x: os.path.getmtime(os.path.join(parent_folder_results, x)))
#normalise Windows separators so the path can be extended with '/'
latest_folder_path = os.path.join(parent_folder_results, latest_folder).replace('\\', '/')
folder_path_results = latest_folder_path + '/'

#get the information of the prepared input data that is used for the spine optimization
prepared_input_file_path = os.path.join('..', '01_input_data', '02_input_prepared')


#prepared output data export to
output_file_path = '../03_output_data/01_basic_energy_model_outputs/'

In [76]:
#set the name of the relevant files
#input files for this script

#file name export from SpineToolbox
output_exported_file = 'Output_exported.xlsx'

#input file used for the optimization
#the leading '/' is kept because the name is appended to the folder path by string concatenation;
#a forward slash works on Windows and POSIX and avoids the invalid escape sequence '\m'
data_from_inputs = '/methanol_Input_prepared.xlsx'

#this way of the output data preparation must be changed
#the information is manually added to the input xlsx and does not exist in the automated input generation
data_from_inputs_temporary = '/methanol_Input_prepared_for_output_temporary.xlsx'

#output files of this script
#file names to store the prepared output
output_prepared_export = 'output_last_run.xlsx'

In [77]:
#combine input path and files
#to input files
#a leading separator on the file name is stripped so that os.path.join does not
#treat it as an absolute path and the result is valid on Windows and POSIX
file_path_data_from_inputs = os.path.join(prepared_input_file_path, data_from_inputs.lstrip('\\/'))
#os.path.abspath already resolves relative paths against the current working directory
full_path_data_from_inputs = os.path.abspath(file_path_data_from_inputs)

#for temporary approach
file_path_data_from_inputs_temporary = os.path.join(prepared_input_file_path, data_from_inputs_temporary.lstrip('\\/'))
full_path_data_from_inputs_temporary = os.path.abspath(file_path_data_from_inputs_temporary)

## Workflow of the data preparation

### Data Import

In [79]:
#results exported from SpineToolbox
#NOTE(review): sheet_name=-1 presumably selects the last sheet of the workbook — confirm
df_output_raw = pd.read_excel(folder_path_results + output_exported_file, sheet_name=-1)

#prepared input data that was used for the optimization
df_PV_prices = pd.read_excel(file_path_data_from_inputs, sheet_name='Energy_prices')
df_model_definition = pd.read_excel(full_path_data_from_inputs_temporary, sheet_name='Definition')

#raw model data base: both sheets are read in one pass (a list of sheet names returns a dict of DataFrames)
model_data_base_path = '../01_input_data/01_input_raw/methanol/Model_Data_Base.xlsx'
model_data_base_sheets = pd.read_excel(model_data_base_path, sheet_name=['Units', 'Storages'])
df_units = model_data_base_sheets['Units']
df_storages = model_data_base_sheets['Storages']

#the next one seems to cause an issue
df_operation_prices = pd.read_excel('../01_input_data/03_overview_input/data_needed_overview.xlsx', sheet_name='Values')


  for idx, row in parser.parse():


### data frame preparation

In [31]:
#create a copy of the original output DataFrame so the raw import stays untouched
df_output = df_output_raw.copy()

# Replace NaN values with empty strings in the first three rows
# (these rows are concatenated into the column headers below)
df_output.iloc[:3] = df_output.iloc[:3].fillna('')

# Combine the old header with the strings from the first three rows for each column
new_headers = df_output.columns + '_' + df_output.iloc[0] + '_' + df_output.iloc[1] + '_' + df_output.iloc[2]

# Set the new headers and name the first column "timeseries".
# The labels are edited in a plain list: writing into df.columns.values mutates the
# Index buffer in place, which pandas does not support (read-only under Copy-on-Write).
column_labels = new_headers.tolist()
column_labels[0] = "timeseries"
df_output.columns = column_labels

# Drop the first three rows
#might be helpful bot not implemented now
#df_output = df_output.drop([0, 1, 2])

# Reset the index
df_output = df_output.reset_index(drop=True)

### data adjustments

In [33]:
#revenues from PV sales on the wholesale market
#the flow column is identified through the three header rows kept at the top of df_output
selected_column_name = next(
    (
        column_name
        for column_position, column_name in enumerate(df_output.columns)
        if df_output.iloc[0, column_position] == 'power_line_Wholesale_Kasso'
        and df_output.iloc[1, column_position] == 'to_node'
        and df_output.iloc[2, column_position] == 'Power_Wholesale'
    ),
    None,
)

if not selected_column_name:
    print("Column with specified headers not found in output.")
else:
    #NOTE(review): a single price (row 4 of 'Power_Wholesale_Out') is applied to every time step — confirm this is intended
    wholesale_price = df_PV_prices['Power_Wholesale_Out'].iloc[4]
    #rows 0-2 hold header strings, so only rows from index 3 onwards are multiplied
    df_output['Revenue_from_PV'] = df_output[selected_column_name].iloc[3:] * wholesale_price

In [34]:
#total cost of the system: row 3 of every column whose name contains 'costs'
cost_columns = df_output.filter(like='costs')
total_costs = cost_columns.iloc[3]

#total revenue from PV power sale; the sign is flipped because the input is structured
#so that negative prices for exports reduce the total cost
total_PV_revenue = -df_output['Revenue_from_PV'].sum()

#cost without the PV revenue
adjusted_costs = total_costs + total_PV_revenue

#separate DataFrame holding total cost, PV revenue and adjusted cost
df_system_cost_output = pd.DataFrame({
    'Total_cost': total_costs,
    'PV_revenue': total_PV_revenue,
    'Total_adjusted_cost': adjusted_costs,
})

In [35]:
# Remove the cost columns (already stored in df_system_cost_output)
columns_to_drop_1 = df_output.filter(like='costs').columns
if len(columns_to_drop_1) > 0:
    df_output = df_output.drop(columns=columns_to_drop_1)

#test this and implement an if check
# Remove the 'unit_flow_op' columns; looked up after the first drop so no column is dropped twice
columns_to_drop_2 = df_output.filter(like='unit_flow_op').columns
if len(columns_to_drop_2) > 0:
    df_output = df_output.drop(columns=columns_to_drop_2)

## calculate LCOE

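Calculation of the levelized cost of energy (LCOE). With total investment $I$, annual cost $C$, annual energy output $E$ and the present value factor $PVF$ of the time horizon:

$$LCOE = \frac{I + C \cdot PVF}{E \cdot PVF}$$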

### calculate investment cost

In [36]:
#implementation of either from the existing capacity as input from the model
#or if no input capacity is defined as max capacity used in the model

# Column of the cost overview sheet that holds the specific investment cost.
# NOTE(review): the year is hard-coded although a starting_year parameter is defined
# in the parameter cell — confirm whether the column should follow that parameter.
cost_value_column = "Value 2020"


def _max_capacity_from_output(object_name, output_raw):
    """Look up a capacity for object_name in the raw model output.

    A column is used when row 1 equals object_name and row 3 is a string that
    contains "Power_". The numeric maximum of the last such column is returned;
    None is returned when no column qualifies or it holds no numeric value.
    """
    # NOTE(review): the PV revenue cell identifies columns through rows 0-2, while
    # rows 1 and 3 are checked here — confirm which header layout is correct.
    cap = None
    for index_output, column in enumerate(output_raw.columns):
        if output_raw.iloc[1, index_output] != object_name:
            continue
        label = output_raw.iloc[3, index_output]
        # the cell may hold a number or NaN, for which a substring test would raise
        if isinstance(label, str) and "Power_" in label:
            # the column mixes header strings and numbers, so coerce before taking the maximum
            column_max = pd.to_numeric(output_raw[column], errors='coerce').max()
            if not pd.isnull(column_max):
                cap = column_max
    return cap


def _investment_for_object(object_name, object_type, cap, cost_type, cost_table, value_column):
    """Return cap multiplied by the specific cost of object_type, or None if it cannot be computed.

    Rows of cost_table are used when their "Type" equals cost_type and their first
    column equals object_type. A message is printed when the cost entry is missing
    or textual, or when no capacity is available.
    """
    investment = None
    for _, row_values in cost_table.iterrows():
        # .iloc[0] reads the first column by position; row_values[0] is deprecated on a label-indexed Series
        if row_values["Type"] != cost_type or row_values.iloc[0] != object_type:
            continue
        costs_per_energy = row_values[value_column]
        # text entries (e.g. "depends on local conditions") and empty cells cannot be multiplied
        if pd.isnull(costs_per_energy) or isinstance(costs_per_energy, str):
            print(f"Operating price unknown for {object_name}")
        elif cap is None or pd.isnull(cap):
            print(f"Capacity unknown for {object_name}")
        else:
            investment = cap * costs_per_energy
            print(investment) #Delete, just for testing
    return investment


d_investments = {}

for _, row in df_model_definition.iterrows():
    object_name = row["Object_Name"]

    # Calculate investments of units
    if row["Category"] == "unit":
        print("object: ", object_name) #Delete, just for testing
        for _, row_units in df_units.iterrows():
            if object_name != row_units["Unit"]:
                continue
            # If capacity is known, choose it; with an input and an output capacity the higher one is used
            # (to check: which one is really needed for investment costs)
            known_caps = [
                value
                for value in (row_units["Cap_Input1_existing"], row_units["Cap_Output1_existing"])
                if not pd.isnull(value)
            ]
            if known_caps:
                cap = max(known_caps)
            else:
                # If no capacity is given, fall back to the maximum found in the model output
                cap = _max_capacity_from_output(object_name, df_output_raw)

            investment = _investment_for_object(
                object_name, row["Object_type"], cap,
                "unit investment cost", df_operation_prices, cost_value_column,
            )
            if investment is not None:
                d_investments[f"investment_{object_name}"] = investment

    # Calculate investments of storages
    elif row["Category"] == "node" and "storage" in str(object_name).lower():
        for _, row_storages in df_storages.iterrows():
            if object_name != row_storages["Storage"]:
                continue
            print("object: ", object_name) #Delete, just for testing
            cap = row_storages["node_state_cap"]
            if pd.isnull(cap):
                # If no capacity is given, fall back to the maximum found in the model output
                cap = _max_capacity_from_output(object_name, df_output_raw)

            investment = _investment_for_object(
                object_name, row["Object_type"], cap,
                "storage investment cost", df_operation_prices, cost_value_column,
            )
            if investment is not None:
                d_investments[f"investment_{object_name}"] = investment

total_investment = sum(d_investments.values())
print(total_investment)

#electrolysis
#investment_electrolysis = 50000000

#methanol plant
#investment_methanol = 50000000

#hydrogen storage
#investment_hydrogen_storage = 50000000

#hydrogen storage
#investment_methanol_storage = 50000000

#further components like CO2 vaporizer, steam engine etc. 
#relevant to have a routine that identifies the units automatically. 


# Changes made in the Excel sheets:
# - methanol input prep file: column "Object_type" was added
# - unit investment cost of the distillation tower changed from "see methanol reactor" to the values of the methanol reactor
# To adjust:
# - unit investment costs for each object type, with uniform units (€/kW or €/MW?)
# - delete the calculation of the investment for the distillation tower?
# - investment costs of the Steam_Plant?

object:  Solar_Plant_Kasso
170.24
object:  Electrolyzer
98800.0
object:  CO2_Vaporizer
Operating price unknown for CO2_Vaporizer
object:  Destilation_Tower
70.2
object:  Methanol_Reactor
70.2
object:  Steam_Plant
object:  E-Methanol_storage_Kasso
Operating price unknown for E-Methanol_storage_Kasso
object:  Hydrogen_storage_Kasso
12100.0
111210.64


  if row_values[0] == row["Object_type"]:
  if row_values[0] == row["Object_type"]:


In [37]:
# Display the system cost overview (columns Total_cost, PV_revenue, Total_adjusted_cost)
df_system_cost_output

Unnamed: 0,Total_cost,PV_revenue,Total_adjusted_cost
total_costs_toy__,9822379.098829,5790770.0,15613148.71165


### variable costs

In [38]:
#annual costs: adjusted total cost of the row labelled 'total_costs_toy__'
annual_cost_row = 'total_costs_toy__'
annual_costs = df_system_cost_output.at[annual_cost_row, 'Total_adjusted_cost']

### energy output

In [39]:
#energy output: columns whose name contains the methanol flow label
methanol_flow_label = 'Tower_to_node_E-Methanol_Kasso'
energy_output_methanol = df_output.filter(like=methanol_flow_label)

# Sum the first matching column; non-numeric entries (the header strings in the
# first rows) are coerced to NaN and therefore ignored by the sum
methanol_flow = energy_output_methanol.iloc[:, 0]
energy_output_methanol_value = pd.to_numeric(methanol_flow, errors='coerce').sum()

In [40]:
#present value factor for the chosen time horizon and discount rate
pcf_value = present_value_factor(time_horizon, wacc)

#investment plus discounted annual costs, divided by the discounted energy output
discounted_costs = total_investment + (annual_costs * pcf_value)
discounted_energy = energy_output_methanol_value * pcf_value
LCOE = discounted_costs / discounted_energy
LCOE

86.52156889436878

### Creating one combined excel and export

In [42]:
# Write the prepared flows and the system cost overview to one workbook, one sheet each
export_target = output_file_path + output_prepared_export
sheets_to_export = {
    'flows_node_states': df_output,
    'system_costs': df_system_cost_output,
}
with pd.ExcelWriter(export_target) as writer:
    for sheet_label, frame in sheets_to_export.items():
        frame.to_excel(writer, sheet_name=sheet_label)