# Data Preparation for the Nord_H2ub Spine Model

This jupyter notebook contains all routines for the preparation of the input data sources into a input data file for the model in Spine. 

**Authors:** Johannes Giehl (jfg.eco@cbs.dk), Dana J. Hentschel (djh.eco@cbs.dk)

## General settings

### Packages:

In [2]:
import numpy as np
import pandas as pd
from datetime import timedelta

### Base parameters

In [3]:
#define year and create time stamp
year = 2019   #change to desired year
start_date = pd.Timestamp(str(year) + '-01-01 00:00:00')
end_date = pd.Timestamp(str(year) + '-12-31 23:00:00')
#set area
area = 'DK1'   #change to desired area
#scenario
scenario = 'Base'
#frequency model
frequency = "1h"
#model name
model_name = "toy"
#stochastics
stochastic_scenario = "realisation"
stochastic_structure = "deterministic"

### File paths

In [4]:
#set path to correct folders
#input data
excel_file_path = '../Input_data/Input_raw/'

In [5]:
#set name of the relevant files

definition = 'Full_Excel.xlsx'
PV_data_availabilityfactors = 'PV_availability_factors_Kasso_' + str(year) + '.xlsx'
data_powerprices = 'Day_ahead_prices_' + str(year) + '.xlsx'


## Workflow of the data preparation

### General parameters

In [6]:
#date index
date_index = pd.date_range(start=start_date, end=end_date, freq='H')
formatted_dates = date_index.strftime('%Y-%m-%dT%H:%M:%S')
df_formatted_dates = pd.DataFrame(formatted_dates, columns=['DateTime'])

df_time = pd.DataFrame(df_formatted_dates)

### Data import

In [7]:
#Definition of objects
df_units = pd.read_excel(excel_file_path+definition, sheet_name='Units')
df_nodes = pd.read_excel(excel_file_path+definition, sheet_name='Nodes')
df_connections = pd.read_excel(excel_file_path+definition, sheet_name='Connections')
#Object-to-node
df_object__to_node_values = pd.read_excel(excel_file_path+definition, sheet_name='Object__to_node')
#Object-from-node
df_object__from_node_values = pd.read_excel(excel_file_path+definition, sheet_name='Object__from_node')
#Object-node-node
df_object__node_node_values = pd.read_excel(excel_file_path+definition, sheet_name='Object__node_node')
#Variable efficiency
df_variable_efficiency = pd.read_excel(excel_file_path+definition, sheet_name='Variable_Eff')
#Availability factor
df_PV_availabilityfactors_values = pd.read_excel(excel_file_path+PV_data_availabilityfactors, skiprows=2, usecols=[0,1,2,3,4,5])
#Power prices
df_powerprices_total_values = pd.read_excel(excel_file_path+data_powerprices)
#only extracting the prices from our earlier defined area
df_powerprices_values = df_powerprices_total_values[df_powerprices_total_values['PriceArea'] == area]
df_powerprices_values = df_powerprices_values.reset_index(drop=True)
#Storage
df_storage_raw = pd.read_excel(excel_file_path+definition, sheet_name='Storage')
#Model relationships
df_model_relations = pd.read_excel(excel_file_path+definition, sheet_name='Model_relations')
#Model
df_model = pd.read_excel(excel_file_path+definition, sheet_name='Model')

### Adjustments

#### Adjust base alements:

In [8]:
#define correct columns names
df_units['Category'] = 'unit'
df_nodes['Category'] = 'node'
df_connections['Category'] = 'connection'

df_definition = pd.concat([df_units, df_nodes, df_connections], ignore_index=True)
#creating a dictionary whether an object is a unit or a connection:
definitions_dict = dict(zip(df_definition['Object_Name'],df_definition['Category']))

#### Times series:

In [9]:
#adjust PV columns names
df_PV_availabilityfactors_values.rename(columns={'time': 'time [UTC]', 
                                                 'local_time': 'time [' + area + ']',
                                                 'electricity': 'unit_availability_factor'}, inplace=True)
df_powerprices_values.rename(columns={'HourUTC': 'time [UTC]', 
                                         'HourDK': 'time [' + area + ']'}, inplace=True)

### Fitting data into format

#### Relationships:

Object__node_node:

In [10]:
#create empty list where output will be stored
relationship_class_name__node_node = []
#loop where Python finds out whether object is a unit or a connection:
for obj in df_object__node_node_values['object']:
    category = definitions_dict.get(obj, 'Undefined')
    relationship_class_name__node_node.append(category + '__node_node')
df_relationship_class_name__node_node = pd.DataFrame(relationship_class_name__node_node, columns=['relationship_class_name'])
#concat this with data:
df_object__node_node = pd.concat([df_relationship_class_name__node_node, df_object__node_node_values], axis=1)
#show df head for control
df_object__node_node.head()

Unnamed: 0,relationship_class_name,object,node,node.1,parameter_name,value
0,unit__node_node,CO2_Vaporizer,Carbon_Dioxide,Vaporized_Carbon_Dioxide,fix_ratio_in_out_unit_flow,1
1,unit__node_node,Methanol_Reactor,Hydrogen_Kasso,Vaporized_Carbon_Dioxide,fix_ratio_in_in_unit_flow,1
2,unit__node_node,Methanol_Reactor,Hydrogen_Kasso,Raw_Methanol,fix_ratio_in_out_unit_flow,1
3,unit__node_node,Methanol_Reactor,Raw_Methanol,Waste_Heat,fix_ratio_out_out_unit_flow,4
4,unit__node_node,Destilation_Tower,Raw_Methanol,E-Methanol_Kasso,fix_ratio_in_out_unit_flow,1


Object__to_node:

In [11]:
#create empty list where output will be stored
relationship_class_name__to_node = []
#loop where Python finds out whether object is a unit or a connection:
for obj in df_object__to_node_values['object']:
    category = definitions_dict.get(obj, 'Undefined')
    relationship_class_name__to_node.append(category + '__to_node')
df_relationship_class_name__to_node = pd.DataFrame(relationship_class_name__to_node, columns=['relationship_class_name'])
#concat this with data:
df_object__to_node = pd.concat([df_relationship_class_name__to_node, df_object__to_node_values], axis=1)

Object__from_node:

In [12]:
#create empty list where output will be stored
relationship_class_name__from_node = []
#loop where Python finds out whether object is a unit or a connection:
for obj in df_object__from_node_values['object']:
    category = definitions_dict.get(obj, 'Undefined')
    relationship_class_name__from_node.append(category + '__from_node')
df_relationship_class_name__from_node = pd.DataFrame(relationship_class_name__from_node, columns=['relationship_class_name'])
#concat this with data:
df_object__from_node = pd.concat([df_relationship_class_name__from_node, df_object__from_node_values], axis=1)

Combine the two:

In [14]:
df_object__node = pd.concat([df_object__to_node,df_object__from_node], axis=0)
df_object__node.reset_index()
#show df head for control
df_object__node.head()

Unnamed: 0,relationship_class_name,object,node,parameter_name,value
0,unit__to_node,Solar_Plant_Kasso,Power_Kasso,unit_capacity,304.0
1,unit__to_node,Electrolyzer_Kasso,Hydrogen_Kasso,,
2,unit__to_node,CO2_Vaporizer,Vaporized_Carbon_Dioxide,unit_capacity,100.0
3,unit__to_node,Destilation_Tower,E-Methanol_Kasso,unit_capacity,52.0
4,unit__to_node,Methanol_Reactor,Waste_Heat,unit_capacity,10.0


#### Demand and Renewables Availability:

In [15]:
#create table headers and relations
column_names_1 = {'DateTime '+area: [None, None],
                'Hydrogen_Kasso': ['node','demand'], 
                'E-Methanol_Kasso': ['node','demand'], 
                'Solar_Plant_Kasso': ['node','unit_availability_factor']}
df_blank_table_1 = pd.DataFrame(column_names_1, index=None)
#add values
df_temp_1 = pd.DataFrame(columns=['DateTime ' + area, 'Hydrogen_Kasso', 'E-Methanol_Kasso', 'Solar_Plant_Kasso'])

df_temp_1['DateTime '+area] = df_time
df_temp_1['Hydrogen_Kasso'] = 0
df_temp_1['E-Methanol_Kasso'] = 25
df_temp_1['Solar_Plant_Kasso'] = df_PV_availabilityfactors_values['unit_availability_factor']

df_table_1 = pd.concat([df_blank_table_1, df_temp_1])
#show table head for control
df_table_1.head()

Unnamed: 0,DateTime DK1,Hydrogen_Kasso,E-Methanol_Kasso,Solar_Plant_Kasso
0,,node,node,node
1,,demand,demand,unit_availability_factor
0,2019-01-01T00:00:00,0,25,0.0
1,2019-01-01T01:00:00,0,25,0.0
2,2019-01-01T02:00:00,0,25,0.0


#### Energy prices:

In [16]:
column_names_2 = {'DateTime ' + area: ['relationship class','connection','node','parameter name'],
                'Power_Wholesale_In': ['connection__from_node','power_line_Wholesale_Kasso','Power_Wholesale','connection_flow_cost'], 
                'Power_Wholesale_Out': ['connection__to_node','power_line_Wholesale_Kasso','Power_Wholesale','connection_flow_cost'], 
                'District_Heating': ['connection__to_node','pipeline_District_Heating','District_Heating','connection_flow_cost']}
df_blank_table_2 = pd.DataFrame(column_names_2, index=None)
df_temp_2 = pd.DataFrame(columns=['DateTime ' + area, 'Power_Wholesale_In', 'Power_Wholesale_Out', 'District_Heating'])

df_temp_2['DateTime ' + area] = df_time
df_temp_2['Power_Wholesale_In'] = df_powerprices_values['SpotPriceEUR']
df_temp_2['Power_Wholesale_Out'] = -1 * df_powerprices_values['SpotPriceEUR']
df_temp_2['District_Heating'] = -1

df_table_2 = pd.concat([df_blank_table_2, df_temp_2], ignore_index=True)
#show table head for control
df_table_2.head()

Unnamed: 0,DateTime DK1,Power_Wholesale_In,Power_Wholesale_Out,District_Heating
0,relationship class,connection__from_node,connection__to_node,connection__to_node
1,connection,power_line_Wholesale_Kasso,power_line_Wholesale_Kasso,pipeline_District_Heating
2,node,Power_Wholesale,Power_Wholesale,District_Heating
3,parameter name,connection_flow_cost,connection_flow_cost,connection_flow_cost
4,2019-01-01T00:00:00,28.32,-28.32,-1


#### Time Series Storage:

In [17]:
#date index
before = start_date-timedelta(hours=1)
date_index_beginning = pd.date_range(start=before, end=start_date, freq='H')
formatted_beginning = date_index_beginning.strftime('%Y-%m-%dT%H:%M:%S')
df_formatted_beginning = pd.DataFrame(formatted_beginning, columns=['DateTime'])
df_time_beginning = pd.DataFrame(df_formatted_beginning)
#add one blank row
new_row = pd.Series([])
df_time_beginning = pd.concat([pd.DataFrame([new_row]), df_time_beginning]).reset_index(drop=True)
#concat raw data with time index
df_storage = pd.concat([df_time_beginning, df_storage_raw], axis=1)
df_storage

Unnamed: 0,DateTime,Hydrogen_storage_Kasso,E-Methanol_storage_Kasso
0,,fix_node_state,fix_node_state
1,2018-12-31T23:00:00,0,0
2,2019-01-01T00:00:00,,


#### Model:

In [18]:
### take first 3 columns and add another table with "Alternative", "Value"
column_names_model = {'Object_class_name':[],
                      'Object_name': [],
                      'Parameter':['model_start','model_end','resolution'],
                      'Alternative': [scenario, scenario, scenario],
                      'Value': ['"type": "date_time", "data": '+min(df_time),
                          '"type": "date_time", "data": '+max(df_time), 
                          '"type":"duration", "data": "'+frequency+'"']}
df_blank_table_model = pd.DataFrame(column_names_model, index=None)
df_blank_table_model


ValueError: All arrays must be of the same length

### Creating one combined excel and export

1. Definition of objects (df_definition)
2. Relationship classes (non-existent yet)
3. Object-node-node (df_object__node_node)
4. Variable_Eff (from Full_Excel)
5. Time_Series_Storage (non-existent yet)
6. Demand (df_table_1)
7. Energy prices (df_table_2)
8. Model components (from Full_Excel)   ###necessary? Steht doch auch alles in Model relationships
9. Model relationships (from Full_Excel)
10. Model (non-existent yet)

In [19]:
### check whether all the sheets are in the correct format:
if isinstance(df_object__node, pd.DataFrame):
    print("DataFrame")
else: 
    print("no DataFrame")

DataFrame


In [20]:
with pd.ExcelWriter('Input_data_combined.xlsx') as writer:
    df_definition.to_excel(writer, sheet_name='Definition', index=False)
    df_object__node.to_excel(writer, sheet_name='Object__from-to_node', index=False)
    df_object__node_node.to_excel(writer, sheet_name='Object__node_node', index=False)
    df_variable_efficiency.to_excel(writer, sheet_name='Variable_Eff', index=False)
    df_storage.to_excel(writer, sheet_name='Time_series_storage', index=False)
    df_table_1.to_excel(writer, sheet_name='Demand', index=False)
    df_table_2.to_excel(writer, sheet_name='Energy_prices', index=False)
    df_model_relations.to_excel(writer, sheet_name='Model_relations', index=False)
    ###placeholder Model