# Data Preparation for the Nord_H2ub Spine Model

This Jupyter notebook contains all routines for preparing the raw input data sources and combining them into one input data file for the model in Spine.

**Authors:** Johannes Giehl (jfg.eco@cbs.dk), Dana Hentschel (djh.eco@cbs.dk)

## General settings

### Packages:

In [6]:
import numpy as np
import pandas as pd

### Base parameters

In [10]:
#user settings: adapt these two values to the desired case
area = 'DK1'   #price area whose data is used in the cells below
year = 2019   #calendar year whose data is used in the cells below

#first and last hourly time stamp of the selected year
start_date = pd.Timestamp(f'{year}-01-01 00:00:00')
end_date = pd.Timestamp(f'{year}-12-31 23:00:00')

### File paths

In [11]:
#folder that holds the raw input files (relative to the location of this notebook);
#kept as a plain string with a trailing slash because file names are appended with '+'
excel_file_path = "../Input_data/Input_raw/"

In [15]:
#names of the relevant input files inside the raw input folder

#workbook with the object definitions (read sheet by sheet in the import cell)
definition = 'Full_Excel.xlsx'
#year-specific files: the selected year is part of the file name
PV_data_availabilityfactors = f'PV_availability_factors_Kasso_{year}.xlsx'
data_powerprices = f'Day_ahead_prices_{year}.xlsx'


## Workflow of the data preparation

### General parameters

In [16]:
#hourly time stamps from start_date to end_date (both included)
#NOTE(review): recent pandas versions appear to deprecate the 'H' alias in favour of 'h' - confirm against the installed version
date_index = pd.date_range(start=start_date, end=end_date, freq='H')
#text representation of every time stamp, e.g. 2019-01-01T00:00:00
formatted_dates = date_index.strftime('%Y-%m-%dT%H:%M:%S')
#single-column table holding the formatted time stamps
df_formatted_dates = pd.DataFrame(data=formatted_dates, columns=['DateTime'])

#time column that is inserted into the tables built further below
df_time = pd.DataFrame(df_formatted_dates)

### Data import

In [17]:
#Open the definition workbook only once and read all required sheets from the
#same handle; calling pd.read_excel with the file path for every sheet would
#re-open and re-parse the whole workbook each time
with pd.ExcelFile(excel_file_path+definition) as definition_workbook:
    #Definition of objects
    df_units = pd.read_excel(definition_workbook, sheet_name='Units')
    df_nodes = pd.read_excel(definition_workbook, sheet_name='Nodes')
    df_connections = pd.read_excel(definition_workbook, sheet_name='Connections')
    #Object-node-node
    df_object__node_node_values = pd.read_excel(definition_workbook, sheet_name='Object__node_node')
    #Model
    df_model = pd.read_excel(definition_workbook, sheet_name='Model')

#Availability factor: the first two rows of the sheet are skipped and only the first six columns are read
df_PV_availabilityfactors_values = pd.read_excel(excel_file_path+PV_data_availabilityfactors, skiprows=2, usecols=[0,1,2,3,4,5])

#Power prices (all rows of the file)
df_powerprices_total_values = pd.read_excel(excel_file_path+data_powerprices)
#keep only the rows whose 'PriceArea' equals the area defined earlier
df_powerprices_values = df_powerprices_total_values[df_powerprices_total_values['PriceArea'] == area]
#renumber the remaining rows from 0 so that they line up with the other tables
df_powerprices_values = df_powerprices_values.reset_index(drop=True)

### Adjustments

#### Adjust base elements:

In [18]:
#add a 'Category' column that tags every base element with its kind of object
for frame, category_label in ((df_units, 'Unit'), (df_nodes, 'Node'), (df_connections, 'Connection')):
    frame['Category'] = category_label

#stack units, nodes and connections into one table with a continuous index
df_definition = pd.concat([df_units, df_nodes, df_connections], ignore_index=True)
#lookup table: object name -> category ('Unit', 'Node' or 'Connection')
definitions_dict = df_definition.set_index('Object_Name')['Category'].to_dict()

#### Time series:

In [19]:
#label of the local-time column, carrying the selected area (used for both time series)
local_time_label = f'time [{area}]'

#PV profile: unify the time column labels and rename 'electricity' to
#'unit_availability_factor', the column name read when the demand table is built
df_PV_availabilityfactors_values.rename(
    columns={
        'time': 'time [UTC]',
        'local_time': local_time_label,
        'electricity': 'unit_availability_factor',
    },
    inplace=True,
)
#power prices: same time column labels as the PV profile
df_powerprices_values.rename(
    columns={
        'HourUTC': 'time [UTC]',
        'HourDK': local_time_label,
    },
    inplace=True,
)

### Fitting data into format

Object__node_node:

In [20]:
#relationship class of every row: '<category>__node_node', where the category of the
#object is looked up in definitions_dict ('Undefined' if the object is not listed there)
relationship_class_name__node_node = [
    f"{definitions_dict.get(obj, 'Undefined')}__node_node"
    for obj in df_object__node_node_values['object']
]
df_relationship_class_name__node_node = pd.DataFrame(
    data=relationship_class_name__node_node,
    columns=['relationship_class_name'],
)
#put the relationship class in front of the imported values (column-wise concat)
df_object__node_node = pd.concat(
    [df_relationship_class_name__node_node, df_object__node_node_values],
    axis=1,
)
#show df head for control
df_object__node_node.head()

Unnamed: 0,relationship_class_name,object,node,node.1,parameter_name,value
0,Unit__node_node,CO2_Vaporizer,Carbon_Dioxide,Vaporized_Carbon_Dioxide,fix_ratio_in_out_unit_flow,1
1,Unit__node_node,Methanol_Reactor,Hydrogen_Kasso,Vaporized_Carbon_Dioxide,fix_ratio_in_in_unit_flow,1
2,Unit__node_node,Methanol_Reactor,Hydrogen_Kasso,Raw_Methanol,fix_ratio_in_out_unit_flow,1
3,Unit__node_node,Methanol_Reactor,Raw_Methanol,Waste_Heat,fix_ratio_out_out_unit_flow,4
4,Unit__node_node,Destilation_Tower,Raw_Methanol,E-Methanol_Kasso,fix_ratio_in_out_unit_flow,1


#### Demand and Renewables Availability:

In [21]:
#header block: one column per time series, each with two header rows;
#the time column itself has empty header cells
column_names_1 = {
    f'DateTime {area}': [None, None],
    'Hydrogen_Kasso': ['node', 'demand'],
    'E-Methanol_Kasso': ['node', 'demand'],
    'Solar_Plant_Kasso': ['node', 'unit_availability_factor'],
}
df_blank_table_1 = pd.DataFrame(data=column_names_1)

#value block with the same columns as the header block
df_temp_1 = pd.DataFrame(columns=list(column_names_1))

#time stamps first, so that the constant values below are spread over all time steps
df_temp_1[f'DateTime {area}'] = df_time
#constant demand values for every time step
df_temp_1['Hydrogen_Kasso'] = 0
df_temp_1['E-Methanol_Kasso'] = 25
#availability factors taken from the imported PV profile
df_temp_1['Solar_Plant_Kasso'] = df_PV_availabilityfactors_values['unit_availability_factor']

#header rows on top, values below
#NOTE(review): no ignore_index here, so the index labels restart at 0 for the value rows
#(the energy price table uses ignore_index=True) - confirm whether this difference is intended
df_table_1 = pd.concat([df_blank_table_1, df_temp_1])
#show table head for control
df_table_1.head()

Unnamed: 0,DateTime DK1,Hydrogen_Kasso,E-Methanol_Kasso,Solar_Plant_Kasso
0,,node,node,node
1,,demand,demand,unit_availability_factor
0,2019-01-01T00:00:00,0,25,0.0
1,2019-01-01T01:00:00,0,25,0.0
2,2019-01-01T02:00:00,0,25,0.0


#### Energy prices:

In [22]:
#header block: the first column names the four header rows, every other column
#describes one price series (relationship class, connection, node, parameter name)
column_names_2 = {
    f'DateTime {area}': ['relationship class', 'connection', 'node', 'parameter name'],
    'Power_Wholesale_In': ['connection__from_node', 'power_line_Wholesale_Kasso', 'Power_Wholesale', 'connection_flow_cost'],
    'Power_Wholesale_Out': ['connection__to_node', 'power_line_Wholesale_Kasso', 'Power_Wholesale', 'connection_flow_cost'],
    'District_Heating': ['connection__to_node', 'pipeline_District_Heating', 'District_Heating', 'connection_flow_cost'],
}
df_blank_table_2 = pd.DataFrame(data=column_names_2)
#value block with the same columns as the header block
df_temp_2 = pd.DataFrame(columns=list(column_names_2))

#time stamps first, so that the constant value below is spread over all time steps
df_temp_2[f'DateTime {area}'] = df_time
#spot price as flow cost in one direction and with opposite sign in the other direction
df_temp_2['Power_Wholesale_In'] = df_powerprices_values['SpotPriceEUR']
df_temp_2['Power_Wholesale_Out'] = -1 * df_powerprices_values['SpotPriceEUR']
#constant flow cost of -1 for every time step
df_temp_2['District_Heating'] = -1

#header rows on top, values below, with one continuous index
df_table_2 = pd.concat([df_blank_table_2, df_temp_2], ignore_index=True)
#show table head for control
df_table_2.head()

Unnamed: 0,DateTime DK1,Power_Wholesale_In,Power_Wholesale_Out,District_Heating
0,relationship class,connection__from_node,connection__to_node,connection__to_node
1,connection,power_line_Wholesale_Kasso,power_line_Wholesale_Kasso,pipeline_District_Heating
2,node,Power_Wholesale,Power_Wholesale,District_Heating
3,parameter name,connection_flow_cost,connection_flow_cost,connection_flow_cost
4,2019-01-01T00:00:00,28.32,-28.32,-1


#### Model:

### Creating one combined excel and export

1. Definition of objects (df_definition)
2. Relationship classes (non-existent yet)
3. Object-node-node (df_object__node_node)
4. Variable_Eff (from Full_Excel)
5. Time_Series_Storage (non-existent yet)
6. Demand (df_table_1)
7. Energy prices (df_table_2)
8. Model components (from Full_Excel) — TODO: is this necessary? Everything is already contained in Model relationships
9. Model relationships (from Full_Excel)
10. Model (non-existent yet)