# OSeMOSYS-PLEXOS global model: Powerplant data

### Import modules

In [None]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
%reload_ext blackcellmagic

### Import data files and user input

In [None]:
# Powerplant property records (read as latin-1, whole file typed in one pass).
df = pd.read_csv(
    r"data/OPG_powerplant_data_19-03-2020.csv",
    encoding="latin-1",
    low_memory=False,
)

# Membership records, restricted to rows whose parent is a generator; the
# parent object name becomes the "powerplant" key.
df_dict = pd.read_csv(r"data/OPG_memberships_19-03-2020.csv")
is_generator_parent = df_dict["parent_class"] == "Generator"
df_dict = df_dict.loc[is_generator_parent].rename(
    columns={"parent_object": "powerplant"}
)

# Supporting tables: cost data, operational life and technology naming codes.
df_weo_data = pd.read_csv(r"data/weo_2018_powerplant_costs.csv")
df_op_life = pd.read_csv(r"data/operational_life.csv")
df_tech_code = pd.read_csv(r"data/naming_convention_tech.csv")

# User input: final year setting used when building the yearly
# residual-capacity table.
model_horizon = 2050

### Create main generator table

In [None]:
# Keep only generator records and the columns needed to build one row per
# powerplant. Built as a single chain so no in-place operation runs on a
# slice of `df`.
gen_cols_1 = ["child_class", "child_object", "property", "value"]
df_gen = (
    df.loc[df["child_class"] == "Generator", gen_cols_1]
    .rename(columns={"child_object": "powerplant"})
    .drop(columns="child_class")
)

# Wide table: one row per powerplant, one column per property; properties a
# powerplant does not have are filled with 0. The aggregation is passed by
# name ("sum") rather than as np.sum: pandas already dispatched np.sum to its
# own "sum", and newer pandas versions warn when given the NumPy callable.
df_gen = pd.pivot_table(
    df_gen,
    index="powerplant",
    columns="property",
    values="value",
    aggfunc="sum",
    fill_value=0,
)

# Total installed capacity = capacity of one unit x number of units.
df_gen["total_capacity"] = df_gen["Max Capacity"].astype(float) * df_gen[
    "Units"
].astype(int)

In [None]:
gen_cols_2 = ["Commission Date", "Heat Rate", "Max Capacity", "total_capacity"]
df_gen_2 = df_gen[gen_cols_2]

## Compile dataframe with powerplants, nodes, and fuels
membership_cols = ["powerplant", "child_object"]
df_dict_fuel = df_dict.loc[df_dict["collection"] == "Fuels", membership_cols]
df_dict_nodes = df_dict.loc[df_dict["collection"] == "Nodes", membership_cols]
df_dict_2 = df_dict_fuel.merge(df_dict_nodes, how="outer", on="powerplant")

## Merge original generator dataframe with nodes and fuels.
## The shared "child_object" column gets the default _x (fuel side) and
## _y (node side) suffixes, which are renamed here.
df_gen_2 = df_gen_2.merge(df_dict_2, how="outer", on="powerplant").rename(
    columns={"child_object_x": "fuel", "child_object_y": "node"}
)

## Extract start year from Commission Date
## NOTE(review): the generator pivot fills missing properties with 0 --
## confirm how a 0 "Commission Date" or "Heat Rate" should be treated here.
df_gen_2["start_year"] = pd.to_datetime(df_gen_2["Commission Date"]).dt.year

## Calculate efficiency from heat rate. Units of heat rate in MJ/kWh
## (3.6 MJ per kWh). A heat rate of 0 gives an infinite efficiency.
df_gen_2["efficiency"] = 3.6 / df_gen_2["Heat Rate"].astype(float)

## The source columns are no longer needed once the derived ones exist
df_gen_2 = df_gen_2.drop(columns=["Commission Date", "Heat Rate"])

## Calculate years of operation from start year until 2015
df_gen_2["years_of_operation"] = 2015 - df_gen_2["start_year"]

## Fix blank spaces in 'fuels' columns. Appearing for 'Oil' powerplants in certain countries.
## The replacement is "<first two '-'-separated parts of node> <second '_'-separated part of powerplant>".
missing_fuel = df_gen_2["fuel"].isna()
node_prefix = df_gen_2["node"].str.split("-").str[:2].str.join("-")
plant_token = df_gen_2["powerplant"].str.split("_", expand=True)[1]
df_gen_2.loc[missing_fuel, "fuel"] = node_prefix + " " + plant_token

In [None]:
## Create column for technology: second "_"-separated token of the
## powerplant name, title-cased
df_gen_2["technology"] = df_gen_2["powerplant"].str.split("_").str[1].str.title()


## Divide Gas into CCGT and OCGT based on max capacity:
## above 130 -> Gas-CCGT, 130 or below -> Gas-OCGT (presumably MW -- confirm).
## Gas rows whose max capacity is missing match neither test and stay "Gas".
max_capacity = df_gen_2["Max Capacity"].astype(float)
is_gas = df_gen_2["technology"] == "Gas"
df_gen_2.loc[is_gas & (max_capacity > 130), "technology"] = "Gas-CCGT"
df_gen_2.loc[is_gas & (max_capacity <= 130), "technology"] = "Gas-OCGT"

### Create table with aggregated capacity  

In [None]:
# Total capacity per node and technology for plants that started in or
# before 2015, as a node x technology table (0 where a node has no capacity).
df_gen_agg_node = df_gen_2[df_gen_2['start_year'] <= 2015]
df_gen_agg_node = df_gen_agg_node.groupby(['node', 'technology'],
                                          as_index=False)['total_capacity'].sum()
df_gen_agg_node = df_gen_agg_node.pivot(index='node',
                                        columns='technology',
                                        values='total_capacity').fillna(0).reset_index()

# Drop 'Sto' technology. Only for USA.
# errors='ignore' keeps this from failing when the input has no 'Sto' plants.
df_gen_agg_node = df_gen_agg_node.drop(columns='Sto', errors='ignore')

# Add extra nodes which exist in 2050 but are not in the 2015 data
nodes_extra_list = ['AF-SOM',
                    'AF-TCD',
                    'AS-TLS',
                    'EU-MLT',
                    'NA-BLZ',
                    'NA-HTI',
                    'SA-BRA-J1',
                    'SA-BRA-J2',
                    'SA-BRA-J3',
                    'SA-SUR',]
nodes_extra_df = pd.DataFrame({'node': nodes_extra_list})

# pd.concat replaces DataFrame.append (removed in pandas 2.0). `sort` must be
# a real boolean: the previous string 'False' was truthy. The extra nodes get
# 0 capacity for every technology via fillna(0).
df_gen_agg_node = (
    pd.concat([df_gen_agg_node, nodes_extra_df],
              ignore_index=True,
              sort=False)
    .fillna(0)
    .sort_values(by='node')
    .set_index('node')
    .round(2)
)
#df_gen_agg_node.to_csv(r'output/test_output_2.csv')

### Add region and country code columns

In [None]:
# Region code: first two characters of the node name (e.g. 'SA' in 'SA-BRA-J1').
df_gen_2['region_code'] = df_gen_2['node'].str.slice(stop=2)
# Country code: everything after the 3-character region prefix.
# NOTE(review): for sub-national nodes such as 'SA-BRA-J1' this keeps the
# sub-region suffix ('BRA-J1') -- confirm that is intended.
df_gen_2['country_code'] = df_gen_2['node'].str.slice(start=3)

### Add operational life column

In [None]:
# Technology -> operational life (years) lookup
op_life_dict = dict(zip(df_op_life['tech'], df_op_life['years']))

operational_life = df_gen_2['technology'].map(op_life_dict)
df_gen_2['operational_life'] = operational_life

# Retirement year implied directly by the data
df_gen_2['retirement_year_data'] = operational_life + df_gen_2['start_year']

# Years operated beyond (positive) or short of (negative) the operational
# life, expressed as a fraction of the operational life
df_gen_2['retirement_diff'] = (
    (df_gen_2['years_of_operation'] - operational_life) / operational_life
)

# Set the retirement year used by the model from that overrun fraction:
#   overrun >= 50%       -> retire in 2020
#   0% < overrun < 50%   -> retire in 2025
#   no overrun (<= 0)    -> retire in the year implied by the data
# Rows with an undefined overrun match none of the tests and stay NaN.
overrun = df_gen_2['retirement_diff']
df_gen_2.loc[overrun >= 0.5, 'retirement_year_model'] = 2020
df_gen_2.loc[(overrun > 0) & (overrun < 0.5), 'retirement_year_model'] = 2025
df_gen_2.loc[overrun <= 0, 'retirement_year_model'] = df_gen_2['retirement_year_data']

#df_gen_2.to_csv(r'output/test_output_3.csv')

### Add naming convention

In [None]:
# Technology -> naming-convention code lookup
tech_code_dict = dict(zip(df_tech_code['tech'], df_tech_code['code']))
df_gen_2['tech_code'] = df_gen_2['technology'].map(tech_code_dict)

# Node code: the node name without its leading region part and without '-'.
# Short node names (6 characters or fewer, e.g. 'AF-SOM') are padded with
# 'XX'; longer ones (e.g. 'SA-BRA-J1') are used as they are.
node_name_length = df_gen_2['node'].str.len()
node_without_region = df_gen_2['node'].str.split('-').str[1:].str.join("")

df_gen_2.loc[node_name_length <= 6, 'node_code'] = node_without_region + 'XX'
df_gen_2.loc[node_name_length > 6, 'node_code'] = node_without_region
df_gen_2

### Calculate residual capacity

In [None]:
res_cap_cols = [
    "node_code",
    "tech_code",
    "total_capacity",
    "start_year",
    "retirement_year_model",
]

# Work on an explicit copy so new columns are never written to a slice of
# df_gen_2.
df_res_cap = df_gen_2[res_cap_cols].copy()

# One zero-initialised column per model year, from 2015 through model_horizon
# INCLUSIVE (range() excludes its stop value, so +1 is needed for the horizon
# year itself to appear in the output). Floats are used because capacities
# are written into these cells below. All columns are added in one call
# rather than one at a time.
year_cols = [str(each_year) for each_year in range(2015, model_horizon + 1)]
df_res_cap = df_res_cap.assign(**{year_col: 0.0 for year_col in year_cols})

# Long format: one row per powerplant and model year.
df_res_cap = pd.melt(
    df_res_cap,
    id_vars=res_cap_cols,
    value_vars=year_cols,
    var_name="model_year",
    value_name="value",
)
df_res_cap["model_year"] = df_res_cap["model_year"].astype(int)

# A plant contributes its total capacity in every year from its start year to
# its model retirement year (both inclusive); otherwise it contributes 0.
# Rows with a missing start or retirement year never satisfy the test.
is_operating = (df_res_cap["model_year"] >= df_res_cap["start_year"]) & (
    df_res_cap["model_year"] <= df_res_cap["retirement_year_model"]
)
df_res_cap.loc[is_operating, "value"] = df_res_cap["total_capacity"]

# Sum over plants per node, technology and year.
df_res_cap = df_res_cap.groupby(
    ["node_code", "tech_code", "model_year"], as_index=False
)["value"].sum()

# Add column with naming convention: 'PWR' + technology code + node code + '01'
df_res_cap['tech'] = ('PWR' +
                      df_res_cap['tech_code'] +
                      df_res_cap['node_code'] + '01'
                     )
# Convert total capacity from MW to GW
df_res_cap['value'] = df_res_cap['value'].div(1000)

# Rename 'tech' to 'TECHNOLOGY', 'model_year' to 'YEAR' and 'value' to 'VALUE',
# then drop the 'tech_code' and 'node_code' columns that were folded into 'tech'
df_res_cap = df_res_cap.rename(columns={'tech': 'TECHNOLOGY',
                                        'model_year': 'YEAR',
                                        'value': 'VALUE'}
                               ).drop(columns=['tech_code', 'node_code'])

# Add 'REGION' column and fill 'GLOBAL' throughout
df_res_cap['REGION'] = 'GLOBAL'

# Reorder columns
df_res_cap = df_res_cap[['REGION', 'TECHNOLOGY', 'YEAR', 'VALUE']]

df_res_cap.to_csv(r"output/ResidualCapacity.csv", index=False)
print(df_res_cap)

### Add input and output activity ratios

### Append powerplant capital and fixed costs

In [None]:
# Long format of the cost table: one row per technology, region/country,
# parameter and year (2017, 2030, 2040). reset_index() also adds the melted
# row number as an "index" column.
df_cap_fix_costs = df_weo_data.melt(
    id_vars=["technology", "region/country", "parameter"],
    value_vars=["2017", "2030", "2040"],
    var_name="year",
).reset_index()

# Keep only the first line of multi-line parameter labels.
parameter_lines = df_cap_fix_costs["parameter"].str.split("\n", expand=True)
df_cap_fix_costs["parameter"] = parameter_lines[0]

# Model region code -> region/country names in the cost table.
# Not referenced in the visible part of this file.
weo_region_map = {
    "AF": ["Africa"],
    "AS": ["China", "India", "Japan", "Middle East", "Russia"],
    "EU": ["Europe", "Russia"],
    "NA": ["United States"],
    "SA": ["Brazil"],
}

# weo_tech_map = {'Bio':['Biomass Power plant'],
#                'Coa':['Steam Coal - SUBCRITICAL', 'Steam Coal - SUPERCRITICAL', 'Steam Coal - ULTRASUPERCRITICAL'],
#                'Cog':['Biomass CHP Medium', 'Biomass CHP Small', 'Biomass Cofiring'],
#                'Gas-CCGT':['CCGT'],
#                'Gas-OCGT':['Gas turbine'],
#                'Geo':['Geothermal'],
#                'Hyd':['Hydropower - large-scale', 'Hydropower - small-scale'],
#                'Nuc':['Nuclear'],
#                'Oil':
#                'Oth':
#                'Pet':
#                'Sol':['Solar photovoltaics - Buildings', 'Solar photovoltaics - Large scale'],
#                'Was':['Biomass - waste incineration - CHP'],
#                'Wav':['Marine'],
#                'Win':['Wind onshore'],
#               }

# Cells holding exactly "n.a." (in any column) become the integer 0.
# NOTE(review): no numeric conversion happens here -- confirm the dtype of
# the "value" column before doing arithmetic on the pivoted costs.
df_cap_fix_costs = df_cap_fix_costs.replace("n.a.", 0)

# Wide format: one column per region/country, indexed by technology,
# parameter and year.
df_cap_fix_costs = df_cap_fix_costs.pivot_table(
    index=["technology", "parameter", "year"],
    columns="region/country",
    values="value",
    aggfunc="sum",
).reset_index()


df_cap_fix_costs