# Regression Module

This notebook derives linear-regression coefficients (slope and intercept) for each plant performance quantity from the simulation data.

For detailed explanations, see Appendix D.

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

## Reading Data

In [6]:
# Read every sheet of the Excel workbook into a dict of DataFrames
# (sheet_name=None returns all sheets; column 0 of each sheet becomes its index).
source_file_name = "NGCC_performance.xlsx"
dfs = pd.read_excel("../resources/" + source_file_name, sheet_name=None, index_col=0, engine='openpyxl')

# Strip whitespace from the row labels so that later .loc lookups by name match.
# The loop variable is intentionally not named `df`: that name is used further
# down for the preprocessed frame, and a leaked loop variable would leave `df`
# pointing at a raw sheet until it is rebuilt.
for sheet in dfs.values():
    sheet.index = sheet.index.str.strip()

# 3 DataFrames for 3 tabs in the Excel file
df_no_carbon_cap = dfs['No Carbon Capture']
df_max_power = dfs['Max Power']
df_max_DAC = dfs['Max DAC']

# Column labels of the data columns; the first remaining column is skipped
# (it is the one renamed to 'unit' in the preprocessing step).
indices = df_max_DAC.columns[1:]

In [4]:
def transfer_idx(indices):
    """
    Convert column labels in [0, 1] to percentage values in [0, 100].

    As there are two "0.5" columns, one of them is labelled "0.5.1"; it maps
    to 50 as well. Numeric labels are multiplied by 100 directly. Any other
    string label (e.g. "0.4") is parsed as a number first, because multiplying
    a string by 100 would repeat the text instead of scaling the value.

    Parameters
    ----------
    indices : iterable
        Column labels: numbers, numeric strings, or the special "0.5.1".

    Returns
    -------
    list
        One percentage value per label, in input order.

    Raises
    ------
    ValueError
        If a string label other than "0.5.1" cannot be parsed as a number.
    """
    percents = []
    for label in indices:
        if isinstance(label, str):
            if label == "0.5.1":
                # duplicate "0.5" column
                percents.append(50)
            else:
                percents.append(float(label) * 100)
        else:
            percents.append(label * 100)
    return percents

## Preprocessing Coefficients

In [7]:
# Empty frame that collects the preprocessed rows. It reuses the column labels
# of the Max DAC sheet, with the 'Unnamed: 1' column relabelled to 'unit'.
df = pd.DataFrame(columns=df_max_DAC.columns).rename(columns={'Unnamed: 1': 'unit'})

# Column labels of the two points passed to the regression further down.
extreme_points = [1, 0.5]

### Auxiliary Load

In [8]:
# Rows of the Max Power sheet that together make up the auxiliary load.
aux_rows = [
    "Circulating Water Pumps",
    "Combustion Turbine Auxiliaries",
    "Condensate Pumps",
    "Cooling Tower Fans",
    "Feedwater Pumps",
    "Ground Water Pumps",
    "Miscellaneous Balance of Plant",
    "SCR",
    "Steam Turbine Auxiliaries",
    "Transformer Losses"
]

# Sum the auxiliary loads column by column. Only the data columns (`indices`)
# are summed, so the text unit column is never added up; the 'unit' cell of
# the new row stays empty until it is set on the next line.
df.loc["auxiliary_load"] = df_max_power.loc[aux_rows, indices].sum()
# set unit
df.loc["auxiliary_load", "unit"] = "kW"
df.loc["auxiliary_load"]

unit               kW
1        17358.919395
0.9      16544.115948
0.8      15558.596325
0.7      14508.690511
0.6      13509.488383
0.5      12266.054229
0.5.1     9995.709746
0.4       9283.309286
0.3       8430.369506
0.25      7896.148618
Name: auxiliary_load, dtype: object

### DAC Base Steam (Energy (MMBtu) Based)

In [9]:
# Btu/lb for 20.8 psia steam; also reused by the allocable-steam cell.
hv_hf = 959

# "DAC Steam Flow" row of the Max Power sheet, scaled by hv_hf and divided by
# 1e6 to express it in MMBtu/hr.
# NOTE(review): presumably the sheet gives the flow in lb/hr - confirm.
base_steam_flow = df_max_power.loc["DAC Steam Flow", indices]
df.loc["DAC_base_steam", indices] = base_steam_flow * hv_hf / 1e6

df.loc["DAC_base_steam", "unit"] = "MMBtu/hr"
df.loc["DAC_base_steam"]

unit       MMBtu/hr
1        227.382736
0.9      216.203673
0.8       203.00112
0.7      188.195119
0.6      173.247186
0.5       151.68503
0.5.1    119.220003
0.4      106.297478
0.3       90.148877
0.25      77.016331
Name: DAC_base_steam, dtype: object

### Allocable Steam

In [10]:
# Difference between the "DAC Steam Flow" rows of the Max DAC and Max Power
# sheets, converted with hv_hf and 1e6 in the same way as the base steam.
max_dac_steam_flow = df_max_DAC.loc["DAC Steam Flow", indices]
max_power_steam_flow = df_max_power.loc["DAC Steam Flow", indices]
df.loc["allocable_steam", indices] = (max_dac_steam_flow - max_power_steam_flow) * hv_hf / 1e6

df.loc["allocable_steam", "unit"] = "MMBtu/hr"
df.loc["allocable_steam"]

unit       MMBtu/hr
1        703.431295
0.9      691.991384
0.8      658.870401
0.7      627.162984
0.6      599.491039
0.5      555.731869
0.5.1    345.650452
0.4      326.106032
0.3      296.523759
0.25     268.350257
Name: allocable_steam, dtype: object

### CO2 Emission from Flue Gas

In [11]:
# "CO2 Emission Rate" row of the No Carbon Capture sheet, multiplied column by
# column with the row at position 49 (zero-based) of the same sheet.
# NOTE(review): that row and its columns 1..10 are addressed by position, so
# the lookup breaks silently if the sheet layout changes - confirm which
# quantity row 49 holds and prefer a label-based .loc lookup.
df.loc["CO2_flue_gas", indices] = df_no_carbon_cap.loc["CO2 Emission Rate", indices].multiply(df_no_carbon_cap.iloc[49, 1:11])

# Same unit string as the "CO2_flue_gas" entry in `params`, and the same "/hr"
# suffix as the other rows.
df.loc["CO2_flue_gas", "unit"] = "tonne CO2/hr"
df.loc["CO2_flue_gas"]

unit     tonne CO2/h
1         258.256728
0.9       240.643484
0.8       222.991974
0.7       202.873639
0.6       182.423164
0.5       160.197542
0.5.1     129.128364
0.4       111.495987
0.3        91.211582
0.25       80.098771
Name: CO2_flue_gas, dtype: object

## Regression

In [12]:
# Maps each output name to [source DataFrame, row name, unit].
# The first group reads rows directly from the Max Power sheet; the second
# group reads the preprocessed rows in `df`, where the row name equals the
# output name. Insertion order is kept: sheet-based entries come first.
params = {
    **{
        name: [df_max_power, row_name, unit_label]
        for name, row_name, unit_label in (
            ("GT_power", "Gas Turbine", "kW"),
            ("fuel", "Heat Input to Turbine(s)", "MMBtu/hr"),
            ("HP_power", "HP", "kW"),
            ("IP_power", "IP", "kW"),
        )
    },
    **{
        name: [df, name, unit_label]
        for name, unit_label in (
            ("auxiliary_load", "kW"),
            ("DAC_base_steam", "MMBtu/hr"),
            ("allocable_steam", "MMBtu/hr"),
            ("CO2_flue_gas", "tonne CO2/hr"),
        )
    },
}

In [13]:
def linear_regress(df, idx_name, points):
    """
    Fit a straight line to one row of `df` over the selected columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose row `idx_name` holds the dependent values.
    idx_name : str
        Index label of the row to regress.
    points : list
        Column labels used as sample points. They are converted to
        percentages with transfer_idx to form the independent variable.

    Returns
    -------
    tuple
        (slope, intercept) of the fitted line y = slope * x + intercept,
        with x in percent.
    """

    # X value needs to be transferred first into % value; sklearn expects a
    # 2-D array with one column per feature.
    X = np.array(transfer_idx(points)).reshape(-1, 1)
    # The row comes from a frame that also carries a text unit column, so the
    # selected values arrive as object dtype; cast to float explicitly rather
    # than relying on the estimator to coerce them.
    y = np.asarray(df.loc[idx_name, points], dtype=float)

    regressor = LinearRegression()
    regressor.fit(X, y)
    return regressor.coef_[0], regressor.intercept_

In [19]:
# One [name, slope, intercept, unit] record per entry of `params`, fitted
# through the columns listed in `extreme_points`.
# The source frame is unpacked into `source_df` inside the comprehension, so
# the notebook-level `df` (the preprocessed frame) is never rebound here.
coefs = [
    [name, *linear_regress(source_df, idx_name, extreme_points), unit]
    for name, (source_df, idx_name, unit) in params.items()
]

In [20]:
# Tabulate the regression records: 'y' is the output name, 'a' the slope,
# 'b' the intercept and 'unit' the unit string.
coef_columns = ['y', 'a', 'b', 'unit']
df_coef = pd.DataFrame(data=coefs, columns=coef_columns)
df_coef

Unnamed: 0,y,a,b,unit
0,GT_power,4770.0,5.820766e-11,kW
1,fuel,35.124399,1103.131,MMBtu/hr
2,HP_power,388.180893,13191.96,kW
3,IP_power,464.88,2267.0,kW
4,auxiliary_load,101.857303,7173.189,kW
5,DAC_base_steam,1.513954,75.98732,MMBtu/hr
6,allocable_steam,2.953989,408.0324,MMBtu/hr
7,CO2_flue_gas,1.961184,62.13836,tonne CO2/hr


In [16]:
# Write the coefficient table to coefs.csv in the working directory, without the row index.
df_coef.to_csv(path_or_buf="coefs.csv", index=False)