In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the raw data set. A forward slash is used so the relative path resolves
# on Windows as well as on POSIX systems (a backslash is only a separator on Windows).
raw_df = pd.read_excel("data/data.xlsx")

# Keep only rows with a strictly positive LMTD and a non-negative
# LA Chilling Duty. NaN values fail both comparisons and are dropped as well.
valid_rows = (raw_df["LMTD"] > 0) & (raw_df["LA Chilling Duty"] >= 0)

# .copy() gives an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning and raw_df stays untouched.
df = raw_df.loc[valid_rows].copy()

# Derived target: chilling duty scaled by 0.001 and divided by the captured
# CO2 rate. NOTE(review): the 0.001 factor presumably converts the duty to MJ;
# confirm the unit of "LA Chilling Duty". Rows where "Captured CO2 (kg/h)" is 0
# would yield inf/NaN here; verify none exist.
df["SCD (MJ/kg)"] = (0.001 * df["LA Chilling Duty"]) / df["Captured CO2 (kg/h)"]

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
from joblib import dump, load

In [4]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [5]:
# Input columns shared by both feature sets.
base_features = [
    "FG Temperature",
    "Lean Loading",
    "Lean Amine Temperature",
    "L/G (kg/kg)",
    "Flue gas CO2 cons",
    "CO2 Capture Efficiency",
]

# XN: the shared inputs only.
XN = df[base_features]
# XA: the shared inputs plus LMTD and absorber height, in that column order.
XA = df[base_features + ["LMTD", "Abs Height"]]

# Three regression targets, kept together in one frame; individual targets
# are selected by column name when fitting.
y = df[["SRD (MJ/kg CO2)", "Condensor Specific Duty", "SCD (MJ/kg)"]]

In [6]:
# Split both feature sets and the targets in a single call so that all six
# outputs share exactly the same row partition. Previously y_train / y_test
# were assigned twice and the alignment between the XA and XN splits relied
# on two separate calls happening to use the same seed and row count.
# 70 % train / 30 % test, fixed seed for reproducibility.
XA_train, XA_test, XN_train, XN_test, y_train, y_test = train_test_split(
    XA, XN, y, test_size=0.30, random_state=211
)

In [7]:
# Search space for the decision-tree hyperparameters.
# 7 * 5 * 5 * 3 * 4 * 2 = 4,200 combinations in total.
param_grid = dict(
    max_depth=[None, 2, 3, 4, 6, 10, 20],        # None = no depth limit
    min_samples_leaf=[1, 3, 4, 6, 10],
    max_leaf_nodes=[None, 10, 20, 30, 40],       # None = unlimited leaves
    max_features=["log2", 0.33, 1.0],            # floats are fractions of the features
    min_samples_split=[2, 4, 6, 8],
    splitter=["random", "best"],
)

In [8]:
# Base estimator handed to the hyperparameter search. The seed is fixed so
# that randomised choices inside the tree (e.g. splitter='random' or a
# feature subset) are repeatable between runs.
dt = DecisionTreeRegressor(
    random_state=211,
)

In [9]:
# Exhaustive search over param_grid with 5-fold cross-validation.
# Candidates are ranked by negated MSE (higher is better), and all
# available CPU cores are used (n_jobs=-1).
model = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=5,
    n_jobs=-1,
)

In [10]:
# Run the search on the XA feature set for a single target, the SRD column.
model.fit(X=XA_train, y=y_train["SRD (MJ/kg CO2)"])

In [11]:
# Hyperparameter combination that achieved the best mean cross-validated
# score during the grid search.
model.best_params_

{'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'splitter': 'best'}

In [12]:
# Mean cross-validated score of the best combination. The search was scored
# with 'neg_mean_squared_error', so this is a negated MSE (closer to 0 is better).
model.best_score_

-0.004841747880025471

In [13]:
# Notes from earlier, wider grid searches.
# Format: 'hyperparameter': [candidate values tried] -> best value selected
# 'max_depth': [None, 2, 3, 4, 6, 8, 10, 20, 30, 40, 50, 60, 70] -> None
# 'min_samples_leaf': [1, 2, 3, 4, 6, 8, 10, 20, 30, 40, 50, 60, 70] -> 1
# 'max_leaf_nodes': [None,10,20,30,40,60,80,100,200] -> None
# 'max_features': ['sqrt', 'log2', 0.33, 1.0] -> 1.0
# 'min_samples_split': [2, 4 , 6, 8 , 10, 20, 30] -> 2
# 'splitter': ['random', 'best'] -> 'best'
# 'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'] -> 'squared_error'
# 'min_impurity_decrease': [0.0, 0.005, 0.05, 0.1, 0.2, 0.3] -> 0.0

In [14]:
# MSE of the search's refitted best model on the training rows themselves.
# This is an in-sample figure: the selected tree has no depth or leaf-size
# limit, so a value near zero is expected and says nothing about how the
# model performs on unseen data.
mean_squared_error(
    y_true=y_train["SRD (MJ/kg CO2)"],
    y_pred=model.predict(XA_train),
)

2.371325366395827e-33