In [1]:

# Start-up banner.
# NOTE(review): the banner names an "RF Char Yield Model", while the cells
# below train a Keras network on the "Liquid%" column — confirm the wording.
for banner_line in (
    "------------------",
    "Program Started for RF Char Yield Model",
    "------------------\n",
):
    print(banner_line)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time
import os

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from sklearn import model_selection
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Show the TensorFlow version in use, then confirm that all imports succeeded.
# Each item is printed on its own line, followed by the blank line that the
# trailing "\n" in the status message produces.
print(tf.__version__, "--------Modules have been loaded----------\n", sep="\n")

------------------
Program Started for RF Char Yield Model
------------------

2.6.0-dev20210508
--------Modules have been loaded----------



In [2]:
# Absolute, machine-specific location of the processed CSV file.
dir_p = 'C:\\Users\\Honeyz\\Desktop\\Modeling\\Final models\\CoPyro Project\\Data\\LiquidCoPyroDataProcessed_Proximate.csv'

raw_dataset = pd.read_csv(dir_p, skipinitialspace=True)

# dropna() already returns a new frame, so raw_dataset is left untouched.
dataset = raw_dataset.dropna()

print("--------Data have been loaded & splitted---------\n")

_seed = 42
_random_state = np.random.RandomState(_seed)

# Bin the "w%P" column into five intervals, (-1, 20], (20, 40], (40, 60],
# (60, 80] and (80, inf), so the train/test split keeps the same mix of bins.
strata_1 = pd.cut(
    dataset.loc[:, "w%P"],
    bins=[-1, 20, 40, 60, 80, np.inf],
    labels=[1, 2, 3, 4, 5],
)
training_data, testing_data = model_selection.train_test_split(
    dataset,
    test_size=0.1,
    stratify=strata_1,
    random_state=_random_state,
)

print("--------Data have been stratified----------\n")

# "Liquid%" is the regression target; every other column is a feature.
training_features = training_data.drop("Liquid%", axis=1).astype(float)
training_target = training_data.loc[:, ["Liquid%"]].astype(float)

# Cast the hold-out split to float as well, so the test data reaches the model
# and the metrics with the same dtype as the training data.
testing_features = testing_data.drop("Liquid%", axis=1).astype(float)
testing_target = testing_data.loc[:, ["Liquid%"]].astype(float)

# These scalers are created but not applied in the visible cells; the model
# normalises its inputs with a Keras Normalization layer instead.
# If they are ever used, fit them on the training features only and call
# transform() (not fit_transform()) on the test features.
sc = StandardScaler()
mmc = MinMaxScaler()

print("--------Data have been divided into features and targets----------\n")



--------Data have been loaded & splitted---------

--------Data have been stratified----------

--------Data have been divided into features and targets----------



In [3]:
%%time

def build_and_compile_model(loss, r1, l1):
    """Build and compile the dense regression network used by the grid search.

    Args:
        loss: Loss specification forwarded unchanged to ``compile``.
        r1: Dropout rate applied after the first and the second hidden layer.
        l1: Number of units in each of the three hidden ``Dense`` layers.

    Returns:
        The compiled ``keras.Sequential`` model (Adam optimizer, learning
        rate 0.001, tracking mean squared error as a metric).
    """
    # The normalisation layer is adapted on the module-level
    # ``training_features`` frame, which must exist before this is called.
    feature_norm = preprocessing.Normalization()
    feature_norm.adapt(np.array(training_features))

    network = keras.Sequential()
    network.add(feature_norm)
    network.add(layers.Dense(l1, activation='relu'))
    network.add(layers.Dropout(r1))
    network.add(layers.Dense(l1, activation='relu'))
    network.add(layers.Dropout(r1))
    # The third hidden layer feeds the output directly (no dropout after it).
    network.add(layers.Dense(l1, activation='relu'))
    network.add(layers.Dense(1, activation='linear'))

    adam = tf.keras.optimizers.Adam(0.001)
    tracked_metrics = [keras.metrics.MeanSquaredError()]
    network.compile(loss=loss, optimizer=adam, metrics=tracked_metrics)

    return network

# Hyper-parameter candidates: 3 * 3 * 1 * 4 * 3 = 108 combinations.
epochs = [350, 450, 550]
batch_size = [16, 32, 64]
loss = ['mean_squared_error']
l1 = [8, 16, 32, 64]          # units per hidden layer
r1 = [0.08, 0.1, 0.12]        # dropout rate

# Grid handed to GridSearchCV. The wrapper below receives `epochs` and
# `batch_size` alongside the arguments of build_and_compile_model.
param_grid = {
    "batch_size": batch_size,
    "epochs": epochs,
    "loss": loss,
    "l1": l1,
    "r1": r1,
}

# Wrap the Keras builder so scikit-learn can treat it as a regressor.
DNN_regressor = KerasRegressor(build_fn=build_and_compile_model, verbose=42)

print("--------Model and search space have been defined----------\n")

# Shuffled 10-fold cross-validation with a fixed seed, scored by negative MSE
# and run on all available cores.
cv_folds = KFold(n_splits=10, shuffle=True, random_state=42)
GridSearch_cv = model_selection.GridSearchCV(
    DNN_regressor,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=cv_folds,
    n_jobs=-1,
    verbose=42,
)

GridSearch_cv.fit(training_features, training_target)

print("Model Best parameters.........\n")
print(GridSearch_cv.best_params_)

print("\nModel Best RMSE score.........\n")
# best_score_ is a negated MSE; flip the sign and take the square root.
print((-GridSearch_cv.best_score_)**0.5)

--------Model and search space have been defined----------

Fitting 10 folds for each of 108 candidates, totalling 1080 fits


KeyboardInterrupt: 

In [None]:
print("--------Retrain model with best parameters found from grid search----------\n")

# Take the winning combination from the grid search. The architecture and
# loss settings go to the builder; epochs and batch size go to fit().
best_params = GridSearch_cv.best_params_
builder_kwargs = {key: best_params[key] for key in ('loss', 'r1', 'l1')}
model = build_and_compile_model(**builder_kwargs)

# Fit on the complete training split.
model.fit(
    training_features,
    training_target,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    verbose=1,
)

In [None]:

# Destination for the trained network; the .h5 suffix selects the HDF5 format.
# NOTE(review): the file is named "Char_DNN_model" although this notebook
# trains on the "Liquid%" target — confirm it does not overwrite another model.
filename = 'C:\\Users\\Honeyz\\Desktop\\Modeling\\Final models\\CoPyro Project\\Saved Models\\Char_DNN_model.h5'

print("Saving Model .........")
model.save(filename)

In [None]:
print("--------Test the model on hideout test data----------\n")

# Evaluate the estimator selected by the grid search. It gets its own name so
# `model` keeps pointing at the retrained Keras network that the save cell
# persists. Rebinding `model` here would break a later re-run of that cell.
best_estimator = GridSearch_cv.best_estimator_
predictions = best_estimator.predict(testing_features)

# Hold-out metrics; rmse, mae and r2 are reused by the reporting cell below.
mae = mean_absolute_error(testing_target, predictions)
mse = mean_squared_error(testing_target, predictions)
rmse = mse**0.5
r2 = r2_score(testing_target, predictions)

print("The RMSE Score: ")
print(rmse)

print("\nThe R^2 Score: ")
print(r2)

print("\nThe MAE Score: ")
print(mae)


In [None]:
# Predicted-versus-true scatter on square axes, with the y = x line for reference.
lims = [28, 100]  # shared limits for both axes

a = plt.axes(aspect='equal')
a.scatter(testing_target, predictions)
a.set_xlabel('True Values')
a.set_ylabel('Predictions')
a.set_xlim(lims)
a.set_ylim(lims)
# Assign the result so the list of Line2D objects is not echoed as cell output.
_ = a.plot(lims, lims)

In [None]:
# Append the hold-out metrics to the running performance log. The `with`
# block closes the file even if one of the writes fails.
with open("C:\\Users\\Honeyz\\Desktop\\Modeling\\Final models\\CoPyro Project\\Results\\Performace_Output_Liquid_DNN_Notebook.txt", "a") as OutF:
    OutF.write("\n RMSE: ")
    OutF.write(str(round(rmse, 3)))
    OutF.write("\n MAE: ")
    OutF.write(str(round(mae, 3)))
    OutF.write("\n RSBS: ")
    # The original wrote an undefined name `rsbs`, which raised NameError after
    # a partial write. The R^2 score computed in the evaluation cell is the
    # only metric not otherwise logged, so it is written here.
    # NOTE(review): if "RSBS" was meant to be the search's best CV score,
    # write GridSearch_cv.best_score_ instead.
    OutF.write(str(r2))
    OutF.write("\n")

# Append the raw hold-out targets and the matching predictions.
with open("C:\\Users\\Honeyz\\Desktop\\Modeling\\Final models\\CoPyro Project\\Results\\TestData_Output_Liquid_DNN_Notebook.txt", "a") as OutTD:
    OutTD.write("\n Actual data: ")
    OutTD.write(str(testing_target.values))
    OutTD.write("\n Prediction data: ")
    OutTD.write(str(predictions))

