In [7]:
# import libraries:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.core import Dense
from keras.optimizers import SGD, adam
from keras.models import load_model
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
import time 
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')

In [8]:
# TODO: move the helper functions below into a helper module and import them here in one line:
def handle_data(path='output.xlsx', scale=False, include_out=False):
    """Load the preprocessed data sheet and rescale its numeric feature columns.

    The columns 'Unnamed: 0', 'Fillage (%)', 'PD (ft)' and 'MPMPL (lb)' are
    dropped. The three 'Fillage_*' columns (already one-hot encoded) are passed
    through untouched, and every remaining column except the 'PRHP' label is
    treated as a numeric feature.

    Parameters
    ----------
    path : str
        Excel file to read with ``pd.read_excel``.
    scale : bool
        False (default): standardize each numeric column to zero mean and unit
        standard deviation. True: min-max scale each numeric column to [0, 1]
        with ``MinMaxScaler``.
    include_out : bool
        When True, the 'PRHP' label is also divided by its maximum value.

    Returns
    -------
    pandas.DataFrame
        Rescaled numeric columns, then the 'Fillage_*' columns, then 'PRHP'.

    Side effects
    ------------
    The result is also stored in the module-level ``df`` because existing cells
    read it from there; prefer using the return value. Progress messages are
    printed.
    """
    global df

    # `read_excel` has no `index` keyword (that belongs to `to_excel`); passing it
    # raises TypeError on current pandas, so it is no longer forwarded.
    raw = pd.read_excel(path)                                   # output of preprocessed data
    print('Loadded data from {}'.format(path))
    raw = raw.drop(['Unnamed: 0', 'Fillage (%)', 'PD (ft)', 'MPMPL (lb)'], axis=1)

    one_hot_columns = ['Fillage_low', 'Fillage_medium', 'Fillage_high']
    num_df = raw.drop(one_hot_columns + ['PRHP'], axis=1)
    cat_df_encoded = raw[one_hot_columns]                       # one-hot coding already applied
    label_df = raw['PRHP']
    if include_out:
        label_df = label_df / label_df.max()

    if not scale:
        # z-score standardization
        num_df_rescaled = (num_df - num_df.mean()) / num_df.std()
        done_message = 'Normalizing Data completed'
    else:
        scaler_x = MinMaxScaler(feature_range=(0, 1))
        scaler_x.fit(num_df.values)
        # Reuse the source labels/index instead of a hard-coded column list so the
        # frame lines up with the other pieces in `pd.concat`.
        num_df_rescaled = pd.DataFrame(
            scaler_x.transform(num_df.values),
            columns=num_df.columns,
            index=num_df.index,
        )
        done_message = 'Scaling Data completed'

    # Previously the scale=True / include_out=True path concatenated the undefined
    # `num_df_norm`; both paths now share this single concat.
    df = pd.concat([num_df_rescaled, cat_df_encoded, label_df], axis=1)
    print(done_message)
    return df
        
# def split_data(df=df, test_size=0.15):
#     (train_data, test_data) = train_test_split(df, test_size=test_size, random_state=42)
#     print('Data divided as following :{} points for training and {} points for testing'.format(len(train_data), len(test_data)))
#     train_data_features=train_data.drop('PRHP', axis=1)
#     train_data_labels=train_data['PRHP']
#     train_data_labels=pd.DataFrame(train_data_labels,  columns=['PRHP'])
#     test_data_features=test_data.drop('PRHP', axis=1)
#     test_data_labels=test_data['PRHP']
#     print('Data separated into {} inputs and {} output'.format(train_data_features.shape[1],train_data_labels.shape[1]))


In [9]:
# Get Data & normalize it:
# Use the frame returned by handle_data() rather than the module-level `df`
# it assigns as a side effect, then keep only the first 500 rows.
df = handle_data().iloc[:500]
features = df.drop('PRHP', axis=1)
labels = df['PRHP']
# fix random seeds for reproducibility (NumPy and Python's `random`)
# NOTE(review): the TensorFlow/Keras backend keeps its own RNG, which is not seeded here.
seed = 7
np.random.seed(seed)
random.seed(seed)

Loadded data from output.xlsx
Normalizing Data completed


In [10]:
# Function to selectively create our model:
def create_model(layers=[9], activation='relu', optimizer=adam, lr=1e-3, input_dim=None):
    """Build and compile a fully connected regression network.

    Parameters
    ----------
    layers : sequence of int
        Units of each hidden Dense layer, in order. The default list is only
        iterated, never mutated.
    activation : str
        Activation applied to every hidden layer.
    optimizer : callable
        Optimizer class/factory; it is called as ``optimizer(lr=lr)``.
    lr : float
        Learning rate forwarded to the optimizer.
    input_dim : int or None
        Number of input features. When None (default) it is taken from the
        module-level ``features`` frame, as before.

    Returns
    -------
    A compiled ``Sequential`` model with a single linear output unit,
    mean-squared-error loss and mean-absolute-error as metric.
    """
    if input_dim is None:
        input_dim = features.shape[1]

    model = Sequential()
    # Only the first layer added declares the input shape; the old code repeated
    # `input_shape` on every hidden layer through two identical branches.
    first_layer_kwargs = {'input_shape': (input_dim,)}
    for nodes in layers:
        model.add(Dense(nodes, activation=activation, **first_layer_kwargs))
        first_layer_kwargs = {}
    # With an empty `layers`, the output layer is the first layer and gets the shape.
    model.add(Dense(1, **first_layer_kwargs))
    model.compile(optimizer=optimizer(lr=lr), loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

# Wrap the builder as a scikit-learn style estimator (silent training) so it
# can be passed to GridSearchCV.
model = KerasRegressor(
    build_fn=create_model,
    verbose=0,
)
print('Model has been created')

Model has been created


In [11]:
# Candidate values for each hyper-parameter explored by the grid search.
# Further dimensions that are currently switched off:
# layers = [[9],[8,5]]
# activation = ['relu', 'sigmoid','softmax','elu']
batch_size = [10, 20]
epochs = [15, 25]
lr = [1e-3, 1e-2]
optimizer = [adam, SGD]

param_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
    'lr': lr,
    'optimizer': optimizer,
}
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    scoring='neg_mean_squared_error',
    cv=3,
)

# Time the complete search (every parameter combination x 3 folds).
start = time.time()
grid_result = grid.fit(features, labels)
end = time.time()
print('Training time :', round((end-start),2), 'Seconds')

# Summarize results:
# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print('Number of K-Fold cross validation: {},  '.format(grid_result.n_splits_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
# Per-combination report, kept disabled:
# for mean, stdev, param in zip(means, stds, params):
#     print("%f (%f) with: %r" % (mean, stdev, param))

Training time : 50.54 Seconds
Number of K-Fold cross validation: 3,  


In [6]:
# Report the parameter combination with the best mean cross-validated score.
best_configuration = grid_result.best_params_
print('For better accuracy use following configurations : {}'.format(best_configuration))

For better accuracy use following configurations : {'batch_size': 20, 'epochs': 25, 'lr': 0.01, 'optimizer': <class 'keras.optimizers.SGD'>}
