In [311]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from tensorflow import keras

In [312]:
df = pd.read_csv("data/Foam Rheometer Data for ML.csv")
df.head()

Unnamed: 0,Gas,Pressure,Temperature,Surfactant,Concentration,Water Type,Chelating Agent,Corrosion Inhibitor,Shear Rate,Foam Quality,Viscosity
0,CO2,1000,300,Armovis EHS,1%,PW,0,0%,100,80.61,16.320256
1,CO2,1000,300,Armovis EHS,1%,PW,0,0.0%,250,78.395238,10.194167
2,CO2,1000,300,Armovis EHS,1%,PW,0,0.0%,500,76.326667,6.772267
3,CO2,1000,300,Armovis EHS,1%,PW,0,0.0%,750,75.541304,5.325935
4,CO2,1000,300,Armovis EHS,1%,PW,0,0.0%,1000,74.802,5.0888


In [313]:
print(df["Gas"].unique())
print(df["Surfactant"].unique())
print(df["Water Type"].unique())
df = pd.get_dummies(df, columns=["Gas", "Surfactant", "Water Type"], prefix=["Gas", "Surfactant", "Water"], drop_first = False)
print(df.columns)

['CO2' 'N2']
['Armovis EHS' 'Armovis EHS + TTM' 'TTM' 'Armogel O' 'Ethoduomeen T/13'
 'Ethomeen C12' 'CAS 50']
['PW' 'DI' 'SW' 'FW']
Index(['Pressure', 'Temperature', 'Concentration', 'Chelating Agent',
       'Corrosion Inhibitor', 'Shear Rate', 'Foam Quality', 'Viscosity',
       'Gas_CO2', 'Gas_N2', 'Surfactant_Armogel O', 'Surfactant_Armovis EHS',
       'Surfactant_Armovis EHS + TTM', 'Surfactant_CAS 50',
       'Surfactant_Ethoduomeen T/13', 'Surfactant_Ethomeen C12',
       'Surfactant_TTM', 'Water_DI', 'Water_FW', 'Water_PW', 'Water_SW'],
      dtype='object')


In [314]:
def remove_pc(string):
    if isinstance(string, str):
        string = string.replace('%', '')
        return float(string)

df['Concentration']=df['Concentration'].apply(remove_pc)
df['Corrosion Inhibitor']=df['Corrosion Inhibitor'].apply(remove_pc)
df['Chelating Agent']=df['Chelating Agent'].apply(remove_pc)

In [315]:
df.head(5)

Unnamed: 0,Pressure,Temperature,Concentration,Chelating Agent,Corrosion Inhibitor,Shear Rate,Foam Quality,Viscosity,Gas_CO2,Gas_N2,...,Surfactant_Armovis EHS,Surfactant_Armovis EHS + TTM,Surfactant_CAS 50,Surfactant_Ethoduomeen T/13,Surfactant_Ethomeen C12,Surfactant_TTM,Water_DI,Water_FW,Water_PW,Water_SW
0,1000,300,1.0,0.0,0.0,100,80.61,16.320256,1,0,...,1,0,0,0,0,0,0,0,1,0
1,1000,300,1.0,0.0,0.0,250,78.395238,10.194167,1,0,...,1,0,0,0,0,0,0,0,1,0
2,1000,300,1.0,0.0,0.0,500,76.326667,6.772267,1,0,...,1,0,0,0,0,0,0,0,1,0
3,1000,300,1.0,0.0,0.0,750,75.541304,5.325935,1,0,...,1,0,0,0,0,0,0,0,1,0
4,1000,300,1.0,0.0,0.0,1000,74.802,5.0888,1,0,...,1,0,0,0,0,0,0,0,1,0


In [316]:
y = df['Viscosity'].values
X = df.loc[:, df.columns!='Viscosity'].values

In [317]:
# Simple Test
X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.15)

In [318]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
print(X_train.shape)

(306, 20)


In [319]:
EPOCHS = 500
BATCH_SIZE = 24
VERBOSE = 1
NB_CLASSES = 10
N_HIDDEN = 25 
VALIDATION_SPLIT = 0.2
DROPOUT = 0.3

In [320]:
# BUILD MODEL WITH ONE LAYER
model = tf.keras.models.Sequential()
model.add(keras.layers.Dense(N_HIDDEN, input_shape = (20,),
                            name = 'dense_layer', kernel_initializer='normal',
                            activation = 'relu'))
#model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(25,
                            name = 'dense_layer_1', kernel_initializer='normal',
                            activation = 'relu'))
#model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(1, name = 'dense_layer_3', kernel_initializer='normal',
                            activation='linear'))
model.summary()

Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_layer (Dense)          (None, 25)                525       
_________________________________________________________________
dense_layer_1 (Dense)        (None, 25)                650       
_________________________________________________________________
dense_layer_3 (Dense)        (None, 1)                 26        
Total params: 1,201
Trainable params: 1,201
Non-trainable params: 0
_________________________________________________________________


In [321]:
model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['mean_squared_error'])

In [322]:
#model.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs = EPOCHS, verbose = VERBOSE,
#            validation_split = VALIDATION_SPLIT)
model.fit(X_train, y_train, epochs=7000, validation_split=0.2, verbose = VERBOSE)

Epoch 1/7000
Epoch 2/7000
Epoch 3/7000
Epoch 4/7000
Epoch 5/7000
Epoch 6/7000
Epoch 7/7000
Epoch 8/7000
Epoch 9/7000
Epoch 10/7000
Epoch 11/7000
Epoch 12/7000
Epoch 13/7000
Epoch 14/7000
Epoch 15/7000
Epoch 16/7000
Epoch 17/7000
Epoch 18/7000
Epoch 19/7000
Epoch 20/7000
Epoch 21/7000
Epoch 22/7000
Epoch 23/7000
Epoch 24/7000
Epoch 25/7000
Epoch 26/7000
Epoch 27/7000
Epoch 28/7000
Epoch 29/7000
Epoch 30/7000
Epoch 31/7000
Epoch 32/7000
Epoch 33/7000
Epoch 34/7000
Epoch 35/7000
Epoch 36/7000
Epoch 37/7000
Epoch 38/7000
Epoch 39/7000
Epoch 40/7000
Epoch 41/7000
Epoch 42/7000
Epoch 43/7000
Epoch 44/7000
Epoch 45/7000
Epoch 46/7000
Epoch 47/7000
Epoch 48/7000
Epoch 49/7000
Epoch 50/7000
Epoch 51/7000
Epoch 52/7000
Epoch 53/7000
Epoch 54/7000
Epoch 55/7000
Epoch 56/7000
Epoch 57/7000
Epoch 58/7000
Epoch 59/7000
Epoch 60/7000
Epoch 61/7000
Epoch 62/7000
Epoch 63/7000
Epoch 64/7000
Epoch 65/7000
Epoch 66/7000
Epoch 67/7000
Epoch 68/7000
Epoch 69/7000
Epoch 70/7000
Epoch 71/7000
Epoch 72/7000
E

<tensorflow.python.keras.callbacks.History at 0x2092d330d30>

In [323]:

from sklearn.metrics import r2_score, mean_squared_error
preds = model.predict(X_train)
print(r2_score(y_train, preds))
print(mean_squared_error(y_train, preds))
preds = model.predict(X_test)
print(r2_score(y_test, preds))
print(mean_squared_error(y_test, preds))

0.9864835285706515
1.1856323364994048
0.9391832285176521
4.876293238012163


In [324]:
y_test

array([ 5.57802   , 21.2       , 18.1       , 31.1       , 33.7       ,
        4.97555769, 14.54554545, 30.8       , 32.2       , 14.4       ,
       18.        , 29.3       , 16.9       , 12.09408511,  3.59496154,
       14.7       , 20.2       , 17.6       , 39.        , 16.70965714,
       19.63297872, 12.6       ,  7.40778846,  6.77226667, 10.24691489,
       17.4       , 15.7       , 17.4       , 19.2       , 27.5       ,
       19.615     , 25.9       , 13.3       , 13.1       , 34.9       ,
       20.9       ,  7.57810204, 21.4858    , 17.2       , 19.2       ,
       13.8       , 13.8       , 10.68321429, 25.59384211, 28.        ,
       13.18948837, 12.8       ,  5.95341509, 21.1       , 38.6       ,
        9.23369091, 36.8       , 22.09183333, 15.1       ,  5.32593478])

In [325]:
print(preds)

[[ 6.4474597]
 [21.634714 ]
 [18.389818 ]
 [31.666904 ]
 [40.79028  ]
 [ 6.074297 ]
 [15.0157795]
 [31.481035 ]
 [39.029305 ]
 [14.41388  ]
 [18.270971 ]
 [34.677464 ]
 [17.475958 ]
 [11.273173 ]
 [ 4.2162004]
 [15.744637 ]
 [21.014505 ]
 [17.0076   ]
 [40.272293 ]
 [14.875077 ]
 [20.51036  ]
 [13.258844 ]
 [ 8.545646 ]
 [10.501292 ]
 [10.067573 ]
 [18.182646 ]
 [16.244429 ]
 [17.819696 ]
 [17.930105 ]
 [29.976091 ]
 [14.346352 ]
 [27.725554 ]
 [14.172156 ]
 [14.390395 ]
 [36.05298  ]
 [22.440212 ]
 [ 8.617895 ]
 [18.981207 ]
 [16.06206  ]
 [18.77174  ]
 [14.072571 ]
 [15.413673 ]
 [10.6488085]
 [21.801857 ]
 [23.938494 ]
 [11.875315 ]
 [13.062663 ]
 [ 6.2107167]
 [21.321142 ]
 [42.350014 ]
 [10.793377 ]
 [37.3474   ]
 [22.010284 ]
 [15.123635 ]
 [ 7.9690337]]
