# DMML2 - Initial Data Exploration and TensorFlow Model Building

## Import relevant modules

In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LayerNormalization
from tensorflow.keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adam, SGD, Adagrad
from sklearn import metrics



In [35]:
# Read the slugs dataset and discard the columns this notebook does not use.
unused_columns = ['loadCase', 'out_supReac_kN_n3', 'out_supReac_kN_n4', 'out_verDisp_mm_n5']
raw = pd.read_csv("data/220306_slugs_dataset.csv").drop(columns=unused_columns)

## Feature Engineering

In [36]:
# Derive two extra input features:
#   densDiff - slug density minus pocket density
#   centrFor - slug velocity squared times that density difference
raw = raw.assign(
    densDiff=lambda frame: frame['slugDen'] - frame['pockDen'],
    centrFor=lambda frame: frame['slugVel']**2 * frame['densDiff'],
)

## EDA

In [37]:
# Preview the first five rows (head() defaults to n=5).
raw.head()

Unnamed: 0,slugDen,slugLen,pockDen,pockLen,slugVel,basePer,baseAmp,basePha,simTime,out_oopDisp_mm_n2,out_oopDisp_mm_n5,out_endBend_kNm_n6,densDiff,centrFor
0,785.67,6.47,234.16,10.06,5.77,12.55,0.85,74.1,45.8,197.45,180.47,198.49,551.51,18361.367279
1,705.03,10.36,190.44,11.25,7.42,11.63,0.8,59.11,44.0,186.85,178.72,238.07,514.59,28331.472876
2,741.98,11.64,159.09,12.52,7.42,11.54,0.65,65.23,48.2,155.47,163.53,180.64,582.89,32091.824996
3,731.88,8.82,167.88,9.84,9.52,11.48,0.67,67.85,30.8,169.47,163.76,233.31,564.0,51115.5456
4,669.21,11.46,213.87,8.56,8.18,12.86,0.81,56.68,37.7,223.64,242.37,219.33,455.34,30467.892216


In [38]:
# Summary statistics for every column. In the recorded output the three retained
# out_* columns have count 49975 against 50000 for the others (25 rows with missing
# values), and simTime has a maximum (884.8) far above its 75th percentile (46.7).
raw.describe()

Unnamed: 0,slugDen,slugLen,pockDen,pockLen,slugVel,basePer,baseAmp,basePha,simTime,out_oopDisp_mm_n2,out_oopDisp_mm_n5,out_endBend_kNm_n6,densDiff,centrFor
count,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,49975.0,49975.0,49975.0,50000.0,50000.0
mean,749.872761,10.000962,199.758501,10.010496,7.993627,11.999761,0.699835,60.026022,41.5709,182.077874,187.301203,218.388931,550.11426,37334.569111
std,75.250796,1.988878,49.985999,1.99871,1.990718,1.001198,0.100237,10.025567,15.655315,31.026056,44.422666,59.079226,90.207377,19030.478577
min,439.01,0.75,9.47,2.07,0.4,8.05,0.27,20.91,15.2,73.24,73.89,83.33,182.81,69.6832
25%,699.0775,8.66,166.1975,8.65,6.6575,11.33,0.63,53.21,32.6,162.6,160.96,177.36,488.99,23381.323576
50%,749.2,10.01,199.69,10.01,7.99,12.0,0.7,59.97,38.5,179.73,180.34,209.19,549.77,34584.367189
75%,801.13,11.34,233.4825,11.37,9.34,12.68,0.77,66.82,46.7,198.14,203.755,247.5,610.91,48180.168187
max,1066.9,18.22,404.01,17.86,16.81,16.38,1.11,106.85,884.8,497.92,711.91,700.15,951.3,169747.29855


In [39]:
# Pairwise correlation of all columns (DataFrame.corr defaults to Pearson),
# displayed as a colour-graded table so strong relationships stand out.
corr = raw.corr()
corr.style.background_gradient(cmap='RdBu_r')


Unnamed: 0,slugDen,slugLen,pockDen,pockLen,slugVel,basePer,baseAmp,basePha,simTime,out_oopDisp_mm_n2,out_oopDisp_mm_n5,out_endBend_kNm_n6,densDiff,centrFor
slugDen,1.0,0.002973,0.003178,-0.000839,0.002337,-0.007276,0.004203,8.4e-05,-0.005237,0.107886,0.136626,0.19676,0.832437,0.268792
slugLen,0.002973,1.0,0.003706,-0.000624,0.011451,0.002834,0.000198,0.00564,0.200271,-0.098746,-0.07936,0.035261,0.000427,0.011067
pockDen,0.003178,0.003706,1.0,0.006535,-0.000133,0.0041,-0.001499,0.000611,0.002585,-0.05546,-0.07231,-0.076405,-0.551472,-0.177761
pockLen,-0.000839,-0.000624,0.006535,1.0,-0.000667,-0.000209,0.000465,-0.002671,0.209454,-0.094856,-0.084068,-0.036801,-0.004321,-0.001595
slugVel,0.002337,0.011451,-0.000133,-0.000667,1.0,-0.001702,0.001361,-0.004626,-0.775522,0.348185,0.510519,0.713178,0.002024,0.92049
basePer,-0.007276,0.002834,0.0041,-0.000209,-0.001702,1.0,0.000866,0.007024,0.002168,-0.003866,0.004313,0.011802,-0.008341,-0.004311
baseAmp,0.004203,0.000198,-0.001499,0.000465,0.001361,0.000866,1.0,0.001621,0.001092,0.689821,0.402204,0.334415,0.004336,0.004044
basePha,8.4e-05,0.00564,0.000611,-0.002671,-0.004626,0.007024,0.001621,1.0,0.007794,-0.00884,-0.03093,-0.053166,-0.000269,-0.002115
simTime,-0.005237,0.200271,0.002585,0.209454,-0.775522,0.002168,0.001092,0.007794,1.0,-0.273195,-0.378817,-0.483507,-0.005801,-0.652962
out_oopDisp_mm_n2,0.107886,-0.098746,-0.05546,-0.094856,0.348185,-0.003866,0.689821,-0.00884,-0.273195,1.0,0.913198,0.660997,0.120739,0.384655


## Data Cleaning

In [40]:

# Drop every row that contains a missing value, then min-max rescale all columns
# (input features and target columns alike) with MinMaxScaler's default range.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so the test rows' minima/maxima influence the scaling of the training
# data. Consider fitting the scaler on the training rows only.
raw = raw.dropna()

scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(raw)
refined_df = pd.DataFrame(data=scaled_values, index=raw.index, columns=raw.columns)
print(refined_df)

        slugDen   slugLen   pockDen   pockLen   slugVel   basePer   baseAmp  \
0      0.552103  0.327418  0.569499  0.506016  0.327239  0.540216  0.690476   
1      0.423673  0.550086  0.458686  0.581381  0.427788  0.429772  0.630952   
2      0.482521  0.623354  0.379226  0.661811  0.427788  0.418968  0.452381   
3      0.466435  0.461935  0.401506  0.492084  0.555759  0.411765  0.476190   
4      0.366625  0.613051  0.518072  0.411020  0.474101  0.577431  0.642857   
...         ...       ...       ...       ...       ...       ...       ...   
49995  0.467136  0.475673  0.413672  0.499050  0.517367  0.567827  0.416667   
49996  0.564112  0.610189  0.455163  0.517416  0.411335  0.349340  0.464286   
49997  0.289079  0.621065  0.415294  0.462318  0.221207  0.499400  0.535714   
49998  0.422287  0.424728  0.518401  0.622546  0.669714  0.228091  0.440476   
49999  0.527592  0.551231  0.459446  0.687144  0.659354  0.456182  0.559524   

        basePha   simTime  out_oopDisp_mm_n2  out_o

## Train/Test Split

In [41]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
TARGET_COLUMNS = ["out_oopDisp_mm_n2", "out_oopDisp_mm_n5", "out_endBend_kNm_n6"]
train_data, test_data = train_test_split(refined_df, test_size=0.20, random_state=0)

# Inputs are every non-target column; outputs are the three target columns.
x_train, y_train = train_data.drop(columns=TARGET_COLUMNS), train_data[TARGET_COLUMNS]
x_test, y_test = test_data.drop(columns=TARGET_COLUMNS), test_data[TARGET_COLUMNS]

## Modelling

In [42]:
def get_model(input_dimensions, output_dimensions, hidden_layers=9, hidden_units=64,
              first_layer_units=128, dropout_rate=0.1):
    """Build and compile a fully connected regression network.

    The network is a ``Dense(first_layer_units)`` ReLU layer, a ``Dropout`` layer,
    ``hidden_layers`` further ``Dense(hidden_units)`` ReLU layers, and a linear
    output layer. The default arguments reproduce the original fixed architecture
    (128 units -> dropout 0.1 -> 9 x 64 units).

    Args:
        input_dimensions: Number of input features per sample.
        output_dimensions: Number of regression targets predicted per sample.
        hidden_layers: Number of ``Dense(hidden_units)`` layers after the dropout layer.
        hidden_units: Width of each of those hidden layers.
        first_layer_units: Width of the first dense layer.
        dropout_rate: Dropout rate applied after the first dense layer.

    Returns:
        A compiled ``Sequential`` model using mean-squared-error loss and an
        ``Adam`` optimizer with default settings; mean squared error is also
        reported as a metric.
    """
    model = Sequential()
    model.add(Dense(first_layer_units, input_dim=input_dimensions, activation='relu'))
    model.add(Dropout(dropout_rate))
    for _ in range(hidden_layers):
        model.add(Dense(hidden_units, activation='relu'))

    # Linear activation on the output layer: this is a regression model.
    model.add(Dense(output_dimensions, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer=Adam(), metrics=['mean_squared_error'])
    return model

In [43]:
# Size the network from the data rather than hard-coding the column counts, so the
# model stays in sync if features or targets are added or removed upstream.
model = get_model(x_train.shape[1], y_train.shape[1])
model.fit(x_train, y_train, epochs=10, batch_size=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21774316bb0>

## Evaluation

In [44]:
# Score the held-out rows: predict the (scaled) targets and report the R^2 score.
preds = model.predict(x_test)
r2 = metrics.r2_score(y_true=y_test, y_pred=preds)
print(r2)

0.8203295135444039


## Optimization

.....