In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import GridSearchCV,StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from scikeras.wrappers import KerasClassifier

In [51]:
# Load the pre-processed dataset from the working directory and report its
# (rows, columns) shape as a quick sanity check.
CSV_PATH = 'processed.csv'
df = pd.read_csv(CSV_PATH)
print('Data shape:', df.shape)

Data shape: (1069, 22)


In [52]:
# Show every column name next to its positional index.
[(position, column) for position, column in enumerate(df.columns)]

[(0, 'w/b'),
 (1, 'Water'),
 (2, 'Cement type'),
 (3, 'Cement'),
 (4, 'Slag'),
 (5, 'Fly ash'),
 (6, 'Silica fume'),
 (7, 'Lime filler'),
 (8, 'FA'),
 (9, 'CA'),
 (10, 'Plasticizer'),
 (11, 'Superplasticizer'),
 (12, 'Air entraining'),
 (13, 'Comp. str. test age'),
 (14, 'Compressive strength'),
 (15, 'Air content'),
 (16, 'Spreed'),
 (17, 'Slump'),
 (18, 'Fresh density'),
 (19, 'Dry  density'),
 (20, 'Migration test age'),
 (21, 'Migration resistance')]

One-hot encoding of the cement type

In [53]:
# One-hot encode the categorical 'Cement type' column. The column is selected
# by name (rather than by position) so the cell keeps working if the CSV
# column order changes. All remaining columns are passed through unchanged and
# are placed after the dummy columns in the transformer output.
# sparse_threshold=0 forces a dense ndarray so the slicing below always yields
# a 1-D target, regardless of how sparse the encoded block turns out to be.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), ['Cement type'])],
    remainder='passthrough',
    sparse_threshold=0,
)
encoded = ct.fit_transform(df)

# Last column is the target; the first dummy column is dropped to avoid a
# redundant (perfectly collinear) indicator.
y = encoded[:, -1]
X = encoded[:, 1:-1]
print(X.shape)
print(y.shape)

(1069, 30)
(1069,)


ANN

In [54]:
def get_ann(optimizer='rmsprop', init='glorot_uniform', meta=None):
    """Build and compile the feed-forward classifier used in this notebook.

    The network is Dense(60, relu) -> Dense(32, relu) -> Dense(32, relu)
    -> Dense(5, softmax), compiled with sparse categorical cross-entropy and
    an accuracy metric.

    Parameters
    ----------
    optimizer : str or keras optimizer
        Optimizer handed to ``compile``.
    init : str or keras initializer
        Kernel initializer of the FIRST Dense layer only; the remaining layers
        keep the Keras default initializer.
    meta : dict, optional
        Fit-time metadata. scikeras supplies this automatically when the build
        function declares a ``meta`` parameter. If it carries
        ``'n_features_in_'`` the input width is taken from it, so the model no
        longer depends on whichever ``X`` happens to be bound in the notebook.
        When absent (e.g. a direct ``get_ann()`` call) the notebook-global
        ``X`` is used, as before.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    if meta is not None and 'n_features_in_' in meta:
        n_features = meta['n_features_in_']
    else:
        # Fallback for direct calls: relies on the notebook-global X.
        n_features = X.shape[1]

    ann=keras.models.Sequential()
    ann.add(keras.layers.Dense(60,activation='relu',kernel_initializer=init,
                            input_shape=(n_features,)))
    ann.add(keras.layers.Dense(32,activation='relu'))
    ann.add(keras.layers.Dense(32,activation='relu'))
    # Five output units: one per target class expected by the softmax head.
    ann.add(keras.layers.Dense(5,activation='softmax'))
    ann.compile(optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return ann

Grid Search

In [55]:
# Hyper-parameter space explored by the grid search. 'model__init' is routed
# to get_ann's `init` argument; the other keys are set on the KerasClassifier.
param_grid = {
    'batch_size': [16, 32, 64],
    'epochs': [200, 400],
    'model__init': ['glorot_uniform', 'normal', 'uniform'],
    'optimizer': ['rmsprop', 'adam'],
}

# Seeded, shuffled stratified folds (default fold count), materialised as a
# list so the very same partition can be reused by the evaluation cell.
outer_cv = StratifiedKFold(shuffle=True, random_state=0)
splits = [fold for fold in outer_cv.split(X, y)]

# Tune on the training part of the first fold only.
train_index, test_index = splits[0]
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

# Standardise using statistics from the training part only.
# NOTE(review): the scaler is fitted on all of X_train before GridSearchCV
# creates its own inner folds, so the inner validation folds are scaled with
# statistics that include them. Putting scaler + classifier in a Pipeline
# would avoid that, at the cost of prefixed parameter names.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

estimator = KerasClassifier(get_ann, verbose=0, random_state=0)
grid = GridSearchCV(estimator, param_grid, scoring='accuracy', cv=5, n_jobs=2)
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_score_)



{'batch_size': 16, 'epochs': 400, 'model__init': 'glorot_uniform', 'optimizer': 'rmsprop'}
0.8280701754385964


In [56]:
# Rebuild the classifier with the four hyper-parameters selected by the grid
# search above.
tuned = {
    name: grid.best_params_[name]
    for name in ('batch_size', 'epochs', 'model__init', 'optimizer')
}
best_model = KerasClassifier(get_ann, verbose=0, random_state=0, **tuned)

# Per-fold accuracies over the stratified folds stored in `splits`.
# NOTE(review): the hyper-parameters were tuned on the training part of
# splits[0], which contains the test folds of the other splits, so the mean
# test accuracy is likely somewhat optimistic.
acc_test, acc_train = [], []
for train_index, test_index in splits:
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Scale with statistics from this fold's training part only.
    sc = StandardScaler().fit(X_train)
    X_train, X_test = sc.transform(X_train), sc.transform(X_test)

    best_model.fit(X_train, y_train)
    test_pred = best_model.predict(X_test)
    acc_test.append(accuracy_score(y_test, test_pred))
    train_pred = best_model.predict(X_train)
    acc_train.append(accuracy_score(y_train, train_pred))

print('Train acc:', np.mean(acc_train))
print('Test acc:', np.mean(acc_test))

Train acc: 0.997427447122479
Test acc: 0.885889166776359


Removing cement type

In [57]:
# Second experiment: leave the categorical 'Cement type' column out entirely.
# X is every remaining column except the last one; y is the last column of df.
# Rebinding X here also changes the input width seen by get_ann, which reads
# X.shape[1] when the model is built.
without_cement_type = df.drop(columns='Cement type')
X = without_cement_type.iloc[:, :-1].values
y = df.iloc[:, -1].values
print(X.shape)
print(y.shape)

(1069, 20)
(1069,)


In [58]:
# Same tuning procedure as before, now on the feature matrix without the
# cement type. `param_grid` is the search space defined in the earlier
# grid-search cell.
outer_cv = StratifiedKFold(shuffle=True, random_state=0)
splits = [fold for fold in outer_cv.split(X, y)]

# Tune on the training part of the first fold only.
train_index, test_index = splits[0]
X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

# Standardise using statistics from the training part only.
# NOTE(review): as in the first search, the scaler is fitted before the
# inner cross-validation splits are made.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

estimator = KerasClassifier(get_ann, verbose=0, random_state=0)
grid = GridSearchCV(estimator, param_grid, scoring='accuracy', cv=5, n_jobs=2)
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_score_)



{'batch_size': 32, 'epochs': 400, 'model__init': 'uniform', 'optimizer': 'rmsprop'}
0.8467836257309942


In [59]:
# Classifier configured with the hyper-parameters chosen by the second grid
# search (feature set without the cement type).
tuned = {
    name: grid.best_params_[name]
    for name in ('batch_size', 'epochs', 'model__init', 'optimizer')
}
best_model = KerasClassifier(get_ann, verbose=0, random_state=0, **tuned)

# Collect train/test accuracy for every fold in `splits`, then report means.
# NOTE(review): tuning used the training part of splits[0], which overlaps
# the test folds of the remaining splits, so the mean test accuracy may be
# optimistic.
acc_test, acc_train = [], []
for train_index, test_index in splits:
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Scale with statistics from this fold's training part only.
    sc = StandardScaler().fit(X_train)
    X_train, X_test = sc.transform(X_train), sc.transform(X_test)

    best_model.fit(X_train, y_train)
    test_pred = best_model.predict(X_test)
    acc_test.append(accuracy_score(y_test, test_pred))
    train_pred = best_model.predict(X_train)
    acc_train.append(accuracy_score(y_train, train_pred))

print('Train acc:', np.mean(acc_train))
print('Test acc:', np.mean(acc_test))

Train acc: 0.9929832759468766
Test acc: 0.8989688912289939
