In [4]:
import logging
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold
import numpy as np

from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Activation, Dropout, Flatten, Dense

physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

Using TensorFlow backend.


In [5]:
# Load data
df = pd.read_csv('../data/crx.data')
logging.debug(df.head())
print(df['label'].values[0])


df.replace('?', np.nan, inplace=True)
df['label'].replace('+', 1, inplace=True)
df['label'].replace('-', 0, inplace=True)
df.dropna(how='any', inplace=True)

+


In [6]:
# Convert data
df = pd.get_dummies(df, columns=['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A11', 'A12', 'A13'])
for col in df.columns:
    if col == 'label':
        continue
    df[col].astype(float)
print(df.head())
print(df.shape)

      A2     A3    A8    A14  A15  label  A1_a  A1_b  A4_l  A4_u  ...  A11_19  \
0  30.83  0.000  1.25  00202    0      1     0     1     0     1  ...       0   
1  58.67  4.460  3.04  00043  560      1     1     0     0     1  ...       0   
2  24.50  0.500  1.50  00280  824      1     1     0     0     1  ...       0   
3  27.83  1.540  3.75  00100    3      1     0     1     0     1  ...       0   
4  20.17  5.625  1.71  00120    0      1     0     1     0     1  ...       0   

   A11_20  A11_23  A11_40  A11_67  A12_f  A12_t  A13_g  A13_p  A13_s  
0       0       0       0       0      1      0      1      0      0  
1       0       0       0       0      1      0      1      0      0  
2       0       0       0       0      1      0      1      0      0  
3       0       0       0       0      0      1      1      0      0  
4       0       0       0       0      1      0      0      0      1  

[5 rows x 69 columns]
(653, 69)


In [16]:
def create_model2(optimizer='rmsprop', init='glorot_uniform'):
    model = Sequential()
    model.add(Conv1D(filters=8,
                     kernel_size=2,
                     input_shape=(68, 1),
                     kernel_initializer=init,
                     activation='relu'
                     ))
    model.add(MaxPooling1D())

    model.add(Conv1D(8, 2, activation='relu'))
    model.add(MaxPooling1D())

    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
    model.add(Dense(units=8, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(units=1, activation='sigmoid'))
    # opt = Keras.optimizers.SGD(lr=0.01, momentum=0.9)
    model.compile(loss='binary_crossentropy', optimizer='SGD', metrics=['accuracy'])

    return model

In [52]:
def train_with_kfold(model):
    n_folds = 5

    train_data = df.drop(['label'],axis=1).values
    train_label = df['label'].values

    kfold = KFold(n_folds, shuffle=True, random_state=1)
    for train_ix, test_ix in kfold.split(train_data):
        # select rows for train and test
        trainX, trainY, testX, testY = train_data[train_ix], train_label[train_ix], \
                                       train_data[test_ix], train_label[test_ix]

        # fit model
        trainX = np.asarray(trainX, dtype=float)
        testX = np.asarray(testX, dtype=float)

        trainX = np.expand_dims(trainX, axis=2)
        testX = np.expand_dims(testX, axis=2)

        trainY = np.asarray(trainY)
        testY = np.asarray(testY)

        history = model.fit(trainX, trainY, epochs=10, batch_size=32, validation_data=(testX, testY), verbose=0)
        
        evaluate(model, testX, testY)
        

In [53]:
def evaluate(model, testX, testY):
    # evaluate model
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('Accuracy> %.3f' % (acc * 100.0))
#     print(testX[:1])
#     y_pred = model.predict(testX)
#     print(y_pred)
#     f1 = f1_score(testY, y_pred , average="macro")
#     print("F1 score> {}".format(f1))

In [56]:
def create_model():
    inputs=tf.keras.Input(shape=68)

    x = tf.keras.layers.Dense(30, input_dim=68, activation=tf.nn.relu)(inputs)
    x = tf.keras.layers.Dense(100, activation=tf.nn.relu)(x)
    x = tf.keras.layers.Dense(200, activation=tf.nn.relu)(x)
    output = tf.keras.layers.Dense(1, activation=tf.nn.softmax)(x)

    model = tf.keras.Model(inputs=inputs, outputs=output)
    logging.info(model.summary())

    # Compile model
    opt = tf.keras.optimizers.SGD(lr=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [57]:
from sklearn.metrics import f1_score
model = create_model()
train_with_kfold(model)

Model: "model_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_15 (InputLayer)        [(None, 68)]              0         
_________________________________________________________________
dense_32 (Dense)             (None, 30)                2070      
_________________________________________________________________
dense_33 (Dense)             (None, 100)               3100      
_________________________________________________________________
dense_34 (Dense)             (None, 200)               20200     
_________________________________________________________________
dense_35 (Dense)             (None, 1)                 201       
Total params: 25,571
Trainable params: 25,571
Non-trainable params: 0
_________________________________________________________________
Accuracy> 54.962
Accuracy> 57.252
Accuracy> 50.382
Accuracy> 56.154
Accuracy> 54.615
