In [1]:
import os
import random

import keras
import numpy as np

Using TensorFlow backend.


In [10]:
def read_list_file(filepath, group):
    """Map every identifier listed in a text file to the same label.

    Parameters
    ----------
    filepath : str
        Path to a text file holding one identifier per line.
    group : object
        Label stored as the value of every identifier. The very same object
        is shared by all entries (it is not copied).

    Returns
    -------
    dict
        ``{identifier: group}`` for each non-empty line of the file.
    """
    listin = {}
    with open(filepath, "r") as filin:
        for line in filin:
            # Strip only the line terminator: slicing with [:-1] would chop the
            # last character of a final line that has no trailing newline.
            identifier = line.rstrip("\r\n")
            if identifier:  # a blank line is not an identifier
                listin[identifier] = group
    return listin

def split_train_val(all_dic, train_size, test_size,
                    data_dir="/media/anthony/POULOP/deepdrug3d_voxel_data/",
                    voxel_shape=(14, 32, 32, 32), n_classes=3):
    """Draw disjoint random train/validation subsets and load their data.

    Parameters
    ----------
    all_dic : dict
        Maps a sample identifier to its label (a length-``n_classes``
        sequence, or a scalar that is broadcast over the label row).
    train_size, test_size : int
        Number of samples to draw for each subset. If their sum exceeds
        ``len(all_dic)``, both are reset to ``len(all_dic) // 2``.
    data_dir : str, optional
        Directory containing one ``<identifier>.npy`` file per sample.
    voxel_shape : tuple of int, optional
        Shape of the array stored in each ``.npy`` file.
    n_classes : int, optional
        Width of each label row.

    Returns
    -------
    X_train, Y_train, X_val, Y_val : numpy.ndarray
        ``X_*`` has shape ``(n_samples,) + voxel_shape`` and ``Y_*`` has
        shape ``(n_samples, n_classes)``.
    """
    if train_size + test_size > len(all_dic):
        print("train_size and test_size are too big, changing to 50/50")
        # Floor division: random.sample() needs an int, "/" yields a float.
        train_size = len(all_dic) // 2
        test_size = train_size

    all_ids = list(all_dic)
    X_train_id = random.sample(all_ids, train_size)
    # Set membership keeps the filtering linear instead of quadratic, while
    # the list comprehension preserves the original ordering of the ids.
    train_lookup = set(X_train_id)
    X_fullval_id = [item for item in all_ids if item not in train_lookup]
    X_val_id = random.sample(X_fullval_id, test_size)

    X_train, Y_train = _load_voxels(X_train_id, all_dic, data_dir, voxel_shape, n_classes)
    X_val, Y_val = _load_voxels(X_val_id, all_dic, data_dir, voxel_shape, n_classes)

    return X_train, Y_train, X_val, Y_val


def _load_voxels(sample_ids, labels, data_dir, voxel_shape, n_classes):
    """Load the ``.npy`` array and the label of each id into dense arrays."""
    X = np.zeros((len(sample_ids),) + tuple(voxel_shape))
    Y = np.zeros((len(sample_ids), n_classes))
    for i, sample_id in enumerate(sample_ids):
        X[i] = np.load(os.path.join(data_dir, sample_id + ".npy"))
        Y[i, :] = labels[sample_id]
    return X, Y


def notdeepdrug3D():
    """Build and compile the 3D convolutional classifier.

    The network takes a channels-first input of shape ``(14, 32, 32, 32)``
    and stacks: Conv3D(32 filters, kernel 5) -> Dropout(0.2) ->
    Conv3D(32 filters, kernel 3) -> MaxPooling3D(2x2x2) -> Dropout(0.4) ->
    Flatten -> Dense(100, relu) -> Dense(3, softmax).

    Returns
    -------
    keras.Model
        Model compiled with the Adam optimizer, categorical cross-entropy
        loss and the accuracy metric.
    """
    # Options shared by both convolution layers.
    conv_options = {
        "activation": "relu",
        "data_format": "channels_first",
        "padding": "valid",
    }

    inputs = keras.Input(shape=(14, 32, 32, 32))

    x = keras.layers.Conv3D(filters=32, kernel_size=5, **conv_options)(inputs)
    x = keras.layers.Dropout(rate=0.2)(x)
    x = keras.layers.Conv3D(filters=32, kernel_size=3, **conv_options)(x)
    x = keras.layers.MaxPooling3D(
        pool_size=(2, 2, 2),
        strides=None,
        padding="valid",
        data_format="channels_first",
    )(x)
    x = keras.layers.Dropout(rate=0.4)(x)

    # Classification head.
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(units=100, activation="relu")(x)
    outputs = keras.layers.Dense(units=3, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

def mk_confu_table(predicted, observed):
    """Build a confusion table from class scores and one-hot labels.

    Parameters
    ----------
    predicted : array-like, shape (n_samples, n_classes)
        Per-class scores; the predicted class of a row is the index of its
        largest score (first index on ties).
    observed : array-like, shape (n_samples, n_classes)
        Expected labels, assumed to be one-hot encoded.

    Returns
    -------
    numpy.ndarray, shape (n_classes, n_classes)
        ``table[p, o]`` counts the samples predicted as class ``p`` whose
        observed class is ``o`` (rows: predicted, columns: observed).

    Raises
    ------
    ValueError
        If the inputs are not 2-D arrays of identical shape.
    """
    predicted = np.asarray(predicted)
    observed = np.asarray(observed)
    if predicted.ndim != 2 or predicted.shape != observed.shape:
        raise ValueError(
            "predicted and observed must be 2-D arrays of the same shape, got "
            "{} and {}".format(predicted.shape, observed.shape)
        )

    nb_class = predicted.shape[1]
    confu_table = np.zeros((nb_class, nb_class))
    if predicted.size == 0:
        # argmax is undefined on an empty axis; there is nothing to count.
        return confu_table

    predicted_class = predicted.argmax(axis=1)
    observed_class = observed.argmax(axis=1)
    # np.add.at accumulates correctly when the same cell is hit several times.
    np.add.at(confu_table, (predicted_class, observed_class), 1)
    return confu_table

def compute_model_metrics(predicted, observed):
    """Summarise classification quality from predictions and true labels.

    Parameters
    ----------
    predicted : array-like, shape (n_samples, n_classes)
        Per-class scores passed on to ``mk_confu_table``.
    observed : array-like, shape (n_samples, n_classes)
        Expected labels passed on to ``mk_confu_table``.

    Returns
    -------
    dict
        ``"confusion_table"``: table returned by ``mk_confu_table``
        (rows: predicted class, columns: observed class);
        ``"accuracy"``: fraction of samples on the diagonal (``nan`` when the
        table is empty);
        ``"precision"`` / ``"recall"``: per-class arrays, 0 for a class that
        was never predicted / never observed.
    """
    confu_table = mk_confu_table(predicted, observed)

    correct = np.diag(confu_table)
    total = confu_table.sum()
    predicted_totals = confu_table.sum(axis=1)  # row sums
    observed_totals = confu_table.sum(axis=0)   # column sums

    # "where" leaves the pre-filled 0 in place instead of dividing by zero.
    precision = np.divide(
        correct, predicted_totals,
        out=np.zeros_like(correct), where=predicted_totals > 0,
    )
    recall = np.divide(
        correct, observed_totals,
        out=np.zeros_like(correct), where=observed_totals > 0,
    )

    return {
        "confusion_table": confu_table,
        "accuracy": correct.sum() / total if total else float("nan"),
        "precision": precision,
        "recall": recall,
    }

In [11]:
# Identifier lists, one file per ligand group.
control_file = "control.list.txt"
heme_file = "heme.list.txt"
nucleotide_file = "nucleotide.list.txt"
steroid_file = "steroid.list.txt"

# Each file is read into an {identifier: label} dict. The first three groups
# get a 3-element one-hot label; steroid gets the scalar 4.
# NOTE(review): the steroid label does not follow the one-hot scheme --
# confirm that this group is intentionally kept out of the classifier.
control_id, heme_id, nucleotide_id, steroid_id = (
    read_list_file(list_path, label)
    for list_path, label in (
        (control_file, [0, 0, 1]),
        (heme_file, [0, 1, 0]),
        (nucleotide_file, [1, 0, 0]),
        (steroid_file, 4),
    )
)


In [12]:
print(len(control_id), len(heme_id), len(nucleotide_id), len(steroid_id))

# Merge into a NEW dict. Binding all_dic to control_id itself would make the
# updates below mutate control_id, so re-running this cell would report a
# different control count. steroid_id is not merged.
all_dic = dict(control_id)
all_dic.update(heme_id)
all_dic.update(nucleotide_id)

1946 596 1553 69


In [13]:
# Seed the sampler so the train/validation split is the same on every run.
SEED = 42
random.seed(SEED)
X_train, Y_train, X_val, Y_val = split_train_val(all_dic, 100, 50)

notdeepdrug3D_model = notdeepdrug3D()
model_file = "../results/malo_1.h5"
# The checkpoint cannot be written if its directory is missing.
os.makedirs(os.path.dirname(model_file), exist_ok=True)
best_model = keras.callbacks.ModelCheckpoint(
    filepath=model_file,
    monitor="val_loss",
    verbose=0,
    save_best_only=True,
)

# 5% of the training samples are held back by Keras to compute val_loss;
# X_val / Y_val stay untouched for the evaluation cells further down.
history = notdeepdrug3D_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=20,
    epochs=5,
    validation_split=0.05,
    shuffle=True,
    # Without this the checkpoint callback is never run and nothing is saved.
    callbacks=[best_model],
)


Train on 95 samples, validate on 5 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# Model outputs for the training samples and for the held-out samples.
pred_train, pred_val = (
    notdeepdrug3D_model.predict(features) for features in (X_train, X_val)
)

In [18]:
# The model is compiled with metrics=["accuracy"], so the result holds the
# loss followed by the accuracy on the held-out samples.
evaluation = notdeepdrug3D_model.evaluate(x=X_val, y=Y_val, batch_size=20)
print(evaluation)

[1.0616469383239746, 0.25999999046325684]


In [15]:
# Show the predicted class probabilities, one held-out sample per line.
for p in pred_val: print(p)

[0.4193983  0.22628836 0.35431325]
[0.4067036  0.25333726 0.3399591 ]
[0.4171219  0.23440835 0.34846982]
[0.39663458 0.2536271  0.34973836]
[0.37563545 0.28099233 0.34337226]
[0.4144988  0.23237462 0.3531266 ]
[0.3905207  0.25746366 0.35201567]
[0.37575665 0.28707793 0.33716542]
[0.39162585 0.25593415 0.35243994]
[0.3847265  0.26711306 0.34816045]
[0.4225136  0.21458733 0.36289907]
[0.40243086 0.24455133 0.3530178 ]
[0.41877255 0.22987923 0.35134825]
[0.41474265 0.23041752 0.35483977]
[0.40776804 0.24490711 0.34732482]
[0.4067658  0.24517864 0.34805557]
[0.37894747 0.2775143  0.34353822]
[0.40169403 0.2436159  0.35469002]
[0.38826972 0.26364678 0.34808344]
[0.41248927 0.2446159  0.34289476]
[0.40145978 0.24985053 0.34868973]
[0.37684402 0.28202805 0.34112787]
[0.40324867 0.24794953 0.34880176]
[0.42411482 0.23459241 0.34129268]
[0.40363228 0.23997489 0.35639283]
[0.41068953 0.22909845 0.36021203]
[0.40923545 0.23102903 0.3597355 ]
[0.41134444 0.23505534 0.3536003 ]
[0.4035956  0.251285