In [1]:
import tensorflow as tf
from tensorflow.keras.utils import plot_model
from tensorflow.keras import Model, Input, layers
from tensorflow.keras.layers import MaxPool2D, GlobalMaxPool2D, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, Concatenate,Flatten, Dense, Dropout
from tensorflow.keras.regularizers import l2, l1

In [4]:
import pandas as pd
import numpy as np
import os
import cv2
from joblib import load, dump
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import recall_score,precision_score
from sklearn.metrics import roc_auc_score, accuracy_score, average_precision_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold,KFold,RepeatedKFold,RepeatedStratifiedKFold, train_test_split
from aggmap import AggMap, AggMapNet

In [5]:
# Root directory that holds the per-species / per-drug label CSV files.
source_path = "/raid/lzyt_dir/Data_DRIAMS/Redo"
# Root directory that holds the pre-computed feature arrays (X_<specie>_<drug>.data).
Xpath = "/mnt/lzyt/DRIAMS/A"
# Species sub-directory used when building both label and feature paths.
specie = "Ecoli"
# Value exported as CUDA_VISIBLE_DEVICES (kept as a string because it goes into os.environ).
gpuid = "0"

In [6]:
# Restrict TensorFlow to the configured GPU and tolerate duplicate OpenMP runtimes.
os.environ["CUDA_VISIBLE_DEVICES"] = gpuid
os.environ['KMP_DUPLICATE_LIB_OK'] = "TRUE"

physical_gpus = tf.config.list_physical_devices('GPU')
if physical_gpus:
    # Allocate GPU memory on demand instead of reserving it all up front.
    tf.config.experimental.set_memory_growth(physical_gpus[0], True)
else:
    # Previously this cell crashed with IndexError when no GPU was visible.
    print(f"No GPU visible with CUDA_VISIBLE_DEVICES={gpuid!r}; running on CPU.")

In [2]:
def Inception(inputs, units = 8, strides = 1):
    """
    Naive Inception-style block.

    Applies three parallel same-padded ReLU convolutions (kernel sizes 5, 3
    and 1, each with `units` filters and the given `strides`) to `inputs`,
    concatenates the three branch outputs and batch-normalises the result.
    """
    # Branches are created in 5 -> 3 -> 1 order so layer creation order is unchanged.
    branches = [
        Conv2D(units, kernel, padding='same', activation='relu', strides=strides)(inputs)
        for kernel in (5, 3, 1)
    ]
    merged = Concatenate()(branches)
    return BatchNormalization(axis=-1, epsilon=1e-5, momentum=0.8)(merged)

In [12]:
def MK(inputs, units = [12, 24, 48], strides = 1):
    """
    Multi-kernel convolution block.

    Applies three parallel same-padded ReLU convolutions to `inputs` with
    kernel sizes 7, 5 and 3, using units[0], units[1] and units[2] filters
    respectively, concatenates the branch outputs and batch-normalises them.
    """
    # Pair each kernel size with its filter count; order 7 -> 5 -> 3 is preserved.
    branch_specs = ((units[0], 7), (units[1], 5), (units[2], 3))
    branches = [
        Conv2D(filters, kernel, padding='same', activation='relu', strides=strides)(inputs)
        for filters, kernel in branch_specs
    ]
    merged = Concatenate()(branches)
    return BatchNormalization(axis=-1, epsilon=1e-5, momentum=0.8)(merged)

In [3]:
def _MALDINet(input_shape,
              n_outputs = 1,
              n_inception = 2,
              dense_layers = [256, 128, 64, 32],
              dense_avf = 'relu',
              dropout = 0,
              last_avf = 'softmax'):
    """
    Build (but do not compile) the convolutional classifier used in this notebook.

    Architecture: MK stem -> n_inception x (stride-2 max-pool + Inception block)
    -> global max-pool -> dense stack -> output layer.

    Parameters
    ----------
    input_shape : sequence of length 3 (w, h, c)
    n_outputs : number of units in the output layer
    n_inception : number of max-pool + Inception stages
    dense_layers : list of unit counts, one Dense layer per entry
    dense_avf : activation function of the dense layers
    dropout : dropout rate applied after every dense layer; falsy disables dropout
    last_avf : activation function of the output layer

    Returns
    -------
    tf.keras.Model mapping the input tensor to the output layer.

    NOTE(review): with the defaults n_outputs=1 and last_avf='softmax' the single
    softmax unit can only output 1.0; pass n_outputs >= 2 or another activation.
    """
    # Reset Keras global state before building a fresh graph.
    tf.keras.backend.clear_session()
    assert len(input_shape) == 3

    inputs = Input(input_shape)

    # Stem: multi-kernel convolution block.
    features = MK(inputs=inputs)

    # Each stage downsamples, then applies an Inception block whose per-branch
    # filter count doubles every stage (32, 64, ...).
    for stage in range(n_inception):
        features = MaxPool2D(pool_size = 3, strides = 2, padding = 'same')(features)
        features = Inception(features, strides = 1, units = 32 * (2 ** stage))

    # Collapse the spatial dimensions.
    hidden = GlobalMaxPool2D()(features)

    # Fully connected head, with optional dropout after each layer.
    for width in dense_layers:
        hidden = Dense(width, activation = dense_avf)(hidden)
        if dropout:
            hidden = Dropout(rate = dropout)(hidden)

    # Output layer.
    outputs = Dense(n_outputs, activation = last_avf)(hidden)

    return tf.keras.Model(inputs = inputs, outputs = outputs)

In [59]:
drug = 'cip'
# Load the labels for this drug (single column, no header row).
Labels = pd.read_csv(os.path.join(source_path, str(specie), str(drug), 'clean_labels.csv'), header=None)

# Any label other than 0 is treated as the positive class.
is_positive = (Labels[0] != 0).to_numpy()

# One-hot targets: column 0 marks label 0, column 1 marks every other label.
train_Y = np.zeros((len(Labels), 2))
train_Y[~is_positive, 0] = 1
train_Y[is_positive, 1] = 1
# Flat 0/1 class vector (intended for stratified evaluation).
test_Y = is_positive.astype(float)

y = train_Y.astype(int)
Y = test_Y.astype(int)
X = load(os.path.join(Xpath, str(specie),'Xdata', f"X_{specie}_{drug}.data"))

In [61]:
# 1. Prepare the data: hold out 20% of the samples as a test set (fixed seed).
# NOTE(review): the split is not stratified; consider passing `stratify=` if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Build and compile the model.
model = _MALDINet(X.shape[1:], n_outputs = y.shape[-1], dropout = 0)
# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the default and is
# rejected by newer Keras optimizers, so it is omitted.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
auc_metric = tf.keras.metrics.AUC(curve='ROC')
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[auc_metric])

# 3. Train the model.
history = model.fit(X_train, y_train, batch_size=64, epochs=100)

# Compute the ROC AUC on the held-out test set.
y_pred = model.predict(X_test)
test_auc = roc_auc_score(y_test, y_pred)
test_auc

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
13/62 [=====>........................] - ETA: 1s - loss: 4.9104e-04 - auc: 1.000 - ETA: 1s - loss: 4.8631e-04 - auc: 1.000 - ETA: 1s - loss: 4.7098e-04 - auc: 1.000 - ETA: 1s - loss: 4.6003e-04 - auc: 1.000 - ETA: 1s - loss: 4.6216e-04 - auc: 1.000 - ETA: 1s - loss: 4.5986e-04 - auc: 1.000 - ETA: 1s - loss: 4.4835e-04 - auc: 1.0000

pip

In [55]:
drug = 'pip'
# Load the labels for this drug (single column, no header row).
Labels = pd.read_csv(os.path.join(source_path, str(specie), str(drug), 'clean_labels.csv'), header=None)

# Any label other than 0 is treated as the positive class.
is_positive = (Labels[0] != 0).to_numpy()

# One-hot targets: column 0 marks label 0, column 1 marks every other label.
train_Y = np.zeros((len(Labels), 2))
train_Y[~is_positive, 0] = 1
train_Y[is_positive, 1] = 1
# Flat 0/1 class vector (intended for stratified evaluation).
test_Y = is_positive.astype(float)

y = train_Y.astype(int)
Y = test_Y.astype(int)

X = load(os.path.join(Xpath, str(specie),'Xdata', f"X_{specie}_{drug}.data"))

In [58]:
# 1. Prepare the data: hold out 20% of the samples as a test set (fixed seed).
# NOTE(review): the split is not stratified; consider passing `stratify=` if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Build and compile the model.
model = _MALDINet(X.shape[1:], n_outputs = y.shape[-1], dropout = 0.3)
# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the default and is
# rejected by newer Keras optimizers, so it is omitted.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
auc_metric = tf.keras.metrics.AUC(curve='ROC')
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[auc_metric])

# 3. Train the model.
history = model.fit(X_train, y_train, batch_size=64, epochs=80)

# Compute the ROC AUC on the held-out test set.
y_pred = model.predict(X_test)
test_auc = roc_auc_score(y_test, y_pred)
test_auc

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


0.7267173963602535

tob

In [32]:
drug = 'tob'
# Load the labels for this drug (single column, no header row).
Labels = pd.read_csv(os.path.join(source_path, str(specie), str(drug), 'clean_labels.csv'), header=None)

# Any label other than 0 is treated as the positive class.
is_positive = (Labels[0] != 0).to_numpy()

# One-hot targets: column 0 marks label 0, column 1 marks every other label.
train_Y = np.zeros((len(Labels), 2))
train_Y[~is_positive, 0] = 1
train_Y[is_positive, 1] = 1
# Flat 0/1 class vector (intended for stratified evaluation).
test_Y = is_positive.astype(float)

y = train_Y.astype(int)
Y = test_Y.astype(int)

X = load(os.path.join(Xpath, str(specie),'Xdata', f"X_{specie}_{drug}.data"))

In [34]:
# 1. Prepare the data: hold out 20% of the samples as a test set (fixed seed).
# NOTE(review): the split is not stratified; consider passing `stratify=` if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Build and compile the model.
model = _MALDINet(X.shape[1:], n_outputs = y.shape[-1], dropout = 0.3)
# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the default and is
# rejected by newer Keras optimizers, so it is omitted.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
auc_metric = tf.keras.metrics.AUC(curve='ROC')
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[auc_metric])

# 3. Train the model.
history = model.fit(X_train, y_train, batch_size=64, epochs=60)

# Compute the ROC AUC on the held-out test set.
y_pred = model.predict(X_test)
test_auc = roc_auc_score(y_test, y_pred)
test_auc

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


0.7048648648648649

In [35]:
drug = 'ceft'
# Load the labels for this drug (single column, no header row).
Labels = pd.read_csv(os.path.join(source_path, str(specie), str(drug), 'clean_labels.csv'), header=None)

# Any label other than 0 is treated as the positive class.
is_positive = (Labels[0] != 0).to_numpy()

# One-hot targets: column 0 marks label 0, column 1 marks every other label.
train_Y = np.zeros((len(Labels), 2))
train_Y[~is_positive, 0] = 1
train_Y[is_positive, 1] = 1
# Flat 0/1 class vector (intended for stratified evaluation).
test_Y = is_positive.astype(float)

y = train_Y.astype(int)
Y = test_Y.astype(int)

X = load(os.path.join(Xpath, str(specie),'Xdata', f"X_{specie}_{drug}.data"))

In [36]:
# 1. Prepare the data: hold out 20% of the samples as a test set (fixed seed).
# NOTE(review): the split is not stratified; consider passing `stratify=` if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Build and compile the model.
model = _MALDINet(X.shape[1:], n_outputs = y.shape[-1], dropout = 0.3)
# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the default and is
# rejected by newer Keras optimizers, so it is omitted.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
auc_metric = tf.keras.metrics.AUC(curve='ROC')
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[auc_metric])

# 3. Train the model.
history = model.fit(X_train, y_train, batch_size=64, epochs=60)

# Compute the ROC AUC on the held-out test set.
y_pred = model.predict(X_test)
test_auc = roc_auc_score(y_test, y_pred)
test_auc

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


0.8702636983535439

In [37]:
drug = 'cefe'
# Load the labels for this drug (single column, no header row).
Labels = pd.read_csv(os.path.join(source_path, str(specie), str(drug), 'clean_labels.csv'), header=None)

# Any label other than 0 is treated as the positive class.
is_positive = (Labels[0] != 0).to_numpy()

# One-hot targets: column 0 marks label 0, column 1 marks every other label.
train_Y = np.zeros((len(Labels), 2))
train_Y[~is_positive, 0] = 1
train_Y[is_positive, 1] = 1
# Flat 0/1 class vector (intended for stratified evaluation).
test_Y = is_positive.astype(float)

y = train_Y.astype(int)
Y = test_Y.astype(int)

X = load(os.path.join(Xpath, str(specie),'Xdata', f"X_{specie}_{drug}.data"))

In [38]:
# 1. Prepare the data: hold out 20% of the samples as a test set (fixed seed).
# NOTE(review): the split is not stratified; consider passing `stratify=` if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Build and compile the model.
model = _MALDINet(X.shape[1:], n_outputs = y.shape[-1], dropout = 0.3)
# `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the default and is
# rejected by newer Keras optimizers, so it is omitted.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
auc_metric = tf.keras.metrics.AUC(curve='ROC')
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[auc_metric])

# 3. Train the model.
history = model.fit(X_train, y_train, batch_size=64, epochs=60)

# Compute the ROC AUC on the held-out test set.
y_pred = model.predict(X_test)
test_auc = roc_auc_score(y_test, y_pred)
test_auc

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


0.8655577919415558

In [None]:
import itertools

In [None]:
# Hyper-parameter grid for the sweep below.
# Drug codes; each one selects a label directory and a feature file.
DRUGS = ["cip", "ceft", "cefe", "pip", "tob"]
# Dropout rates to try (0 disables dropout in the dense head).
DROPOUT = [0, 0.1, 0.2, 0.3, 0.4]
# Training lengths to try, in epochs.
EPOCHS = [40, 60, 80, 100]

In [None]:
# Grid search over dropout rate and epoch count for every drug; one result line per
# configuration is appended to a text file.
# NOTE(review): the same held-out split is used to compare configurations, so the best
# AUC reported here is an optimistic estimate; consider a separate validation split.
for drug in DRUGS:
    # Load the labels and feature arrays for this drug.
    Labels = pd.read_csv(os.path.join(source_path, str(specie), str(drug), 'clean_labels.csv'), header=None)

    # One-hot targets: column 0 marks label 0, column 1 marks every other label.
    is_positive = (Labels[0] != 0).to_numpy()
    train_Y = np.zeros((len(Labels), 2))
    train_Y[~is_positive, 0] = 1
    train_Y[is_positive, 1] = 1
    y = train_Y.astype(int)
    X = load(os.path.join(Xpath, str(specie),'Xdata', f"X_{specie}_{drug}.data"))

    # Split once per drug so every configuration sees the same train/test partition.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    for dropout, epochs in itertools.product(DROPOUT, EPOCHS):
        # Build, compile and train a fresh model for this configuration.
        model = _MALDINet(X.shape[1:], n_outputs=y.shape[-1], dropout=dropout)
        # `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the default
        # and is rejected by newer Keras optimizers, so it is omitted.
        opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        auc_metric = tf.keras.metrics.AUC(curve='ROC')
        model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[auc_metric])
        history = model.fit(X_train, y_train, batch_size=64, epochs=epochs, verbose=1)

        # Compute the test-set AUC and append it to the results log.
        y_pred = model.predict(X_test)
        test_auc = roc_auc_score(y_test, y_pred)
        # NOTE(review): the file is named "output_kpn.txt" although `specie` is 'Ecoli' in
        # this notebook; possibly a leftover from another species' run, confirm the name.
        with open("output_kpn.txt", "a") as file:
            file.write(drug + ' dropout = ' + str(dropout) + ' epochs = ' + str(epochs) + ' auc = ' + str(test_auc) + "\n")