In [None]:
import os

# TensorFlow's native (C++) logger picks up TF_CPP_MIN_LOG_LEVEL while the
# library is being loaded, so the variable has to be set before the first
# tensorflow import for the import-time messages to be filtered.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as T
from tensorflow import *
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras import layers
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.losses import *
import numpy as np
import pandas as pd
import sklearn.metrics as SK
from sklearn.metrics import *
import matplotlib.pyplot as plt
# `os` and `io` are (re)bound here, after the star imports above, so that none
# of those star imports can leave a different object under these names.
import os
import io
from functions.GPU import *

In [None]:
#Import train, validation and test sets

# Folder that holds the per-fold CSV splits. It defaults to the location used
# when the notebook was written; set the COVID_DATA_DIR environment variable to
# run the notebook elsewhere without editing this cell.
DATA_DIR = os.environ.get(
    "COVID_DATA_DIR",
    r'C:\Users\meryc\OneDrive\Desktop\Trabalhos\COVID\COVID_classification_base',
)
FOLD = 4  # number used in the train/valid/test file names

training = os.path.join(DATA_DIR, f'train_fold_{FOLD}.csv')
validation = os.path.join(DATA_DIR, f'valid_fold_{FOLD}.csv')
test = os.path.join(DATA_DIR, f'test_fold_{FOLD}.csv')


train_dataset = pd.read_csv(training, delimiter=',', low_memory=False)
val_dataset = pd.read_csv(validation, delimiter=',', low_memory=False)
test_dataset = pd.read_csv(test, delimiter=',', low_memory=False)


# Last expression of the cell: preview the first rows of the training table.
train_dataset.head()

In [None]:
#Create y_train, y_val and y_test sets

task_start = 2                    # position of the first target column
tasks = 1                         # number of target columns (one per task)
task_index = task_start + tasks   # exclusive end of the target column range

# The three tables were already read by the data-loading cell, so the target
# columns are sliced from them directly instead of parsing the CSV files again.

# targets of the training set
y_train = np.array(train_dataset.iloc[:, task_start:task_index].values)
print(f"loaded y_train data: {y_train.shape}")

# targets of the validation set
y_val = np.array(val_dataset.iloc[:, task_start:task_index].values)
print(f"loaded y_val data: {y_val.shape}")

# targets of the test set
y_test = np.array(test_dataset.iloc[:, task_start:task_index].values)
print(f"loaded y_test data: {y_test.shape}")

In [None]:
# Compute fingerprints (ECFP by default, via RDKit) for every data split

from functions.fingerprints import *

# SMILES strings of each split, taken from the "SMILES" column.
train_smiles, val_smiles, test_smiles = (
    split["SMILES"].values
    for split in (train_dataset, val_dataset, test_dataset)
)

# One feature matrix per split. FP_SIZE and RADIUS are not defined in this
# notebook -- presumably they come from one of the star imports; verify.
X_train, X_val, X_test = (
    assing_fp(smiles, FP_SIZE, RADIUS)
    for smiles in (train_smiles, val_smiles, test_smiles)
)

# Cell output: shapes of the three feature matrices.
X_train.shape, X_val.shape, X_test.shape

In [None]:
# custom loss function for missing values in input data (i.e. target labels or values)

from functions.utils import *


# Network parameters

# Width of the input layer = number of columns of the training feature matrix.
bit_vector = X_train.shape[1]

def create_model():
    """Build the (uncompiled) feed-forward classifier.

    Three hidden Dense layers (10, 5 and 3 units) with LeakyReLU activation and
    an L1 kernel penalty, each followed by Dropout(0.2), then a single sigmoid
    output unit. The input width is read from the module-level `bit_vector`
    at call time.

    Returns:
        A `Sequential` Keras model.
    """
    # (units, L1 penalty) of each hidden layer, in order.
    hidden_spec = ((10, 0.002), (5, 0.002), (3, 0.005))

    network = T.keras.models.Sequential()
    for position, (units, l1_strength) in enumerate(hidden_spec):
        # Only the first layer declares the input width.
        input_kwargs = {"input_dim": bit_vector} if position == 0 else {}
        network.add(T.keras.layers.Dense(
            units,
            activation='LeakyReLU',
            kernel_regularizer=T.keras.regularizers.L1(l1_strength),
            **input_kwargs,
        ))
        network.add(T.keras.layers.Dropout(0.2))

    network.add(T.keras.layers.Dense(1, activation='sigmoid'))
    return network


# Metric object created by the original cell; it is not attached to the model
# below and is kept only so the name stays defined.
accuracy = T.keras.metrics.Accuracy()

optimizer = Nadam(learning_rate=1e-3)
# get_lr_metric is a project helper; presumably it returns a metric that
# reports this optimizer's learning rate -- verify in functions.utils.
lr_metric = get_lr_metric(optimizer)

model = create_model()
# The optimizer must be passed explicitly: without it Keras compiles the model
# with its default optimizer, so the Nadam instance above (and the learning
# rate it was configured with) would never be used for training.
model.compile(
    optimizer=optimizer,
    loss=classification_loss(BinaryCrossentropy),
    metrics=[lr_metric],
)
model.summary()
T.keras.utils.plot_model(model)

In [None]:
# Training configuration

epochs = 2000      # upper bound; EarlyStopping normally ends training sooner
batch_size = 24


# Callbacks: learning-rate decay, best-model checkpoint and early stopping

callbacks_list = [
    # Halve the learning rate when the *training* loss stops improving.
    ReduceLROnPlateau(
        monitor='loss',
        factor=0.5,
        patience=5,
        min_lr=0.00000001,
        verbose=1,
        mode='auto',
        cooldown=0,
    ),
    # Write the model to disk whenever the validation loss reaches a new best.
    ModelCheckpoint(
        filepath="./models/test_model4.h5",
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
        mode='auto',
    ),
    # Stop after 5 epochs without a validation-loss improvement of at least 0.001.
    # NOTE(review): restore_best_weights is not set, so `model` keeps the weights
    # of the last epoch run; the best weights are only in the checkpoint file.
    EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=5,
        mode='min',
        verbose=1,
    ),
]


history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    callbacks=callbacks_list,
)

In [None]:
#Plot model history

hist = history.history

fig = plt.figure(figsize=(13, 9))

# Top-left panel: training and validation loss per epoch.
loss_ax = fig.add_subplot(221)
for label in ['val_loss', 'loss']:
    loss_ax.plot(hist[label], label=label)
# Axis decoration is done once, after both curves are drawn.
loss_ax.legend()
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("loss")

# Top-right panel: validation loss against the logged learning rate.
# The curve has no label, so no legend is requested here (asking for one only
# produced a "no artists with labels" warning and an empty legend box).
lr_ax = fig.add_subplot(222)
lr_ax.plot(hist['lr'], hist['val_loss'])
lr_ax.set_xlabel("lr")
lr_ax.set_ylabel("val_loss")

fig.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25,
                    wspace=0.35)

In [None]:
#Statistical characteristics of tasks

threshold = 0.5


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def _report_split(split_name, task_number, y_true, y_pred):
    """Print the classification statistics of one task on one data split.

    Args:
        split_name: Text shown in the header ("training", "validation", "test").
        task_number: 1-based task number shown in the header.
        y_true: 1-D array of reference labels; NaN marks a missing label.
        y_pred: 1-D array of thresholded predictions, same length as y_true.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Only samples that actually carry a label can be scored.
    labelled = ~np.isnan(y_true)
    y_true, y_pred = y_true[labelled], y_pred[labelled]

    print(("Results for task {} ({})").format(task_number, split_name))
    if y_true.size == 0:
        print("no labelled samples")
        return

    # Reference labels go first, predictions second: rows of the matrix are the
    # true class and columns the predicted class. Passing them the other way
    # round swaps FP and FN (and therefore SE/PPV and SP/NPV).
    # labels=[0, 1] keeps the matrix 2x2 even when one class is absent
    # (assumes labels are encoded as 0/1 -- TODO confirm against the CSV files).
    confusion = SK.confusion_matrix(y_true, y_pred, labels=[0, 1])
    TN, FP, FN, TP = confusion.ravel()

    print("ACC\t%.2f" % _safe_ratio(TN + TP, TN + TP + FN + FP))
    print("MCC\t%.2f" % SK.matthews_corrcoef(y_true, y_pred))
    print("kappa\t%.2f" % SK.cohen_kappa_score(y_true, y_pred))
    print("SE\t%.2f" % _safe_ratio(TP, TP + FN))
    print("SP\t%.2f" % _safe_ratio(TN, TN + FP))
    print("PPV\t%.2f" % _safe_ratio(TP, TP + FP))
    print("NPV\t%.2f" % _safe_ratio(TN, TN + FN))
    print("TPR\t%.2f" % _safe_ratio(TP, TP + FN))
    print("FPR\t%.2f" % _safe_ratio(FP, FP + TN))
    print("F1\t%.2f" % SK.f1_score(y_true, y_pred))


# Model outputs turned into hard 0/1 decisions at `threshold`.
prediction_train = model.predict(X_train)
prediction_train = np.where(prediction_train > threshold, 1.0,0.0)
prediction_val = model.predict(X_val)
prediction_val = np.where(prediction_val > threshold, 1.0,0.0)
prediction_test = model.predict(X_test)
prediction_test = np.where(prediction_test > threshold, 1.0,0.0)


# (header text, reference labels, predictions) of every split, in report order.
evaluation_splits = (
    ("training", y_train, prediction_train),
    ("validation", y_val, prediction_val),
    ("test", y_test, prediction_test),
)

# One pass over the tasks (output columns); each task is reported on the
# training, validation and test split in turn.
for task in range(prediction_train.shape[1]):
    for split_name, split_labels, split_predictions in evaluation_splits:
        _report_split(split_name, task + 1,
                      split_labels[:, task], split_predictions[:, task])