In [None]:
# Imports and shared helper objects for model training.

# TF_CPP_MIN_LOG_LEVEL must already be in the environment when TensorFlow is
# imported; assigning it after the import is too late to reliably silence the
# native start-up log messages. "2" filters out INFO and WARNING messages.
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# NOTE: the wildcard imports below can shadow one another (and the stdlib
# `io` module), so their relative order is kept exactly as it was.
import tensorflow as T
from tensorflow import *
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras import layers
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.losses import *
import numpy as np
import pandas as pd
import sklearn.metrics as SK
from sklearn.metrics import *
import matplotlib.pyplot as plt
import io
from utils.GPU import *
from utils import utils
import tabulate

# Project helper objects shared by the cells below.
commons = utils.Commons()          # dataset loading and fingerprint helpers
ts_helper = utils.TS_Helper()      # loss, metric, plotting and statistics helpers
shap_helper = utils.Shap_Helper()  # not used in the cells visible here

In [None]:
# Load the train / validation / test splits (Tb.brucei, random split, fold 4).

# Both constants are forwarded to commons.load_dataset.
TASK_START = 2  # presumably the index of the first label column -- TODO confirm in utils
NUM_TASKS = 1   # number of tasks; also used as the width of the network's output layer

training = './data/classification/Tb.brucei/random_split/train_fold_4.csv'
validation = './data/classification/Tb.brucei/random_split/valid_fold_4.csv'
test = './data/classification/Tb.brucei/random_split/test_fold_4.csv'

# Each call yields a (dataset, labels) pair; the files are read in the order
# train -> validation -> test.
(train_dataset, y_train), (validation_dataset, y_val), (test_dataset, y_test) = (
    commons.load_dataset(csv_path, TASK_START, NUM_TASKS)
    for csv_path in (training, validation, test)
)

# Preview the first rows of the training split.
train_dataset.head()

In [None]:
# Turn the SMILES strings of every split into fingerprint matrices.
# (Per the original note: ECFP fingerprints, the default, computed with RDKit.)

FP_SIZE = 2048  # bit string size
RADIUS = 2      # radius 2, i.e. diameter 4
FEAT = False    # set to True when pharmacophoric features should be considered

# Raw SMILES arrays, one per split.
train_smiles = train_dataset["SMILES"].values
val_smiles = validation_dataset["SMILES"].values
test_smiles = test_dataset["SMILES"].values

# Same fingerprint settings for all three splits, computed in the order
# train -> validation -> test.
X_train, X_val, X_test = (
    commons.assing_fp(smiles, FP_SIZE, RADIUS, FEAT)
    for smiles in (train_smiles, val_smiles, test_smiles)
)

# Sanity check: shapes of the three feature matrices.
X_train.shape, X_val.shape, X_test.shape

In [None]:
# Network definition: a small fully connected classifier on the fingerprints.

# Input width of the network = number of columns in the training matrix.
bit_vector = X_train.shape[1]


def create_model():
    """Build the uncompiled feed-forward classifier.

    Architecture: three Dense(10, LeakyReLU) layers, each followed by Dropout
    (rates 0.5, 0.5 and 0.3), and a final sigmoid Dense layer with NUM_TASKS
    units. The input width is read from the module-level ``bit_vector``.

    Returns:
        A ``T.keras.models.Sequential`` instance.
    """
    dense = T.keras.layers.Dense
    dropout = T.keras.layers.Dropout

    stack = []
    for position, drop_rate in enumerate((0.5, 0.5, 0.3)):
        # Only the first Dense layer declares the input width.
        extra = {"input_dim": bit_vector} if position == 0 else {}
        stack.append(dense(10, activation='LeakyReLU', **extra))
        stack.append(dropout(drop_rate))
    stack.append(dense(NUM_TASKS, activation='sigmoid'))

    return T.keras.models.Sequential(stack)



# Optimizer, learning-rate metric and model compilation.

LEARNING_RATE = 0.1  # initial RMSprop learning rate

optimizer = RMSprop(LEARNING_RATE)
# Presumably a metric that reports this optimizer's current learning rate --
# confirm in utils.TS_Helper.get_lr_metric.
lr_metric = ts_helper.get_lr_metric(optimizer)

model = create_model()
# The optimizer has to be handed to compile(): without it Keras trains with
# its default optimizer, and lr_metric would be tied to an optimizer object
# that never takes part in training.
# Loss: per the original note, a custom loss that copes with missing target
# labels, wrapping the helper's BinaryCrossentropy.
model.compile(
    optimizer=optimizer,
    loss=ts_helper.classification_loss(ts_helper.BinaryCrossentropy),
    metrics=[lr_metric],
)
model.summary()
T.keras.utils.plot_model(model)

In [None]:
# Training hyper-parameters, callbacks and the training run itself.

epochs = 2000    # upper bound; EarlyStopping below can end training sooner
batch_size = 10

# Callbacks are passed to fit() in this order.
# Note that the learning-rate schedule watches the training loss, whereas
# checkpointing and early stopping watch the validation loss.
callbacks_list = [
    # Halve the learning rate after 5 epochs without improvement in 'loss'.
    ReduceLROnPlateau(
        monitor='loss',
        factor=0.5,
        patience=5,
        min_lr=1e-8,
        verbose=1,
        mode='auto',
        cooldown=0,
    ),
    # Keep only the best model (by 'val_loss') on disk.
    ModelCheckpoint(
        filepath="./models/TM-FNN_classification_model.hdf5",
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
        mode='auto',
    ),
    # Stop once 'val_loss' has not improved by at least 0.001 for 5 epochs.
    EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=5,
        mode='min',
        verbose=1,
    ),
]

# Fit on the training split, validating on the validation split each epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    callbacks=callbacks_list,
)

In [None]:
# Plot the training history.
# `history` is the object returned by model.fit in the training cell; what
# exactly gets plotted is decided by utils.TS_Helper.plot_history (presumably
# the loss / metric curves per epoch -- confirm in utils).

ts_helper.plot_history(history)

In [None]:
# Statistical characteristics of the tasks on all three splits.

THRESHOLD = 0.5  # presumably the cut-off applied to the sigmoid outputs -- TODO confirm in utils

# NOTE(review): the feature matrices are passed as train/test/val while the
# labels are passed as train/val/test. Verify this against the signature of
# utils.TS_Helper.get_modelStats; the order is kept exactly as it was.
ts_helper.get_modelStats(
    model,
    X_train, X_test, X_val,
    y_train, y_val, y_test,
    NUM_TASKS,
    THRESHOLD,
)