In [None]:
import tensorflow as T
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import initializers
from tensorflow.keras.regularizers import l1_l2, l1, l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.constraints import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.losses import *
from tensorflow.keras.wrappers.scikit_learn import *
from tensorflow.keras.callbacks import TensorBoard
from tensorboard.plugins.hparams import api as hp

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import DataStructs
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect

from IPython.display import clear_output
import numpy as np
import pandas as pd
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from math import sqrt
from statistics import *
from matplotlib import pyplot
import matplotlib.pyplot as plt
import seaborn as sns
import time
import datetime
import shap 
shap.initjs()
import os 
import io
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.models import load_model
from utils import utils
import gc
#os.environ["TF_CPP_MIN_LOG_LEVEL"] = "-1"

In [None]:
# Runtime configuration: instantiate the project helpers from `utils`, switch the
# TensorFlow helper to regression mode and cap the GPU share it may use.
# FRACTION is the value handed to set_gpu_fraction (1 = 100%).
# NOTE(review): the original comment called this the "percentage of cuda cores";
# what exactly the helper limits is not visible here — confirm in utils.TS_Helper.
commons = utils.Commons()
ts_helper = utils.TS_Helper()
shap_helper = utils.Shap_Helper()
ts_helper.model_type = ts_helper.Regression
FRACTION = 0.75

ts_helper.set_gpu_fraction(FRACTION)


In [None]:
# Load the training table and the hit-molecule table.
# NOTE(review): the earlier comment spoke of y_train/y_test sets, but no test split is created in this cell.

# The task columns are addressed by position; TASK_END is the exclusive end index
# used further down to slice them out of hit_data with iloc.
TASK_START = 2
N_TASKS = 3
TASK_END = TASK_START + N_TASKS
# Column name handed to load_dataset for the SMILES strings.
SMILES = "SMILES"
data_tryp = r"./data/data_tryp.csv"
molecula_hit = r"./data/hit_smiles.csv"

# presumably returns (full table, task targets, SMILES strings) — confirm against utils.Commons.load_dataset
train_dataset,y_train,train_smiles = commons.load_dataset(data_tryp,SMILES, TASK_START, N_TASKS)
hit_data,y_hit,hit_smiles = commons.load_dataset(molecula_hit,SMILES, TASK_START, N_TASKS)

In [None]:
# Summary statistics of the training table.
# presumably train_dataset is a pandas DataFrame, so describe() yields a DataFrame — confirm in utils
train_stats = train_dataset.describe()
# pop() removes the "ID" entry from train_stats in place and returns it; as the
# last expression of the cell, the removed entry (not the remaining table) is what gets displayed.
train_stats.pop("ID")

In [None]:
# Fingerprint settings shared by the training and hit molecules; the same values
# are reused later when the bit information is extracted for the hit molecules.
# presumably bit-vector length, Morgan radius and a feature-invariants flag — confirm in utils.Commons.assing_fp
FP_SIZE = 2048
RADIUS = 2
FEAT = False

# Turn the SMILES strings of both sets into model inputs.
X_train = commons.assing_fp(train_smiles,FP_SIZE,RADIUS,FEAT)
X_hit = commons.assing_fp(hit_smiles,FP_SIZE,RADIUS,FEAT)
#split data for model building
#X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=1, shuffle=True)

# Display both shapes as a quick sanity check.
X_train.shape, X_hit.shape

In [None]:
# Load the pre-trained regression network from disk. The custom loss and the
# learning-rate metric are supplied through custom_objects so the saved model
# can be deserialized.
base_model = load_model(
    './models/TM-FNN_regression_model.hdf5',
    custom_objects={'regression_loss': ts_helper.regression_loss, 'lr': ts_helper.get_lr_metric},
)

# Re-compile with an explicit optimizer. The optimizer must be passed to
# compile(): previously it was only given to the lr metric, so Keras attached
# its own default optimizer and the metric observed an object the model never used.
optimizer = RMSprop(learning_rate=0.001)
lr_metric = ts_helper.get_lr_metric(optimizer)
base_model.compile(optimizer=optimizer, loss=ts_helper.regression_loss, metrics=[lr_metric])

# Print the layer table, then render the architecture diagram as the cell output.
base_model.summary()
T.keras.utils.plot_model(base_model)

# Get the fragment images

#### Get the overall bit information

In [None]:
# Collect the fingerprint bit information for the hit molecules, using the same
# fingerprint settings (FP_SIZE, RADIUS, FEAT) as for the model inputs.
# presumably atom_list/radius_list hold, per molecule, the centre atom and radius of each set bit — confirm in utils.Shap_Helper.get_bit_info
atom_list, radius_list, mols, bit_list = shap_helper.get_bit_info(hit_smiles,FP_SIZE,RADIUS,FEAT)
# Number of entries in mols.
print(len(mols))
# Build the fragment list from the bit information gathered above.
frag_list = shap_helper.generateFragList(mols,radius_list,atom_list,bit_list)
# Spot check: print the entry at index 1 (requires at least two entries).
print(frag_list[1])

# SHAP values

In [None]:
# Convert both feature sets into the layout used by the SHAP cells below.
# The results support .shape, .head() and .iloc, i.e. they behave like pandas DataFrames.
X_f_train = shap_helper.formatDataforShapValues(X_train)
X_f_hit = shap_helper.formatDataforShapValues(X_hit)

# Display both shapes plus the first rows of the training frame.
X_f_train.shape, X_f_hit.shape,X_f_train.head()

In [None]:
# Independent masker built from the hit fingerprints.
# NOTE(review): `masker` is not referenced by any of the cells visible here (the
# KernelExplainer below takes its background data directly) — confirm it is still needed.
masker = shap.maskers.Independent(data = X_hit)

In [None]:
# Model-agnostic Kernel SHAP explainer around the network's predict function.
# The background data is the first four rows of the hit set; the identity link
# keeps the explanation in the model's own output units.
# NOTE(review): the background comes from the hit molecules rather than the training set — confirm this is intended.
explainer_kernel = shap.KernelExplainer(model = base_model.predict, data = X_f_hit.head(4), link = "identity")

In [None]:
import ipywidgets as widgets

# Names of the task columns of the hit table; these become the dropdown entries.
list_of_labels = hit_data.iloc[:, TASK_START:TASK_END].columns.to_list()

# (label, position) pairs: the dropdown shows the label text and hands back its
# position, which later cells use to index the per-output SHAP results.
tuple_of_labels = [(label, position) for position, label in enumerate(list_of_labels)]

# Dropdown with the first label pre-selected; read the choice through `current_label.value`.
current_label = widgets.Dropdown(
    options=tuple_of_labels,
    value=0,
    description='Select Label:',
)

# Leaving the widget as the last expression renders it as the cell output.
current_label

In [None]:
# Half-open row window [start, end) of the hit table that the following cells explain.
start = 0
end = start+1
# Preview exactly the rows that are explained. The previous list-based lookup
# `[[start, end]]` also showed row `end`, which is outside the explained window,
# and raised IndexError when the table had no such row.
X_f_hit.iloc[start:end,:]

In [None]:
import math
# Run the garbage collector before the expensive Kernel SHAP computation.
gc.collect()
# Explain the rows start..end-1 of the hit table; nsamples is the number of model
# re-evaluations KernelExplainer performs per explained row.
# NOTE(review): Kernel SHAP sampling is stochastic and no seed is set in this notebook.
shap_value_single = explainer_kernel.shap_values(X = X_f_hit.iloc[start:end,:], nsamples = 1000)
# presumably selects the 20 bits with the strongest SHAP contribution — confirm in utils.Shap_Helper.get_bits_fromBestShaps
best_bits = shap_helper.get_bits_fromBestShaps(X_f_hit,shap_value_single,20)

In [None]:
# Interactive (JavaScript) force plot for the explained hit rows and the model
# output currently selected in the dropdown.
print(f'Current Label Shown: {list_of_labels[current_label.value]}\n')

# The SHAP values were computed on X_f_hit, so the feature values shown next to
# them must be taken from the same rows of X_f_hit (previously X_f_train was
# used, which paired the contributions with another molecule's bits).
shap.force_plot(base_value = explainer_kernel.expected_value[current_label.value],
                matplotlib=False,
                shap_values = shap_value_single[current_label.value],
                features = X_f_hit.iloc[start:end,:])


In [None]:
# Static matplotlib rendering of the same force plot (explained hit rows, model
# output selected in the dropdown).
print(f'Current Label Shown: {list_of_labels[current_label.value]}\n')

# Feature values come from X_f_hit — the table the SHAP values were computed on —
# instead of X_f_train, so contributions and displayed bit values belong together.
shap.force_plot(base_value = explainer_kernel.expected_value[current_label.value],
                shap_values = shap_value_single[current_label.value],
                features = X_f_hit.iloc[start:end,:],
                show=True,matplotlib=True, plot_cmap='RdBu')


In [None]:
# Draw the hit molecules with highlights derived from the fragment list and the
# bits picked from the SHAP values, then show the first element of the result.
# presumably the positional 0 selects the molecule index — confirm in utils.Shap_Helper.draw_highlightedMols
combined_list = frag_list
shap_helper.draw_highlightedMols(mols, combined_list, 0, best_bits)[0]
