## TensorBoard

In [None]:
# Load the TensorBoard notebook extension so the %tensorboard magic can be used.
%load_ext tensorboard

In [None]:
import datetime

In [None]:
# Recursively delete ./logs/ (no error if it does not exist).
!rm -rf ./logs/

## 連接本機(fail)

In [None]:
# Install the google-colab package with pip.
!pip install google-colab

In [None]:
# Run index.ipynb through nbconvert, executing its cells (--execute).
!jupyter nbconvert --execute index.ipynb

In [None]:
# Install matplotlib with pip.
!pip install matplotlib

## 前置作業

In [None]:
# Install TensorFlow GNN (imported below as tfgnn); -q keeps pip output quiet.
!pip install -q tensorflow-gnn

In [None]:
# Mount Google Drive at /content/drive; every dataset path in this notebook
# lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import pandas as pd
import math
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_gnn as tfgnn

In [None]:
# Print the devices TensorFlow reports for this runtime.
from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

In [None]:
# Schema of one serialized graph, assembled piece by piece:
#   * context: a single int32 'label' value per graph,
#   * 'atoms' node set: 7 float features per node,
#   * 'bonds' edge set: 4 float features per edge, connecting atoms to atoms.
# Each graph is a single component, hence the (1,) sizes specs.
label_context_spec = tfgnn.ContextSpec.from_field_specs(
    features_spec={'label': tf.TensorSpec(shape=(1,), dtype=tf.int32)})

atoms_spec = tfgnn.NodeSetSpec.from_field_specs(
    features_spec={tfgnn.HIDDEN_STATE: tf.TensorSpec((None, 7), tf.float32)},
    sizes_spec=tf.TensorSpec((1,), tf.int32))

bonds_spec = tfgnn.EdgeSetSpec.from_field_specs(
    features_spec={tfgnn.HIDDEN_STATE: tf.TensorSpec((None, 4), tf.float32)},
    sizes_spec=tf.TensorSpec((1,), tf.int32),
    adjacency_spec=tfgnn.AdjacencySpec.from_incident_node_sets(
        'atoms', 'atoms'))

graph_tensor_spec = tfgnn.GraphTensorSpec.from_piece_specs(
    context_spec=label_context_spec,
    node_sets_spec={'atoms': atoms_spec},
    edge_sets_spec={'bonds': bonds_spec})


def decode_fn(record_bytes):
  """Parse one serialized record into a ``(graph, label)`` pair.

  Intended as the function passed to ``tf.data.Dataset.map`` over a
  TFRecord dataset.

  Args:
    record_bytes: One serialized example laid out according to the
      module-level ``graph_tensor_spec``.

  Returns:
    A tuple ``(graph, label)``: the parsed GraphTensor with the ``'label'``
    feature removed from its context, and the removed label tensor.
  """
  graph = tfgnn.parse_single_example(
      graph_tensor_spec, record_bytes, validate=True)

  # Split the label off the context so the model input does not carry its
  # own target.
  context_features = graph.context.get_features_dict()
  label = context_features.pop('label')
  return graph.replace_features(context=context_features), label

## load training data (for cross-validation)

In [None]:
# One decoded TFRecord dataset per fold (files are numbered 1..10), kept in
# two parallel lists: train_ds[k] and val_ds[k] belong to fold k + 1.
train_ds, val_ds = [], []
cv_root = '/content/drive/MyDrive/forth_dataset/10_folds_CV'

for fold in range(1, 11):
    train_file = f'{cv_root}/train/train_smiles_{fold}.tfrecord'
    val_file = f'{cv_root}/val/val_smiles_{fold}.tfrecord'
    train_ds.append(tf.data.TFRecordDataset([train_file]).map(decode_fn))
    val_ds.append(tf.data.TFRecordDataset([val_file]).map(decode_fn))


## load final training data

In [None]:
# Dataset used to train the final model: a single TFRecord file, decoded
# into (graph, label) pairs by decode_fn.
final_path='/content/drive/MyDrive/forth_dataset/all_train_smiles.tfrecord'
final_ds=tf.data.TFRecordDataset([final_path]).map(decode_fn)

In [None]:
# Show the spec of the second item of each dataset element, i.e. the label
# returned by decode_fn.
print(final_ds.element_spec[1])

## 蓋模型 (for cross-validation)

In [None]:
batch_size = 32

# Training folds are batched and repeated indefinitely; validation folds are
# batched only, so each validation pass sees the fold exactly once.
train_ds_batched = [
    train_ds[fold_idx].batch(batch_size=batch_size).repeat()
    for fold_idx in range(10)
]
val_ds_batched = [
    val_ds[fold_idx].batch(batch_size=batch_size)
    for fold_idx in range(10)
]

In [None]:
def _build_model(
    graph_tensor_spec,
    node_dim=16,
    edge_dim=16,
    message_dim=64,
    next_state_dim=64,
    num_classes=2,
    num_message_passing=3,
    l2_regularization=2e-3,
    dropout_rate=0.2,
    activation="relu",
):
    """Build a message-passing GNN that emits a single logit per graph.

    Args:
        graph_tensor_spec: Spec of the input GraphTensor. It must contain an
            "atoms" node set and a "bonds" edge set, each carrying a
            ``tfgnn.HIDDEN_STATE`` feature.
        node_dim: Units of the dense layer that embeds the atom features.
        edge_dim: Units of the dense layer that embeds the bond features.
        message_dim: Units of the network that computes per-edge messages.
        next_state_dim: Units of the network that computes the new atom state.
        num_classes: Unused; kept so existing callers keep working. The
            output layer always has exactly one unit.
        num_message_passing: Number of GraphUpdate rounds applied in sequence.
        l2_regularization: L2 factor applied to the kernel and bias of the
            message and next-state dense layers.
        dropout_rate: Dropout rate applied after each of those dense layers.
        activation: Activation of the message and next-state dense layers.

    Returns:
        An uncompiled ``tf.keras.Model`` taking a GraphTensor and returning
        the output of a 1-unit dense layer with no activation (logits).
    """
    input_graph = tf.keras.layers.Input(type_spec=graph_tensor_spec)
    graph = input_graph.merge_batch_to_components()

    # MapFeatures passes the set name as a keyword argument; it is not needed
    # here because each callback only embeds the hidden state.
    def set_initial_node_state(node_set, *, node_set_name):
        return tf.keras.layers.Dense(node_dim)(node_set[tfgnn.HIDDEN_STATE])

    def set_initial_edge_state(edge_set, *, edge_set_name):
        return tf.keras.layers.Dense(edge_dim)(edge_set[tfgnn.HIDDEN_STATE])

    graph = tfgnn.keras.layers.MapFeatures(
        node_sets_fn=set_initial_node_state,
        edge_sets_fn=set_initial_edge_state)(graph)

    def dense(units, activation=activation):
        """A Dense layer with regularization (L2 and Dropout)."""
        regularizer = tf.keras.regularizers.l2(l2_regularization)
        return tf.keras.Sequential([
            tf.keras.layers.Dense(
                units,
                activation=activation,
                kernel_regularizer=regularizer,
                bias_regularizer=regularizer),
            tf.keras.layers.Dropout(dropout_rate)
        ])

    # Each round builds fresh layers, so weights are not shared across rounds.
    for _ in range(num_message_passing):
        graph = tfgnn.keras.layers.GraphUpdate(
            node_sets={
                "atoms": tfgnn.keras.layers.NodeSetUpdate(
                    {"bonds": tfgnn.keras.layers.SimpleConv(
                        sender_edge_feature=tfgnn.HIDDEN_STATE,
                        message_fn=dense(message_dim),
                        reduce_type="sum",
                        receiver_tag=tfgnn.TARGET)},
                    tfgnn.keras.layers.NextStateFromConcat(
                        dense(next_state_dim)))}
        )(graph)

    # Mean-pool the atom states into the graph context, then map to one logit.
    readout_features = tfgnn.keras.layers.Pool(
        tfgnn.CONTEXT, "mean", node_set_name="atoms")(graph)
    logits = tf.keras.layers.Dense(1)(readout_features)

    return tf.keras.Model(inputs=[input_graph], outputs=[logits])

In [None]:
# Recursively delete ./logs/ so TensorBoard only shows the runs that follow.
!rm -rf ./logs/

In [None]:
# Keras History object of every fold, keyed by the fold number as a string.
history={}

# All folds are decoded by the same decode_fn, so the graph spec of the first
# fold is used to build every model; the label spec is not needed.
model_input_graph_spec = train_ds[0].element_spec[0]

for fold in range(1,11):

    # Separate, timestamped TensorBoard directory per fold.
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = f"logs/fit_{fold}/{timestamp}"
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, histogram_freq=1)

    # A fresh model, loss and metric set for every fold.
    model = _build_model(model_input_graph_spec)

    # The model outputs raw logits, so accuracy thresholds at 0 and both
    # cross-entropies are computed from logits.
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    metrics = [
        tf.keras.metrics.BinaryAccuracy(threshold=0.),
        tf.keras.metrics.BinaryCrossentropy(from_logits=True),
    ]

    model.compile(tf.keras.optimizers.Adam(), loss=loss, metrics=metrics)

    model.summary()

    # The training dataset repeats forever, so steps_per_epoch defines the
    # length of an epoch.
    history[str(fold)] = model.fit(
        train_ds_batched[fold-1],
        steps_per_epoch=8,
        epochs=250,
        validation_data=val_ds_batched[fold-1],
        callbacks=[tensorboard_callback])

In [None]:
# Open TensorBoard on the log directory of fold 9.
%tensorboard --logdir logs/fit_9

In [None]:
title=['loss', 'binary_accuracy', 'binary_crossentropy', 'val_loss', 'val_binary_accuracy', 'val_binary_crossentropy', 'recall', 'val_recall', 'precision', 'val_precision']

# Only the first six names are aggregated; the remaining entries of `title`
# are not used here.
tracked_metrics = title[:6]
per_fold = {name: [] for name in tracked_metrics}

# Gather the per-epoch curve of every tracked metric from each fold.
for itr in range(1,11):
    for name, curve in history[str(itr)].history.items():
        if name in per_fold:
            per_fold[name].append(curve)

# Average across folds (axis 0), leaving one value per epoch.
train_loss, train_acc, train_bce, val_loss, val_acc, val_bce = (
    np.mean(per_fold[name], 0) for name in tracked_metrics
)

In [None]:
# Fold-averaged validation accuracy at the last epoch.
print(val_acc[-1])

In [None]:
# Plot the fold-averaged training and validation curves, one figure per
# metric.
for curve_title, train_curve, val_curve in (
        ('loss', train_loss, val_loss),
        ('accuracy', train_acc, val_acc),
        ('binary cross-entropy', train_bce, val_bce)):
    plt.plot(train_curve, label='train')
    plt.plot(val_curve, label='validation')
    plt.legend()
    plt.title(curve_title)
    plt.show()

## 蓋模型 (final)

In [None]:
# Batch the final training set and repeat it indefinitely; the epoch length
# is therefore set by steps_per_epoch when calling fit.
batch_size = 32
final_ds_batched=final_ds.batch(batch_size=batch_size).repeat()

In [None]:
def _build_model(
    graph_tensor_spec,
    node_dim=16,
    edge_dim=16,
    message_dim=64,
    next_state_dim=64,
    num_classes=2,
    num_message_passing=3,
    l2_regularization=2e-3,
    dropout_rate=0.2,
    activation="tanh",
):
    """Build a message-passing GNN that emits a single logit per graph.

    Args:
        graph_tensor_spec: Spec of the input GraphTensor. It must contain an
            "atoms" node set and a "bonds" edge set, each carrying a
            ``tfgnn.HIDDEN_STATE`` feature.
        node_dim: Units of the dense layer that embeds the atom features.
        edge_dim: Units of the dense layer that embeds the bond features.
        message_dim: Units of the network that computes per-edge messages.
        next_state_dim: Units of the network that computes the new atom state.
        num_classes: Unused; kept so existing callers keep working. The
            output layer always has exactly one unit.
        num_message_passing: Number of GraphUpdate rounds applied in sequence.
        l2_regularization: L2 factor applied to the kernel and bias of the
            message and next-state dense layers.
        dropout_rate: Dropout rate applied after each of those dense layers.
        activation: Activation of the message and next-state dense layers.

    Returns:
        An uncompiled ``tf.keras.Model`` taking a GraphTensor and returning
        the output of a 1-unit dense layer with no activation (logits).
    """
    input_graph = tf.keras.layers.Input(type_spec=graph_tensor_spec)
    graph = input_graph.merge_batch_to_components()

    # MapFeatures passes the set name as a keyword argument; it is not needed
    # here because each callback only embeds the hidden state.
    def set_initial_node_state(node_set, *, node_set_name):
        return tf.keras.layers.Dense(node_dim)(node_set[tfgnn.HIDDEN_STATE])

    def set_initial_edge_state(edge_set, *, edge_set_name):
        return tf.keras.layers.Dense(edge_dim)(edge_set[tfgnn.HIDDEN_STATE])

    graph = tfgnn.keras.layers.MapFeatures(
        node_sets_fn=set_initial_node_state,
        edge_sets_fn=set_initial_edge_state)(graph)

    def dense(units, activation=activation):
        """A Dense layer with regularization (L2 and Dropout)."""
        regularizer = tf.keras.regularizers.l2(l2_regularization)
        return tf.keras.Sequential([
            tf.keras.layers.Dense(
                units,
                activation=activation,
                kernel_regularizer=regularizer,
                bias_regularizer=regularizer),
            tf.keras.layers.Dropout(dropout_rate)
        ])

    # Each round builds fresh layers, so weights are not shared across rounds.
    for _ in range(num_message_passing):
        graph = tfgnn.keras.layers.GraphUpdate(
            node_sets={
                "atoms": tfgnn.keras.layers.NodeSetUpdate(
                    {"bonds": tfgnn.keras.layers.SimpleConv(
                        sender_edge_feature=tfgnn.HIDDEN_STATE,
                        message_fn=dense(message_dim),
                        reduce_type="sum",
                        receiver_tag=tfgnn.TARGET)},
                    tfgnn.keras.layers.NextStateFromConcat(
                        dense(next_state_dim)))}
        )(graph)

    # Mean-pool the atom states into the graph context, then map to one logit.
    readout_features = tfgnn.keras.layers.Pool(
        tfgnn.CONTEXT, "mean", node_set_name="atoms")(graph)
    logits = tf.keras.layers.Dense(1)(readout_features)

    return tf.keras.Model(inputs=[input_graph], outputs=[logits])

In [None]:
# Dataset elements are (graph, label) pairs; only the graph spec is needed
# to build the final model.
model_input_graph_spec = final_ds.element_spec[0]
model = _build_model(model_input_graph_spec)

In [None]:
# Recursively delete ./logs/ before the final training run.
# NOTE(review): this also removes the logs/fit_<n> directories written by the
# cross-validation loop - confirm they are no longer needed.
!rm -rf ./logs/

In [None]:
# Timestamped TensorBoard log directory for the final run; histogram_freq=1
# asks the callback to record histograms every epoch.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# The model outputs raw logits: the loss and the cross-entropy metric are
# computed from logits, and accuracy thresholds the logit at 0.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
metrics = [tf.keras.metrics.BinaryAccuracy(threshold=0.),
           tf.keras.metrics.BinaryCrossentropy(from_logits=True),]

In [None]:
# Compile with an Adam optimizer at its default settings.
model.compile(tf.keras.optimizers.Adam(), loss=loss, metrics=metrics)

In [None]:
# Print the layer-by-layer summary of the final model.
model.summary()

In [None]:
# Draw the model graph left-to-right into model.png, without shapes or
# layer names.
tf.keras.utils.plot_model(model, to_file='model.png', show_shapes=False, show_layer_names=False, rankdir='LR')

In [None]:
# Train the final model. final_ds_batched repeats forever, so each epoch is
# exactly 9 batches; no validation data is used here.
# NOTE: this rebinds `history` from the per-fold dict to a single History.
history=model.fit(final_ds_batched, steps_per_epoch=9,epochs=250,callbacks=[tensorboard_callback])

In [None]:
# Open TensorBoard on the final run's log directory.
%tensorboard --logdir logs/fit

In [None]:
# One figure per metric recorded during the final training run.
for metric_name, curve in history.history.items():
    plt.plot(curve)
    plt.title(metric_name)
    plt.show()

## 預測

In [None]:
############################Prediction of target################################

In [None]:
# Inputs for prediction: the TFRecord file with the graphs to score and a CSV
# whose rows are later paired with the predictions by position.
# NOTE(review): assumes both files list the same items in the same order -
# confirm.
predict_path = '/content/drive/MyDrive/fifth_dataset/fda_fixed_1.tfrecord'
predict_file = pd.read_csv('/content/drive/MyDrive/fifth_dataset/fda_fixed_predicted.csv')
predict_ds = tf.data.TFRecordDataset([predict_path]).map(decode_fn)
# Batch size 1: one graph per prediction step.
predict_ds_batched = predict_ds.batch(batch_size=1)
print(predict_ds)

In [None]:
# Raw model outputs (logits) for every graph in the prediction set.
predictions = model.predict(predict_ds_batched)

In [None]:
# Inspect the raw logits.
print(predictions)

In [None]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of ``x``.

    The branch on the sign of ``x`` keeps the argument of ``math.exp``
    non-positive, so very negative inputs return a value near 0 instead of
    raising ``OverflowError``.

    Args:
        x: A real number (anything ``math.exp`` accepts).

    Returns:
        A float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # Equivalent form for negative x: e**x / (1 + e**x).
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

In [None]:
### use sigmoid ###
# Convert every logit to a score in [0, 1] with the logistic function, record
# it next to the first CSV field of the matching row, and count how many
# scores fall into each 0.1-wide bin.
pre_res=[]
ans=[]
distn=[0,0,0,0,0,0,0,0,0,0]
# Lower bounds of bins 1..9; the bin of a score is the number of bounds it
# reaches (scores below 0.1 land in bin 0).
bin_lower_bounds = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
# zip stops at the shorter of the CSV and the prediction array.
for idx,p in zip(range(len(predict_file)),predictions):
    fix = round(sigmoid(p), 4)
    pre_res.append(fix)
    ans.append([predict_file.iloc[idx].values[0], fix])
    distn[sum(fix >= bound for bound in bin_lower_bounds)]+=1
# Ascending list of all scores, sorted once after the loop.
rank = sorted(pre_res)

In [None]:
### use tanh ###
# Convert every logit to a score in [0, 1] with tanh(p)/2 + 0.5, record it
# next to the first CSV field of the matching row, and count how many scores
# fall into each 0.1-wide bin.
# NOTE(review): tanh(p)/2 + 0.5 equals sigmoid(2*p), so these scores are more
# extreme than the sigmoid of the logit that the from_logits loss models -
# confirm this is intended.
pre_res=[]
ans=[]
distn=[0,0,0,0,0,0,0,0,0,0]
# Lower bounds of bins 1..9; the bin of a score is the number of bounds it
# reaches (scores below 0.1 land in bin 0).
bin_lower_bounds = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
# zip stops at the shorter of the CSV and the prediction array.
for idx,p in zip(range(len(predict_file)),predictions):
    fix = round(math.tanh(p)/2+0.5,4)
    pre_res.append(fix)
    ans.append([predict_file.iloc[idx].values[0], fix])
    distn[sum(fix >= bound for bound in bin_lower_bounds)]+=1
# Ascending list of all scores; the cells below reverse and index `rank`,
# so it must be filled here as well.
rank = sorted(pre_res)


In [None]:
# Print every [first CSV field, score] pair, then how many there are.
for entry in ans:
    print(entry)
print(len(ans))

In [None]:
# Reverse `rank` in place; when it holds ascending scores this puts the
# highest score first. Re-running this cell flips the order back.
rank.reverse()

In [None]:
# Print up to the first 35 entries of `rank`; slicing avoids an IndexError
# when fewer than 35 scores are available.
for score in rank[:35]:
    print(score)

In [None]:
# Number of scores per 0.1-wide bin, from [0, 0.1) up to [0.9, 1.0].
print(distn)

In [None]:
# Write the scores to a new CSV on Drive.
# NOTE(review): this stores the scores in the 'SMILES' column, replacing any
# existing column of that name - confirm this is intended. pandas raises
# ValueError if len(pre_res) differs from the number of rows.
# NOTE(review): the output goes to third_dataset while the inputs were read
# from fifth_dataset - confirm the target directory.
predict_file['SMILES']=pre_res
predict_file.to_csv("/content/drive/MyDrive/third_dataset/fda_fixed_from_gnn.csv",index=False)

## 其他測試

In [None]:
#################Prediction of next part's inputs############################

In [None]:
# Score the training graphs themselves, one graph per prediction step.
# NOTE(review): this reads all_train_smiles.tfrecord from experiment_result,
# whereas the final model was trained on the file of the same name under
# forth_dataset - confirm both hold the same data.
predict_train_path = '/content/drive/MyDrive/experiment_result/all_train_smiles.tfrecord'
predict_train_ds = tf.data.TFRecordDataset([predict_train_path]).map(decode_fn)
predict_train_ds_batched = predict_train_ds.batch(batch_size=1)

In [None]:
# Raw logits for the training graphs, squashed into [0, 1] with
# tanh(p)/2 + 0.5 and rounded to four decimals.
predictions_train = model.predict(predict_train_ds_batched)
pre_train_res = [
    round(math.tanh(p)/2+0.5,4)
    for p in predictions_train
]

In [None]:
# Store the training-set scores in train_filtered.csv.
# NOTE(review): the file is read and then written back to the same path, and
# the 'SMILES' column is replaced by the scores, so any original content of
# that column is lost on disk - confirm this is intended or write to a new
# file. pandas raises ValueError if the lengths differ.
predict_csv=pd.read_csv("/content/drive/MyDrive/dataset/train_filtered.csv")
predict_csv['SMILES']=pre_train_res
predict_csv.to_csv("/content/drive/MyDrive/dataset/train_filtered.csv",index=False)

In [None]:
###########################################################

In [None]:
######check_ans######
# Count confident mistakes on the training set: a score above 0.8 paired with
# a 0 in the second CSV column, or a score below 0.2 paired with a 1.

data_train = pd.read_csv('/content/drive/MyDrive/dataset/train_onlySMILES.csv')
# Read the second field of at most the first 285 rows (presumably the labels
# - verify against the CSV); clamping to the row count avoids an IndexError
# on a shorter file.
max_rows = 285
lb = [data_train.iloc[row].values[1]
      for row in range(min(max_rows, len(data_train)))]

cnt=0
# zip stops at the shorter of the two sequences.
for i,j in zip(pre_train_res,lb):
    print(f'{i} {j}')
    if((i>0.8 and j==0) or (i<0.2 and j==1)):
        cnt=cnt+1
print(cnt)


In [None]:
import keras
from keras import layers

In [None]:
# Draw the model graph left-to-right, expanding nested models and showing
# dtypes, layer names and activations.
# NOTE(review): this uses the standalone `keras` package while the model was
# built with `tf.keras` - confirm both resolve to the same Keras version.
keras.utils.plot_model(
    model,
    show_shapes=False,
    show_dtype=True,
    show_layer_names=True,
    rankdir="LR",
    expand_nested=True,
    show_layer_activations=True,
    show_trainable=False,
    )