In [None]:
import os
import deepchem as dc
import numpy as np
import pandas as pd
import tensorflow as tf
# Expose only CUDA device 0 to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

## model setup

In [None]:
from visar.model_training_utils import prepare_dataset
import os
import pandas as pd

# Name of this run; the log directory below is derived from it.
RUN_KEY = 'Serotonin_Aug14'

log_path = './logs/' + RUN_KEY
# Create the run directory (and any missing parent such as ./logs) without
# shelling out; exist_ok=True keeps the cell safe to re-run.
os.makedirs(log_path, exist_ok=True)
dataset_file = '%s/raw_data.csv' % (log_path)
MT_dat_name = './data/MT_data_clean_June28.csv'
FP_type = 'Circular_2048'
task_list = ['T51', 'T106','T107','T227', 'T108'] # 5HT-1a/1b/2a/2b/2c
#add_features = ['MW', 'logP', 'BertzCT', 'TPSA']

# Network hyperparameters handed to the training call further down.
# NOTE(review): n_features presumably matches the 2048-bit fingerprint named in
# FP_type — confirm if FP_type is changed.
n_features = 2048
layer_sizes = [512, 64]
bypass_layer_sizes=[128]
bypass_dropouts = [.5]
dropout = 0.5
lr = 0.0005

## model training

In [None]:
from keras.layers import Dense, Input
from keras.layers.core import Dropout
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

from visar.model_training_utils import prepare_dataset, RobustMT_model_training

In [None]:
# Run RobustMT training with the settings defined in the model-setup cell.
# NOTE(review): both `epoch_num` and `n_epoch` are passed; their exact roles are
# defined in visar.model_training_utils — confirm before changing either.
RobustMT_model_training(
    MT_dat_name, FP_type, task_list, log_path,
    n_features, layer_sizes, bypass_layer_sizes, bypass_dropouts, dropout, lr,
    epoch_num=10,
    N_test=5000,
    add_features=None,
    n_epoch=40,
)

In [None]:
# visualize the evaluation scores along the training process
import seaborn as sns
import pandas as pd
from visar.VISAR_model_utils import generate_performance_plot_RobustMT

In [None]:
# Build the performance table from the logs of the run configured in this notebook.
# The paths are derived from `log_path` so the plot reflects the model trained
# above rather than a hard-coded directory belonging to a different run.
# NOTE(review): assumes the training step writes model_train_log.csv and
# model_test_log.csv into log_path — confirm against visar.model_training_utils.
plot_df = generate_performance_plot_RobustMT(train_file = '%s/model_train_log.csv' % log_path,
                                             test_file = '%s/model_test_log.csv' % log_path)

In [None]:
import matplotlib.pyplot as plt
# One panel per value of 'tt', one coloured line per entry in 'tasks';
# each line plots R2 against the training step.
g = sns.FacetGrid(plot_df, col='tt', hue='tasks')
g.map(plt.plot, 'step', 'R2', marker='.')
g = g.add_legend()

## process trained results for VISAR analysis

In [None]:
from VISAR_model_utils_v2 import generate_RUNKEY_dataframe_RobustMT
# --- locations of the trained model, its run directory and the input data ---
RUNKEY = './logs/Serotonin_Aug14/'
prev_model = './logs/Serotonin_Aug14/model-1200'
dataset_file = RUNKEY + '/raw_data.csv'
output_prefix = RUNKEY + '/RobustMT_serotonin_output_'
MT_dat_name = './data/MT_data_clean_June28.csv'

# --- tasks and featurisation ---
task_list = ['T51', 'T106', 'T107', 'T227', 'T108']  # 5HT-1a/1b/2a/2b/2c
# Optional extra descriptors; disabled for this run.
#add_features = ['MW','logP','BertzCT','TPSA']
add_features = None
FP_type = 'Circular_2048'
model_flag = 'MT'
n_features = 2048

# --- network settings used when processing the trained model ---
layer_sizes = [512, 64]
bypass_layer_sizes = [128]
bypass_dropouts = [.5]
dropout = 0.5
learning_rate = 0.001
n_layer = 2
n_bypass = 2

In [None]:
# Process the trained model into the output files written under `output_prefix`.
# Calls the function that is actually imported from VISAR_model_utils_v2; the
# previous `..._new` name was never imported and raised NameError on a fresh kernel.
generate_RUNKEY_dataframe_RobustMT(prev_model, output_prefix, task_list, dataset_file, 
                                   FP_type, add_features, n_features, layer_sizes, 
                                   bypass_layer_sizes, model_flag, n_bypass, n_layer = n_layer)

## processing custom files
- generate target profile predictions for the compounds in a custom dataset
- map the custom dataset onto the trained chemical space

In [None]:
# Custom compound table and how to read it.
custom_file = './data/custom_file_npcp.txt'
sep_custom_file = '\t'  # field separator: tab

# Column names looked up in the custom file.
custom_id_field = 'CID'
custom_smiles_field = 'canonical_smiles'
custom_task_field = 'mw'

# if specified, filter for only tasks with good performance (with test R2 higher than the cutoff)
valid_cutoff = 0.6
# number of clusters specified for clustering of the chemicals
K = 8

In [None]:
# Same processing as before, now also passing the custom compound file and its
# column names, plus the cluster count K and the task filter valid_cutoff.
# Calls the function that is actually imported from VISAR_model_utils_v2; the
# previous `..._new` name was never imported and raised NameError on a fresh kernel.
generate_RUNKEY_dataframe_RobustMT(prev_model, output_prefix, task_list, dataset_file, 
                                   FP_type, add_features, n_features, layer_sizes, 
                                   bypass_layer_sizes, model_flag, n_bypass,
                                   custom_file = custom_file, custom_id_field = custom_id_field, 
                                   custom_task_field = custom_task_field,
                                   custom_smiles_field = custom_smiles_field, 
                                   sep_custom_file = sep_custom_file, 
                                   K = K, valid_cutoff = valid_cutoff)

Next:
- copy the output files (including output_compound_df, output_batch_df, output_task_df) to the VISAR_webapp data directory, and clear the static directory if necessary;
- start the app from a command prompt with `bokeh serve --show VISAR_webapp`