In [1]:
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow import keras
from tensorflow.python.keras import backend as K
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
import numpy as np
import pickle
import random
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr

2024-08-14 12:00:26.593968: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-14 12:00:26.801998: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-14 12:00:26.802041: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-14 12:00:26.848216: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-14 12:00:26.917750: I tensorflow/core/platform/cpu_feature_guar

In [2]:
drugs_cell_lines_ic50_df = pd.read_csv("..//data//drugs_cell_lines_ic50.csv")

In [3]:
pubchem_drugs_smiles_df = pd.read_csv('..//data//drugs_smile_strings.csv')

In [4]:
drugs_smiles_cell_lines_ic50_df = pd.merge(drugs_cell_lines_ic50_df, pubchem_drugs_smiles_df, 
                                             on = "drug_id")

In [5]:
drugs_smiles_cell_lines_ic50_df = drugs_smiles_cell_lines_ic50_df[["drug_id", "Cancer_Cell_Line", "Smiles", "IC50"]]

In [6]:
drugs_smiles_cell_lines_ic50_df.dtypes

drug_id               int64
Cancer_Cell_Line     object
Smiles               object
IC50                float64
dtype: object

In [7]:
drugs_smiles_cell_lines_ic50_df["drug_id"] = drugs_smiles_cell_lines_ic50_df["drug_id"].astype(object)

In [8]:
with open("..//data//drug_gcn_features.pickle", "rb") as f:
    dict_features = pickle.load(f)

In [9]:
with open("..//data//drug_gcn_normalized_adj_mats.pickle", "rb") as f:
    dict_normalized_adj_mats = pickle.load(f)

In [10]:
dualgcn_train = pd.read_csv("..//data//DualGCN_Embedding_train.csv")

In [11]:
dualgcn_test = pd.read_csv("..//data//DualGCN_Embedding_test.csv")

In [12]:
pubchem_to_drugs_df = pd.read_csv('..//data//1.Drug_listMon Jun 24 09_00_55 2019.csv')

In [13]:
pubchem_to_drugs_df = pubchem_to_drugs_df[["drug_id", "PubCHEM"]]

In [14]:
pubchem_to_drugs_df.dtypes

drug_id     int64
PubCHEM    object
dtype: object

In [15]:
import numpy as np

In [16]:
pubchem_to_drugs_df["PubCHEM"] = [val if str(val).isdigit() else np.nan for val in pubchem_to_drugs_df["PubCHEM"] ]

In [17]:
pubchem_to_drugs_df = pubchem_to_drugs_df.dropna()

In [18]:
pubchem_to_drugs_df.dtypes

drug_id     int64
PubCHEM    object
dtype: object

In [19]:
pubchem_to_drugs_df["drug_id"] = pubchem_to_drugs_df["drug_id"].astype(str)

In [20]:
x_train, x_valid, y_train, y_valid = train_test_split(drugs_smiles_cell_lines_ic50_df.drop(["IC50"],axis =1), drugs_smiles_cell_lines_ic50_df["IC50"].values, 
                                                     test_size = 0.20, random_state = 42)

In [21]:
dualgcn_train["Drug_ID"] = dualgcn_train["Drug_ID"].astype(str)

In [22]:
dualgcn_test["Drug_ID"] = dualgcn_test["Drug_ID"].astype(str)

In [23]:
pubchem_to_drugs_df.dtypes

drug_id    object
PubCHEM    object
dtype: object

In [24]:
dualgcn_train = pubchem_to_drugs_df.merge(dualgcn_train, left_on = ["PubCHEM"], right_on = ["Drug_ID"])

In [25]:
dualgcn_train = dualgcn_train[['Cell_Line', 'drug_id']]

In [26]:
dualgcn_test = pubchem_to_drugs_df.merge(dualgcn_test, left_on = ["PubCHEM"], right_on = ["Drug_ID"])

In [27]:
dualgcn_test = dualgcn_test[['Cell_Line', 'drug_id']]

In [28]:
dualgcn_train.dtypes

Cell_Line    object
drug_id      object
dtype: object

In [29]:
# x_train[

In [30]:
x_train.dtypes

drug_id             object
Cancer_Cell_Line    object
Smiles              object
dtype: object

In [31]:
# dualgcn_train

In [32]:
# x_train['drug_id'].values[0]

In [33]:
x_train['drug_id'] = x_train['drug_id'].astype(str)

In [34]:
x_valid['drug_id'] = x_valid['drug_id'].astype(str)

In [35]:
x_train_valid_feats = pd.concat([x_train, x_valid], ignore_index = True)

In [36]:
y_train_valid = pd.concat([pd.DataFrame(y_train.reshape(-1,1)), pd.DataFrame(y_valid.reshape(-1,1))], ignore_index = True)

In [37]:
combo_train_valid = pd.concat([x_train_valid_feats, y_train_valid],axis =  1)

In [38]:
combo_train_valid.columns = ['drug_id', 'Cancer_Cell_Line', 'Smiles', 'IC50']

In [39]:
# filter x_train x _valid here
x_y_train = combo_train_valid.merge(dualgcn_train, left_on = ['Cancer_Cell_Line','drug_id'], right_on = [ 'Cell_Line','drug_id'])

In [40]:
x_y_test = combo_train_valid.merge(dualgcn_test, left_on = ['Cancer_Cell_Line','drug_id'], right_on = [ 'Cell_Line','drug_id'])

In [41]:
x_train, x_valid, y_train, y_valid = x_y_train.drop(["IC50", 'Cell_Line'],axis = 1), x_y_test.drop(["IC50", 'Cell_Line'], axis =1), x_y_train["IC50"].values, x_y_test["IC50"].values

In [42]:
train_gcn_feats = []
train_adj_list = []
for drug_id in x_train["drug_id"].values:
    train_gcn_feats.append(dict_features[drug_id])
    train_adj_list.append(dict_normalized_adj_mats[drug_id])

In [43]:
valid_gcn_feats = []
valid_adj_list = []
for drug_id in x_valid["drug_id"].values:
    valid_gcn_feats.append(dict_features[drug_id])
    valid_adj_list.append(dict_normalized_adj_mats[drug_id])

In [44]:
train_gcn_feats = np.array(train_gcn_feats).astype("float16")
valid_gcn_feats = np.array(valid_gcn_feats).astype("float16")

In [45]:
train_adj_list = np.array(train_adj_list).astype("float16")
valid_adj_list = np.array(valid_adj_list).astype("float16")

In [46]:
# load models
# omic models
cancer_copy_number_model = tf.keras.models.load_model("..//models//cancer_copy_number_model_no_norm_common")
cancer_cell_gen_expr_model = tf.keras.models.load_model("..//models//cancer_cell_gen_expr_model_no_norm_common")
cancer_cell_gen_methy_model = tf.keras.models.load_model("..//models//cancer_cell_gen_methy_model_no_norm")
cancer_cell_gen_mut_model = tf.keras.models.load_model("..//models//cancer_cell_gen_mut_model_no_norm")

2024-08-14 12:00:52.683873: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31141 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:06:00.0, compute capability: 7.0


















In [47]:
# load models
# drug models
pubchem_drugs_rdkit_model = tf.keras.models.load_model("..//models//pubchem_drugs_rdkit_model_no_norm")





In [48]:
pubchem_drugs_rdkit_model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_4 (Text  (None, 1)                 0         
 Vectorization)                                                  
                                                                 
 embedding_4 (Embedding)     (None, 1, 106)            25440     
                                                                 
 flatten_4 (Flatten)         (None, 106)               0         
                                                                 
Total params: 25440 (99.38 KB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 25440 (99.38 KB)
_________________________________________________________________


In [49]:
std = StandardScaler()

In [50]:
# extract drug features - does not seem like these are used in the network
drug_features_train = pubchem_drugs_rdkit_model(x_train["drug_id"].values).numpy().astype("float32")
drug_features_valid = pubchem_drugs_rdkit_model(x_valid["drug_id"].values).numpy().astype("float32")

In [51]:
np.isinf(drug_features_train).sum()

0

In [52]:
drug_features_train = std.fit_transform(drug_features_train)

In [53]:
drug_features_valid = std.transform(drug_features_valid)

In [54]:
# extract copy number features
omics_copy_number_train = cancer_copy_number_model(x_train["Cancer_Cell_Line"].values).numpy().astype("float16")
omics_copy_number_valid = cancer_copy_number_model(x_valid["Cancer_Cell_Line"].values).numpy().astype("float16")

In [55]:
# omics_copy_number_train

In [56]:
# extract gen expr features
omics_gen_expr_train = cancer_cell_gen_expr_model(x_train["Cancer_Cell_Line"].values).numpy().astype("float16")
omics_gen_expr_valid = cancer_cell_gen_expr_model(x_valid["Cancer_Cell_Line"].values).numpy().astype("float16")

In [57]:
omics_gen_copy_number_gen_expr_train = np.concatenate([np.expand_dims(omics_copy_number_train, -1),
                                                      np.expand_dims(omics_gen_expr_train, -1)], axis = -1)

In [58]:
omics_gen_copy_number_gen_expr_valid = np.concatenate([np.expand_dims(omics_copy_number_valid, -1),
                                                      np.expand_dims(omics_gen_expr_valid, -1)], axis = -1)

In [59]:
# extract gen methylation features
omics_gen_methyl_train = cancer_cell_gen_methy_model(x_train["Cancer_Cell_Line"].values).numpy().astype("float16")
omics_gen_methyl_valid = cancer_cell_gen_methy_model(x_valid["Cancer_Cell_Line"].values).numpy().astype("float16")

In [60]:
# extract gen mutation features
with tf.device('/cpu:0'):
    omics_gen_mut_train = cancer_cell_gen_mut_model.predict(x_train["Cancer_Cell_Line"].values, verbose = 1, batch_size = 256).astype("float16")
    omics_gen_mut_valid = cancer_cell_gen_mut_model.predict(x_valid["Cancer_Cell_Line"].values, verbose = 1, batch_size = 256).astype("float16")



In [61]:
smile_strings_train = x_train["Smiles"].values.reshape(-1,1)
smile_strings_valid = x_valid["Smiles"].values.reshape(-1,1)

In [62]:
selected_info_common_cell_lines = "..//data//cellline_list.txt"
selected_info_common_genes = "..//data/gene_list.txt"

In [63]:
PPI_file = "..//data/PPI_network.txt"

In [64]:
with open(selected_info_common_cell_lines) as f:
    common_cell_lines = [item.strip() for item in f.readlines()]

In [65]:
with open("..//data//common_genes.pickle", "rb") as f:
    common_genes = pickle.load(f)

In [66]:
idx_dic={}
for index, item in enumerate(common_genes):
    idx_dic[item] = index

In [67]:
ppi_adj_info = [[] for item in common_genes] 

In [68]:
# will return for each gene what other gene is connected - PPIs
ppi_adj_info = [[] for item in common_genes] 
for line in open(PPI_file).readlines():
    gene1,gene2 = line.split('\t')[0],line.split('\t')[1]
    if (gene1 in common_genes) & (gene2 in common_genes):
        if idx_dic[gene1]<=idx_dic[gene2]:
            ppi_adj_info[idx_dic[gene1]].append(idx_dic[gene2])
            ppi_adj_info[idx_dic[gene2]].append(idx_dic[gene1])

In [69]:
def CelllineGraphAdjNorm(ppi_adj_info,common_genes = common_genes):
    # with open(selected_info_common_genes) as f:
    #     common_genes = [item.strip() for item in f.readlines()]
    nb_nodes = len(common_genes)
    adj_mat = np.zeros((nb_nodes,nb_nodes),dtype='float32')
    # print(adj_mat.shape)
    for i in range(len(ppi_adj_info)):
        # print(i)
        nodes = ppi_adj_info[i]
        for each in nodes:
            adj_mat[i,each] = 1

    # for checking if two sparse matrices are the same
    assert np.allclose(adj_mat,adj_mat.T)
    norm_adj = NormalizeAdj(adj_mat)
    return norm_adj 

In [70]:
def NormalizeAdj(adj):
    adj = adj + np.eye(adj.shape[0])
    d = sp.diags(np.power(np.array(adj.sum(1)), -0.5).flatten(), 0).toarray()
    a_norm = adj.dot(d).transpose().dot(d)
    return a_norm

In [71]:
import scipy.sparse as sp

In [72]:
ppi_adj = CelllineGraphAdjNorm(ppi_adj_info,common_genes)

In [73]:
ppi_adj = np.expand_dims(ppi_adj,0)

In [74]:
omics_gen_copy_number_gen_expr_train_new = (ppi_adj@omics_gen_copy_number_gen_expr_train)
omics_gen_copy_number_gen_expr_valid_new = (ppi_adj@omics_gen_copy_number_gen_expr_valid)

In [75]:
copy_number_train = omics_gen_copy_number_gen_expr_train_new[:,:,0:1]
copy_number_valid = omics_gen_copy_number_gen_expr_valid_new[:,:,0:1]

In [76]:
gene_expr_train = omics_gen_copy_number_gen_expr_train_new[:,:,1:2]
gene_expr_valid = omics_gen_copy_number_gen_expr_valid_new[:,:,1:2]

In [77]:
valid_items = [[ valid_gcn_feats, valid_adj_list,
                           copy_number_valid, gene_expr_valid,
                           omics_gen_methyl_valid, omics_gen_mut_valid], y_valid]

In [78]:
input_gcn_features = tf.keras.layers.Input(shape = (100, 75))
input_norm_adj_mat = tf.keras.layers.Input(shape = (100, 100))
mult_1 = tf.keras.layers.Dot(1)([input_norm_adj_mat, input_gcn_features])
dense_layer_gcn = tf.keras.layers.Dense(256, activation = "relu")
dense_out = dense_layer_gcn(mult_1)
dense_out = tf.keras.layers.BatchNormalization()(dense_out)
dense_out = tf.keras.layers.Dropout(0.1)(dense_out)
mult_2 = tf.keras.layers.Dot(1)([input_norm_adj_mat, dense_out])
dense_layer_gcn = tf.keras.layers.Dense(128, activation = "relu")
dense_out = dense_layer_gcn(mult_2)
dense_out = tf.keras.layers.BatchNormalization()(dense_out)
dense_out = tf.keras.layers.Dropout(0.1)(dense_out)

# dense_layer_gcn = tf.keras.layers.Dense(100, activation = "relu")
# mult_3 = tf.keras.layers.Dot(1)([input_norm_adj_mat, dense_out])
# dense_out = dense_layer_gcn(mult_3)
# dense_out = tf.keras.layers.BatchNormalization()(dense_out)
# dense_out = tf.keras.layers.Dropout(0.2)(dense_out)

dense_out = tf.keras.layers.GlobalAvgPool1D()(dense_out)

In [79]:
# here is the code for CNV and gene expression
dropout1 = 0.10
dropout2 = 0.20
# first add the CNV
input_cnv = tf.keras.layers.Input(shape = (omics_gen_expr_train.shape[1],1))
    
l1 = tf.keras.layers.Dense(32)(input_cnv)
l1 = tf.keras.layers.Dropout(dropout1)(l1)
l2 = tf.keras.layers.Dense(128)(l1)
l2 = tf.keras.layers.Dropout(dropout1)(l2)
    
dense_layer_gcn1 = tf.keras.layers.Dense(256, activation = "relu")
dense_out_cnv = dense_layer_gcn1(l2)
dense_out_cnv = tf.keras.layers.BatchNormalization()(dense_out_cnv)
dense_out_cnv = tf.keras.layers.Dropout(dropout1)(dense_out_cnv)
# mult_21 = tf.keras.layers.Dot(1)([const_input, dense_out1])
dense_layer_gcn1 = tf.keras.layers.Dense(256, activation = "relu")
dense_out_cnv = dense_layer_gcn1(dense_out_cnv)
dense_out_cnv = tf.keras.layers.BatchNormalization()(dense_out_cnv)
dense_out_cnv = tf.keras.layers.Dropout(dropout1)(dense_out_cnv)
dense_layer_gcn1 = tf.keras.layers.Dense(256, activation = "relu")
dense_out_cnv = dense_layer_gcn1(dense_out_cnv)
dense_out_cnv = tf.keras.layers.BatchNormalization()(dense_out_cnv)
dense_out_cnv = tf.keras.layers.Dropout(dropout1)(dense_out_cnv)
dense_layer_gcn1 = tf.keras.layers.Dense(256, activation = "relu")
dense_out_cnv = dense_layer_gcn1(dense_out_cnv)
dense_out_cnv = tf.keras.layers.BatchNormalization()(dense_out_cnv)
dense_out_cnv = tf.keras.layers.Dropout(dropout1)(dense_out_cnv)
dense_out_cnv = tf.keras.layers.GlobalAvgPool1D()(dense_out_cnv)


In [80]:
all_omics = tf.keras.layers.Concatenate()([ dense_out_cnv, dense_out])

In [81]:
x = tf.keras.layers.Dense(256,activation = 'tanh')(all_omics)
x = tf.keras.layers.Dropout(0.3)(x, training = True)
x = tf.keras.layers.Dense(128,activation = 'tanh')(x)
x = tf.keras.layers.Dropout(0.2)(x, training = True)
x = tf.keras.layers.Dense(10,activation = 'tanh')(x)

In [82]:
final_out_layer = tf.keras.layers.Dense(1)

In [83]:
final_out = final_out_layer(x)

In [84]:
simplegcn = tf.keras.models.Model([input_gcn_features, input_norm_adj_mat, input_cnv], final_out)

In [85]:
simplegcn.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 691, 1)]             0         []                            
                                                                                                  
 dense_2 (Dense)             (None, 691, 32)              64        ['input_3[0][0]']             
                                                                                                  
 dropout_2 (Dropout)         (None, 691, 32)              0         ['dense_2[0][0]']             
                                                                                                  
 dense_3 (Dense)             (None, 691, 128)             4224      ['dropout_2[0][0]']           
                                                                                              

In [86]:
simplegcn.compile(loss = tf.keras.losses.MeanSquaredError(), 
                    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001), 
                    metrics = [tf.keras.metrics.RootMeanSquaredError()])

In [87]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

In [88]:
chunks = np.array_split(list(range(0,valid_gcn_feats.shape[0])), 3)

In [89]:
# this is the train index
new_train_index = 1
new_valid_index = 2
new_test_index = 0

In [90]:
train_items = list(chunks[new_train_index])
valid_items = list(chunks[new_valid_index])
test_items = list(chunks[new_test_index])

In [91]:
# First the data - let's use a train set of 6k - then the rest will be test here (from the remaining values, use 3k for training the stacker, 1k for validation of it, and the rest ~7k as the final test data). 
# drug feats
new_train_gcn_feats = valid_gcn_feats[train_items, :,:]
new_valid_gcn_feats = valid_gcn_feats[valid_items, :,:]
new_test_gcn_feats = valid_gcn_feats[test_items, :,:]
print(new_train_gcn_feats.shape, new_valid_gcn_feats.shape, new_test_gcn_feats.shape)

# drug adj info
new_train_adj_list = valid_adj_list[train_items, :,:]
new_valid_adj_list = valid_adj_list[valid_items, :,:]
new_test_adj_list = valid_adj_list[test_items, :,:]
print(new_train_adj_list.shape, new_valid_adj_list.shape, new_test_adj_list.shape)

# cnv
new_omics_cna_train = copy_number_valid[train_items, :]
new_omics_cna_valid = copy_number_valid[valid_items, :]
new_omics_cna_test = copy_number_valid[test_items,:]
print(new_omics_cna_train.shape, new_omics_cna_valid.shape, new_omics_cna_test.shape)

# expression
new_omics_expr_train = gene_expr_valid[train_items, :]
new_omics_expr_valid = gene_expr_valid[valid_items, :]
new_omics_expr_test = gene_expr_valid[test_items,:]
print(new_omics_cna_train.shape, new_omics_expr_valid.shape, new_omics_cna_test.shape)

# y
new_y_train = y_valid[train_items,]
new_y_valid = y_valid[valid_items,]
new_y_test = y_valid[test_items,]
print(new_y_train.shape, new_y_valid.shape, new_y_test.shape)

(5772, 100, 75) (5772, 100, 75) (5772, 100, 75)
(5772, 100, 100) (5772, 100, 100) (5772, 100, 100)
(5772, 691, 1) (5772, 691, 1) (5772, 691, 1)
(5772, 691, 1) (5772, 691, 1) (5772, 691, 1)
(5772,) (5772,) (5772,)


In [92]:
np.mean(new_y_train), np.std(new_y_train), np.mean(new_y_valid), np.std(new_y_valid), np.mean(new_y_test), np.std(new_y_test)

(2.027896338184338,
 2.8326837766892257,
 2.094244087144837,
 2.8136855131635183,
 2.069332453915454,
 2.8460217894549977)

In [93]:
# now, get the warm start weights from the previously trained network

# import the model
simpleGCN_original_model = tf.keras.models.load_model("../models/dualgcn_trained_on_domain/")

In [94]:
simpleGCN_original_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 691, 1)]             0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 691, 1)]             0         []                            
                                                                                                  
 dense_2 (Dense)             (None, 691, 32)              64        ['input_3[0][0]']             
                                                                                                  
 dense_8 (Dense)             (None, 691, 32)              64        ['input_4[0][0]']             
                                                                                              

In [95]:
# get and set weights

# get weights first

# CNV
Dense_1_weights, Dense_1_bias = simpleGCN_original_model.layers[2].get_weights()
Dense_2_weights, Dense_2_bias = simpleGCN_original_model.layers[6].get_weights()
Dense_3_weights, Dense_3_bias = simpleGCN_original_model.layers[10].get_weights()
bn_1_weights = simpleGCN_original_model.layers[12].get_weights()
Dense_4_weights, Dense_4_bias = simpleGCN_original_model.layers[16].get_weights()
bn_2_weights = simpleGCN_original_model.layers[20].get_weights()
Dense_5_weights, Dense_5_bias = simpleGCN_original_model.layers[26].get_weights()
bn_3_weights = simpleGCN_original_model.layers[29].get_weights()
Dense_6_weights, Dense_6_bias = simpleGCN_original_model.layers[35].get_weights()
bn_4_weights = simpleGCN_original_model.layers[38].get_weights()

# drugs
Dense_1_weights_drug, Dense_1_bias_drug = simpleGCN_original_model.layers[25].get_weights()
bn_1_weights_drug= simpleGCN_original_model.layers[28].get_weights()
Dense_2_weights_drug, Dense_2_bias_drug = simpleGCN_original_model.layers[37].get_weights()
bn_2_weights_drug= simpleGCN_original_model.layers[40].get_weights()

# once concatenated
Dense_1_weights_concat, Dense_1_bias_concat = simpleGCN_original_model.layers[50].get_weights()
Dense_2_weights_concat, Dense_2_bias_concat = simpleGCN_original_model.layers[52].get_weights()
Dense_final_weights_concat, Dense_final_bias_concat = simpleGCN_original_model.layers[53].get_weights()

In [96]:
# set weights
# cnv
simplegcn.layers[1].set_weights((Dense_1_weights, Dense_1_bias))
simplegcn.layers[3].set_weights((Dense_2_weights, Dense_2_bias))
simplegcn.layers[5].set_weights((Dense_3_weights, Dense_3_bias))
simplegcn.layers[6].set_weights(bn_1_weights)
simplegcn.layers[8].set_weights((Dense_4_weights, Dense_4_bias))
simplegcn.layers[11].set_weights(bn_2_weights)
simplegcn.layers[15].set_weights((Dense_5_weights, Dense_5_bias))
simplegcn.layers[17].set_weights(bn_3_weights)
simplegcn.layers[21].set_weights((Dense_6_weights, Dense_6_bias))
simplegcn.layers[23].set_weights(bn_4_weights)

# drugs
simplegcn.layers[14].set_weights((Dense_1_weights_drug, Dense_1_bias_drug))
simplegcn.layers[16].set_weights(bn_1_weights_drug)
simplegcn.layers[22].set_weights((Dense_2_weights_drug, Dense_2_bias_drug))
simplegcn.layers[24].set_weights(bn_2_weights_drug)

# concat
simplegcn.layers[32].set_weights((Dense_1_weights_concat, Dense_1_bias_concat))
simplegcn.layers[34].set_weights((Dense_2_weights_concat, Dense_2_bias_concat))
simplegcn.layers[35].set_weights((Dense_final_weights_concat, Dense_final_bias_concat))

In [97]:
%%time
history = simplegcn.fit([new_train_gcn_feats, new_train_adj_list, new_omics_cna_train], new_y_train, 
                         
          batch_size = 512, epochs = 1000, verbose = 1,
                         
          validation_data = ([new_valid_gcn_feats, new_valid_adj_list, new_omics_cna_valid], new_y_valid),
                         

        callbacks = tf.keras.callbacks.EarlyStopping(monitor = "val_loss", patience = 20, restore_best_weights=True,
                                                       mode = "min"), 
         validation_batch_size = 512, shuffle = True)

Epoch 1/1000


2024-08-14 12:01:43.215227: I external/local_xla/xla/service/service.cc:168] XLA service 0x154e14002580 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-08-14 12:01:43.215289: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): Tesla V100S-PCIE-32GB, Compute Capability 7.0
2024-08-14 12:01:43.251729: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-14 12:01:43.348704: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
I0000 00:00:1723654903.557583 1524949 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000


In [98]:
simplegcn.save("..//Stand_alone_models//models//ablation_dualgcn_fine_tuned_split_" + str(new_test_index))

INFO:tensorflow:Assets written to: ..//Stand_alone_models//models//ablation_dualgcn_fine_tuned_split_0/assets


INFO:tensorflow:Assets written to: ..//Stand_alone_models//models//ablation_dualgcn_fine_tuned_split_0/assets


In [99]:
reps = 25

In [100]:
%time
catch_preds = np.zeros((new_test_gcn_feats.shape[0],reps))
for i in range(0,reps):
    print(i)
    preds = simplegcn.predict([ new_test_gcn_feats, new_test_adj_list, new_omics_cna_test], 
                              batch_size = 1024, verbose = 0)
    catch_preds[:,i] = preds[:,0]

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 11.4 µs
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


In [101]:
preds_data = np.hstack((new_y_test.reshape(-1,1), catch_preds))

In [102]:
np.save("..//Stand_alone_models//data//ablation_dualgcn_preds_val_and_test_" +str(new_test_index) + ".npy",preds_data )

In [103]:
preds_data.shape, catch_preds.shape

((5772, 26), (5772, 25))

In [104]:
catch_preds

array([[3.92181921, 3.76653957, 3.79549003, ..., 3.43819952, 3.8264184 ,
        3.84885311],
       [2.38459349, 2.61465216, 2.3409462 , ..., 2.55625057, 2.2910943 ,
        2.26207757],
       [1.69516134, 1.77077317, 2.21678042, ..., 2.08600497, 2.77352476,
        1.83155179],
       ...,
       [1.11633945, 1.18959475, 1.10393012, ..., 1.44226742, 0.15048131,
        1.09602559],
       [2.93985319, 2.65909219, 2.82835793, ..., 3.0556953 , 2.72217798,
        2.81147528],
       [2.32531357, 2.3376658 , 2.50122833, ..., 2.18157601, 2.41228938,
        2.43421841]])

In [105]:
li, ui = np.percentile(catch_preds, q = [2.5, 97.5], axis = 1)[0,:], np.percentile(catch_preds, q = [2.5, 97.5], axis = 1)[1,:]

In [106]:
ui

array([4.07782307, 2.63415184, 3.35702481, ..., 2.02873106, 3.06027617,
       2.59927001])

In [107]:
test_coverage = np.mean((new_y_test.reshape(-1,1) >= li.reshape(-1,1)) & (new_y_test.reshape(-1,1) <= ui.reshape(-1,1)))
test_coverage

0.3057865557865558

In [108]:
test_width = np.mean(ui.reshape(-1,1) - li.reshape(-1,1))
test_width

1.0431522343098618

In [109]:
catch_preds.mean(1).shape

(5772,)

In [110]:
new_y_test.shape

(5772,)

In [111]:
test_pearsonr = pearsonr(new_y_test, catch_preds.mean(1))
test_pearsonr[0]

0.852824787397635

In [112]:
test_rmse = np.sqrt(np.mean((catch_preds.mean(1).reshape(-1,1)-new_y_test.reshape(-1,1))**2))
test_rmse

1.4868933412580398

In [113]:
# verify the test means match
# import a file from Fold 1 
check_path_fold_0 = "../MEnKF_fold_results/Fold_0/CI_infor_ablation_0.csv"
check_file_fold_0 = pd.read_csv(check_path_fold_0)

In [114]:
# extract the true values from here
fold_0_true_test_y = check_file_fold_0['true_val']

In [115]:
fold_0_true_test_y.mean(), fold_0_true_test_y.std()

(2.0693324538145625, 2.8462683556114916)

In [116]:
# earlier values
np.mean(new_y_test), np.std(new_y_test)

(2.069332453915454, 2.8460217894549977)