In [7]:
## This cell handles data preprocessing and is copied verbatim from Kipf's original GitHub code.

import os

import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch,torch.nn,torch.sparse,torch.nn.functional,torch.distributions
from sklearn.metrics import roc_auc_score,average_precision_score
from input_data import load_data
from preprocessing import preprocess_graph, sparse_to_tuple, mask_test_edges
np.set_printoptions(threshold=np.inf)




def _read_whitespace_table(path):
    r"""Read a whitespace-delimited text table and return its values as a NumPy array.

    ``sep=r"\s+"`` is the documented equivalent of ``delim_whitespace=True``,
    which is deprecated since pandas 2.2. As with the previous calls, the first
    line of the file is consumed as the header row (``pd.read_table`` default).

    Parameters
    ----------
    path : str
        Location of the text file, relative to the notebook's working directory.

    Returns
    -------
    numpy.ndarray
        The table body (header row excluded).
    """
    return pd.read_table(path, sep=r"\s+").to_numpy()


# Tables stored under GBM_normalized/.
W_gene      = _read_whitespace_table("GBM_normalized/gene.txt")
W_methy     = _read_whitespace_table("GBM_normalized/methy.txt")
W_mirna     = _read_whitespace_table("GBM_normalized/mirna.txt")
W_truelabel = _read_whitespace_table("GBM_normalized/truelabel.txt")
W_argsort   = _read_whitespace_table("GBM_normalized/argsort.txt")

# Tables stored under GBM/. The three expression tables are transposed after
# loading so that axis 0 is the axis re-ordered by `order` below.
gene      = _read_whitespace_table("GBM/GLIO_Gene_Expression.txt").transpose()
methy     = _read_whitespace_table("GBM/GLIO_Methy_Expression.txt").transpose()
mirna     = _read_whitespace_table("GBM/GLIO_Mirna_Expression.txt").transpose()
truelabel = _read_whitespace_table("GBM/GLIO_Survival.txt")

# Flatten the permutation and subtract one before using it as a row index
# (presumably argsort.txt stores 1-based positions -- TODO confirm).
order = W_argsort.reshape(-1) - 1

# Apply the same row permutation to every table so they stay aligned.
gene = gene[order]
methy = methy[order]
mirna = mirna[order]
truelabel = truelabel[order]

print(gene.shape)
print(methy.shape)
print(mirna.shape)

def get_nomralized_laplacian(a):
    """Symmetrically normalise a square weight matrix: ``D^{-1/2} A D^{-1/2}``.

    ``D`` is the diagonal matrix of row sums of ``a``. Note that, despite the
    (historical, misspelled) name, the result is the normalised matrix itself
    and is not subtracted from the identity.

    Rows whose sum is not strictly positive get a scaling factor of 0 instead
    of ``inf``/``nan``, so a node without any weight yields an all-zero
    row/column rather than turning the whole result into NaN.

    Parameters
    ----------
    a : array-like, shape (n, n)
        Weight / adjacency matrix.

    Returns
    -------
    numpy.ndarray, shape (n, n), dtype float64
        Entry ``(i, j)`` equals ``a[i, j] / sqrt(deg[i] * deg[j])``.
    """
    a = np.asarray(a, dtype=float)
    degree = a.sum(axis=1)

    # Guarded inverse square root: leave 0 where the degree is not positive.
    inv_sqrt_degree = np.zeros_like(degree)
    positive = degree > 0
    inv_sqrt_degree[positive] = 1.0 / np.sqrt(degree[positive])

    # Broadcasting scales rows and columns without materialising the dense
    # diagonal matrix and two n x n matrix products.
    return a * inv_sqrt_degree[:, None] * inv_sqrt_degree[None, :]




# Graph / feature selection.
# Active configuration: W_gene is used as the adjacency matrix, and the
# feature matrix is the miRNA table followed column-wise by the methylation
# table.
# Alternative configuration that was tried:
#   adj = W_methy
#   features = np.c_[gene,mirna]
features = np.concatenate((mirna, methy), axis=1)
adj = W_gene

num_features = features.shape[1]
adj_norm = get_nomralized_laplacian(adj)

# Both weighting factors are fixed to 1. The disabled data-dependent versions:
#   pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
#   norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
pos_weight, norm = 1, 1







(215, 12042)
(215, 1305)
(215, 534)


In [8]:
#Model definition Cell


# Convert the NumPy arrays produced by the preprocessing cell
# (`adj`, `adj_norm`, `features`) into torch tensors, in that order.
adj_label_tensor, adj_norm_tensor, features_tensor = (
    torch.Tensor(array) for array in (adj, adj_norm, features)
)




class Model(torch.nn.Module):
    """Fully connected variational autoencoder that owns its Adam optimizer.

    Encoder: ``in_features -> h1 -> h2 -> h3 -> (mean, log_std)`` of size
    ``latent_dim``; decoder mirrors it back to ``in_features``. Every layer
    except the final reconstruction layer is followed by ``tanh`` (this
    includes the mean and log-std heads, so both are confined to [-1, 1]).

    Parameters
    ----------
    in_features : int, optional
        Width of the input rows. When omitted, the notebook-level global
        ``num_features`` is used (the original behaviour).
    hidden_dims : sequence of three ints
        Widths of the three hidden layers (mirrored in the decoder).
    latent_dim : int
        Size of the latent code.
    lr : float
        Adam learning rate.
    kl_weight : float
        Factor applied to ``-mean(sum(1 + 2*log_std - mean^2 - std^2))``.
        The textbook KL term carries a factor 0.5, so the default of 100
        weighs it 200 times stronger than a plain VAE.
        NOTE(review): in the training log captured in this notebook the
        reconstruction term stays near 1837 while the KL term shrinks; a
        smaller weight may be worth trying.
    verbose : bool
        When true, ``forward`` prints the reconstruction and KL terms.
    **kwargs
        Forwarded to ``torch.nn.Module.__init__``.

    Attributes set by ``forward``: ``layer_1`` ... ``layer_8``, ``x_prim``,
    ``embedding`` (the sampled latent code), ``recon_loss``, ``kl_loss``,
    ``loss``.
    """

    def __init__(self, in_features=None, hidden_dims=(6000, 3000, 1500),
                 latent_dim=700, lr=0.0001, kl_weight=100.0, verbose=True,
                 **kwargs):
        super(Model, self).__init__(**kwargs)

        if in_features is None:
            # Fall back to the global defined by the preprocessing cell.
            in_features = num_features
        h1, h2, h3 = hidden_dims

        self.latent_dim = latent_dim
        self.kl_weight = kl_weight
        self.verbose = verbose

        # Layer names and creation order are kept so that state_dict keys and
        # seeded initialisation match the original definition.
        self.W0 = torch.nn.Linear(in_features, h1)
        self.W1 = torch.nn.Linear(h1, h2)
        self.W2 = torch.nn.Linear(h2, h3)
        self.W3_mean = torch.nn.Linear(h3, latent_dim)
        self.W3_log_std = torch.nn.Linear(h3, latent_dim)
        self.W4 = torch.nn.Linear(latent_dim, h3)
        self.W5 = torch.nn.Linear(h3, h2)
        self.W6 = torch.nn.Linear(h2, h1)
        self.W7 = torch.nn.Linear(h1, in_features)

        # self.parameters() yields the layers' parameters in registration
        # order, i.e. the same list that used to be assembled by hand.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        self.recon = None

        # Populated by forward(); None means "forward has not run yet".
        self.loss = None
        self.embedding = None

        # Standard normal used to draw the reparameterisation noise.
        self.normal_dist = torch.distributions.MultivariateNormal(
            loc=torch.zeros(latent_dim), scale_tril=torch.eye(latent_dim))

    def forward(self, x):
        """Run one stochastic pass over ``x`` and cache activations and losses.

        Parameters
        ----------
        x : torch.Tensor, shape (batch, in_features)

        Returns
        -------
        float
            Total loss (reconstruction + weighted KL) as a Python number. The
            differentiable tensor is kept in ``self.loss`` for ``backward``.
        """
        tanh = torch.tanh

        # Encoder
        self.layer_1 = tanh(self.W0(x))
        self.layer_2 = tanh(self.W1(self.layer_1))
        self.layer_3 = tanh(self.W2(self.layer_2))

        self.layer_4_mean = tanh(self.W3_mean(self.layer_3))
        self.layer_4_log_std = tanh(self.W3_log_std(self.layer_3))

        # Reparameterisation: z = mean + eps * std, eps ~ N(0, I)
        noise = self.normal_dist.sample((x.shape[0],))
        self.layer_4 = self.layer_4_mean + noise * torch.exp(self.layer_4_log_std)

        # Decoder (last layer is linear)
        self.layer_5 = tanh(self.W4(self.layer_4))
        self.layer_6 = tanh(self.W5(self.layer_5))
        self.layer_7 = tanh(self.W6(self.layer_6))
        self.layer_8 = self.W7(self.layer_7)

        self.x_prim = self.layer_8
        self.embedding = self.layer_4

        # Squared error summed over features, averaged over rows.
        self.recon_loss = ((self.x_prim - x) ** 2).sum(1).mean(0)

        kl_term = torch.mean(torch.sum(
            1 + 2 * self.layer_4_log_std
            - self.layer_4_mean ** 2
            - torch.exp(self.layer_4_log_std) ** 2, 1))
        self.kl_loss = -self.kl_weight * kl_term

        self.loss = self.recon_loss + self.kl_loss

        if self.verbose:
            print(self.recon_loss.item(), self.kl_loss.item())

        return self.loss.item()

    def backward(self):
        """Take one Adam step on the loss cached by the last ``forward`` call.

        Raises
        ------
        RuntimeError
            If ``forward`` has not been called yet.
        """
        if self.loss is None:
            raise RuntimeError("backward() called before forward(): no loss to differentiate")

        self.optimizer.zero_grad()
        self.loss.backward()
        self.optimizer.step()

    def _embedding_array(self):
        """Return the cached latent codes as a NumPy array detached from autograd."""
        if self.embedding is None:
            raise RuntimeError("no embedding available: call forward() first")
        return self.embedding.detach().cpu().numpy()

    def get_cosine_sim_emb(self):
        """Absolute cosine similarity between all pairs of cached latent codes.

        Rows with zero norm are left as zero vectors (similarity 0 to every
        row) instead of producing NaN through a division by zero.

        Returns
        -------
        numpy.ndarray, shape (n, n)
        """
        emb = self._embedding_array()

        norms = (emb * emb).sum(1).reshape(-1, 1) ** 0.5
        safe_norms = norms.copy()
        safe_norms[safe_norms == 0] = 1.0  # avoid 0/0 for all-zero rows
        normalized_emb = emb / safe_norms

        return np.abs(normalized_emb @ normalized_emb.transpose())

    def get_euclidian(self):
        """Pairwise *squared* Euclidean distances between cached latent codes.

        Returns
        -------
        numpy.ndarray, shape (n, n), dtype float64
            Entry ``(i, j)`` is ``sum((emb[i] - emb[j]) ** 2)`` (no square root
            is taken, matching the original implementation).
        """
        emb = self._embedding_array()
        n_rows = emb.shape[0]

        emb_matrix = np.zeros((n_rows, n_rows))
        # One vectorised row at a time: avoids the n^2 Python-level iterations
        # without allocating an (n, n, d) intermediate.
        for i in range(n_rows):
            emb_matrix[i] = ((emb - emb[i]) ** 2).sum(1)

        return emb_matrix
        
        



In [9]:
#Training Cell 

# Seed torch before building the model: both the weight initialisation and the
# latent noise drawn in every forward pass are random, so an unseeded run
# cannot be repeated.
SEED = 0
torch.manual_seed(SEED)

my_model = Model()

# Total loss per epoch, kept so the curve can be inspected after training.
loss_history = []

for epoch_counter in range(2000):

    # forward() caches the loss tensor on the model and returns it as a float;
    # backward() then performs one optimizer step on that cached loss.
    epoch_loss = my_model.forward(features_tensor)
    my_model.backward()
    loss_history.append(epoch_loss)


print('\n',"#########################################################",'\n')






1837.794189453125 1367.6436767578125
1837.3629150390625 951.9002075195312
1837.4283447265625 698.1837158203125
1837.80029296875 546.8432006835938
1839.406005859375 440.5718688964844
1836.085205078125 364.0937194824219
1838.8226318359375 307.62017822265625
1837.3978271484375 264.031494140625
1837.7088623046875 228.1909637451172
1836.4710693359375 199.85752868652344
1836.470458984375 177.1591033935547
1836.7459716796875 157.65744018554688
1838.9998779296875 140.66615295410156
1836.945068359375 126.02859497070312
1836.595947265625 113.44721221923828
1835.835205078125 102.41234588623047
1836.2261962890625 92.36347961425781
1835.3043212890625 83.33769989013672
1834.64501953125 75.61544036865234
1833.0765380859375 68.93965911865234
1836.455078125 62.903873443603516
1841.3057861328125 57.39759826660156
1834.9720458984375 52.39773941040039
1837.6778564453125 47.92540740966797
1835.9268798828125 43.924808502197266
1833.8797607421875 40.250858306884766
1832.01611328125 36.98444747924805
1832.268

KeyboardInterrupt: 

In [111]:
# Uses the latent codes cached by the most recent forward pass. Un-comment the
# next line to draw a fresh sample first:
#my_model.forward(features_tensor)
emb = my_model.embedding.detach().numpy()

# Divisor applied to the squared distance inside the exponential.
KERNEL_SCALE = 30

# Similarity matrix: emb_matrix[i, j] = exp(-||emb[i] - emb[j]||^2 / KERNEL_SCALE).
# (Drop the np.exp(...) wrapper to obtain the raw squared distances instead.)
emb_matrix = np.zeros((emb.shape[0],emb.shape[0]))

# One vectorised row per iteration instead of an element-by-element double loop.
for i in range(emb.shape[0]):
    sq_dists = ((emb - emb[i])**2).sum(axis=1)
    emb_matrix[i] = np.exp(-sq_dists/KERNEL_SCALE)

print(emb_matrix)

[[1.00000000e+00 4.62059238e-05 2.08782561e-04 1.39674153e-04
  1.58717066e-05 2.85838522e-04 2.46216716e-05 2.24410124e-05
  2.67232549e-05 1.38244238e-05 1.42606430e-04 2.89775960e-06
  8.48307470e-05 2.14882091e-05 1.07631705e-03 1.66432901e-05
  6.21830496e-04 1.35656849e-05 6.07292509e-05 2.83008170e-04
  6.82441790e-05 1.15383811e-04 2.65193054e-04 2.99011466e-05
  1.66329446e-04 6.18543111e-05 8.05582568e-05 7.73897284e-05
  6.55712113e-06 3.30879581e-05 7.53830537e-05 1.89858999e-05
  1.77954602e-05 7.01380464e-05 5.27309910e-05 1.34706734e-04
  5.65551643e-04 1.89215361e-04 9.87696352e-05 2.94275423e-05
  2.44596306e-05 8.47680342e-05 4.10909232e-05 2.64860706e-05
  2.18554824e-05 1.04332032e-04 9.07979979e-06 1.76373582e-06
  1.37257186e-05 1.00953230e-05 1.65736153e-05 1.47574538e-04
  4.07939166e-04 2.49984693e-05 2.43177890e-04 1.90936307e-06
  3.06023938e-06 2.90234403e-04 5.26666212e-06 1.15304493e-04
  8.25604692e-05 2.37917828e-04 9.01403315e-05 4.90234565e-05
  3.3803

In [112]:

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs("gvae_fusion_results", exist_ok=True)
np.savetxt("gvae_fusion_results/res1.txt",emb_matrix,delimiter='\t')

In [130]:
from sklearn.cluster import KMeans

# Cluster the latent codes held in `emb` (set by the similarity cell).
X = emb
n_clusters=10

# n_init is pinned explicitly: the library default changed between
# scikit-learn releases, which would silently change the clustering (and
# triggers a FutureWarning on the versions in between).
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(X)

# Mean of column 1 of `truelabel` per cluster
# (presumably the survival time -- TODO confirm the column layout).
for i in range(n_clusters):
    indx = np.flatnonzero(kmeans.labels_==i)
    print(truelabel[indx][:,1].mean())



332.27272727272725
644.8
768.25
547.3529411764706
300.05555555555554
448.02941176470586
536.9032258064516
448.4166666666667
584.6
1542.8333333333333
