first commit
Your Name committed Mar 30, 2022
0 parents commit c0858c8
Showing 141 changed files with 24,335 additions and 0 deletions.
19 changes: 19 additions & 0 deletions data preprocessing/GetPPMIMatrix.m
@@ -0,0 +1,19 @@
%% Calculate Positive Pointwise Mutual Information Matrix %%

function PPMI = GetPPMIMatrix(M)

    M = ScaleSimMat(M);

    [p, q] = size(M);
    assert(p == q, 'M must be a square matrix!');

    col = sum(M);       % column sums (1 x q)
    row = sum(M, 2);    % row sums (p x 1)
    D   = sum(col);     % total mass of the matrix

    PPMI = log(D * M ./ (row * col));  % pointwise mutual information
    PPMI(PPMI < 0) = 0;                % keep only positive PMI (clamps -Inf from log 0)
    PPMI(isnan(PPMI)) = 0;             % 0/0 entries produce NaN; zero them out

end
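
A minimal NumPy cross-check of the PPMI formula above, for readers working outside MATLAB (not part of the commit; the function name `ppmi` is illustrative, and the input is assumed to be the row-normalized matrix that ScaleSimMat returns):

import numpy as np

def ppmi(M):
    col = M.sum(axis=0)                    # column sums (1 x q)
    row = M.sum(axis=1, keepdims=True)     # row sums (p x 1)
    D = col.sum()                          # total mass
    with np.errstate(divide='ignore', invalid='ignore'):
        P = np.log(D * M / (row * col))    # pointwise mutual information
    P[P < 0] = 0                           # keep only positive PMI (clamps -inf from log 0)
    P[np.isnan(P)] = 0                     # 0/0 entries
    return P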
17 changes: 17 additions & 0 deletions data preprocessing/RandSurf.m
@@ -0,0 +1,17 @@
%% Random Surfing %%
% for more details, please see our paper

function M = RandSurf(A, max_step, alpha)

    num_nodes = length(A);
    A = ScaleSimMat(A);   % row-normalize the similarity matrix

    P0 = eye(num_nodes, num_nodes);   % restart distribution
    P  = P0;
    M  = zeros(num_nodes, num_nodes);

    % P_k = alpha * P_{k-1} * A + (1 - alpha) * P_0;  M accumulates the P_k
    for i = 1 : max_step
        P = alpha * P * A + (1 - alpha) * P0;
        M = M + P;
    end

end
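
The recursion above is P_0 = I, P_k = alpha * P_{k-1} * A + (1 - alpha) * P_0, with M summing the P_k. A NumPy sketch of the same loop (illustrative only; assumes A is already row-normalized, which the MATLAB version does internally via ScaleSimMat):

import numpy as np

def rand_surf(A, max_step, alpha):
    n = A.shape[0]
    P0 = np.eye(n)                               # restart distribution
    P = P0.copy()
    M = np.zeros((n, n))
    for _ in range(max_step):
        P = alpha * (P @ A) + (1 - alpha) * P0   # one surfing step with restart
        M += P                                   # accumulate step distributions
    return M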
9 changes: 9 additions & 0 deletions data preprocessing/ScaleSimMat.m
@@ -0,0 +1,9 @@
%% Scale Similarity Matrix by Row %%

function W = ScaleSimMat(W)

    %scale
    W = W - diag(diag(W));   % diagonal elements must be 0
    D = diag(sum(W), 0);     % degree matrix (column sums; equal to row sums for symmetric W)
    W = pinv(D) * W;         % row-normalize; pinv handles zero-degree nodes

end
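
The same scaling can be sketched in NumPy (illustrative only; `scale_sim_mat` is not in the commit):

import numpy as np

def scale_sim_mat(W):
    W = W - np.diag(np.diag(W))      # zero the diagonal
    deg = W.sum(axis=0)              # column sums; equal to row sums for symmetric W
    inv_deg = np.zeros_like(deg, dtype=float)
    nz = deg != 0
    inv_deg[nz] = 1.0 / deg[nz]      # pinv of the diagonal degree matrix
    return inv_deg[:, None] * W      # left-multiply by pinv(D)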
23 changes: 23 additions & 0 deletions data preprocessing/compute_similarity.m
@@ -0,0 +1,23 @@
clc
clear
Nets = {'proteinDisease', 'drugsideEffect', 'drugDisease'};

for i = 1 : length(Nets)
    tic
    inputID = char(strcat(Nets(i), '.txt'));
    M = load(inputID);
    Sim = 1 - pdist(M, 'jaccard');   % pairwise Jaccard similarity between rows
    Sim = squareform(Sim);
    Sim = Sim + eye(size(M, 1));     % pdist leaves the diagonal at 0; self-similarity is 1
    Sim(isnan(Sim)) = 0;             % all-zero rows yield NaN; zero them out
    outputID = char(strcat('../Sim_', Nets(i), '.txt'));
    dlmwrite(outputID, Sim, '\t');
    toc
end

% % write chemical similarity to network/
% M = load('../data/Similarity_Matrix_Drugs.txt');
% dlmwrite('../network/Sim_mat_Drugs.txt', M, '\t');
% % write sequence similarity to network/
% M = load('../data/Similarity_Matrix_Proteins.txt');
% dlmwrite('../network/Sim_mat_Proteins.txt', M, '\t');
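
The same similarity step in Python for anyone preferring SciPy over MATLAB's pdist (a sketch; assumes the input file sits in the working directory):

import numpy as np
from scipy.spatial.distance import pdist, squareform

M = np.loadtxt('proteinDisease.txt')
Sim = squareform(1 - pdist(M, 'jaccard'))   # pairwise Jaccard similarity
Sim = Sim + np.eye(M.shape[0])              # self-similarity is 1
Sim = np.nan_to_num(Sim)                    # all-zero row pairs can yield NaN in some SciPy versions
np.savetxt('Sim_proteinDisease.txt', Sim, delimiter='\t')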
732 changes: 732 additions & 0 deletions data preprocessing/data/Sim_drugDisease.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/Sim_drugsideEffect.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/Sim_proteinDisease.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim1network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim2network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim3network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim4network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim5network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim6network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinprotein.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim1network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim2network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim3network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim4network.txt

Large diffs are not rendered by default.

Binary file added data preprocessing/feature/Sim_drugDisease.mat
Binary file not shown.
Binary file added data preprocessing/feature/Sim_drugsideEffect.mat
Binary file not shown.
Binary file added data preprocessing/feature/Sim_proteinDisease.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugdrug.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim1network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim2network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim3network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim4network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim5network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim6network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinprotein.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim1network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim2network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim3network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim4network.mat
Binary file not shown.
11 changes: 11 additions & 0 deletions data preprocessing/main.m
@@ -0,0 +1,11 @@
clc
clear
Kstep = 3;      % number of random-surfing steps
alpha = 0.98;   % restart weight
str = 'Sim_proteinDisease';
data = load(strcat('../data/', str, '.txt'));
[m, n] = size(data);
Mk = RandSurf(data, Kstep, alpha);   % random surfing over the network
PPMI = GetPPMIMatrix(Mk);            % PPMI representation of the surfed matrix
rep_sim1_drug = PPMI;                % variable name expected by SEMD.py
save(strcat('../code/feature/', str, '.mat'), 'rep_sim1_drug');
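
A quick way to confirm the saved .mat files carry the variable name SEMD.py expects (a sketch; the path is illustrative):

import scipy.io as scio

mat = scio.loadmat('../code/feature/Sim_proteinDisease.mat')
ppmi = mat['rep_sim1_drug']    # same key for every network type
print(ppmi.shape)              # 1915 x 1915 here; 732 x 732 for drug networks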
106 changes: 106 additions & 0 deletions embedding/SEMD.py
@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 12 18:44:19 2022
@author: DELL
"""

from sklearn.metrics import r2_score
import numpy as np
import scipy.io as scio
import os
import csv
from VAE3 import *
import time

startTime = time.time()


def feature_normalization(feat_norm):
    # L2-normalize each column of the feature matrix
    for i in range(feat_norm.shape[1]):
        norm_temp = np.linalg.norm(feat_norm[:, i], ord=2)
        if norm_temp > 0:
            feat_norm[:, i] = feat_norm[:, i] / norm_temp
    return feat_norm


def matrix_hstack(feat):
    # Horizontally stack a list of feature matrices, then column-normalize
    feat_feat = feat[0]
    for i in range(1, len(feat)):
        feat_feat = np.hstack((feat_feat, feat[i]))
    return feature_normalization(feat_feat)


def feiling(interaction, dd):
    # "feiling" (fei ling, "non-zero"): R^2 between the original and the
    # reconstructed matrix, computed over the non-zero entries only
    aa = []
    bb = []
    for i in range(np.shape(interaction)[0]):      # rows (732 drugs)
        for j in range(np.shape(interaction)[1]):  # columns (1915 proteins)
            if interaction[i][j] != 0:
                aa.append(interaction[i][j])
                bb.append(dd[i][j])
    value = r2_score(aa, bb)
    print(value)
    return value


def outputCSVfile(filename, data):
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)


dataFile = r'C:\Users\DELL\Desktop\DTI\dti\code\feature'
files = os.listdir(dataFile)
drugFeature = []
proteinFeature = []

for file_index in files:
    data = scio.loadmat(os.path.join(dataFile, file_index))
    print(file_index)
    data = data['rep_sim1_drug']
    data = data.astype('float32') / np.max(data)
    original_dim = data.shape[0]
    if original_dim == 732:        # 732 x 732 matrices are drug networks
        drugFeature.append(data)
    else:                          # 1915 x 1915 matrices are protein networks
        proteinFeature.append(data)

drugF = matrix_hstack(drugFeature)
drug_feat, decoded_drug = FeatureExtraction_d(drugF.shape[1], drugF)
N_drugF = matrix_hstack(decoded_drug)

# Per-network R^2 between the input and its reconstruction (9 drug networks)
drug = []
N_d = int(np.shape(drugF)[1] / 9)
for i in range(9):
    ii = i * N_d
    drug.append(feiling(drugF[:, ii:ii + N_d], N_drugF[:, ii:ii + N_d]))

proteinF = matrix_hstack(proteinFeature)
prot_feat, decoded_protein = FeatureExtraction_p(proteinF.shape[1], proteinF)
N_proteinF = matrix_hstack(decoded_protein)

# Per-network R^2 between the input and its reconstruction (6 protein networks)
protein = []
N_t = int(np.shape(proteinF)[1] / 6)
for j in range(6):
    jj = j * N_t
    protein.append(feiling(proteinF[:, jj:jj + N_t], N_proteinF[:, jj:jj + N_t]))

# scio.savemat('drug_feat.mat', {'drug_feat': drug_feat})
# scio.savemat('prot_feat.mat', {'prot_feat': prot_feat})

outputCSVfile('drugFeature.txt', drug_feat)
outputCSVfile('proteinFeature.txt', prot_feat)

# endTime = time.time()
# print('The time of code is: %s' % (endTime - startTime))
199 changes: 199 additions & 0 deletions embedding/VAE3.py
@@ -0,0 +1,199 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 12 16:54:38 2022
@author: DELL
"""
from keras import regularizers
from keras.layers import Input, Dense, Lambda, Dropout, concatenate
from keras.models import Model
from keras import backend as K
from keras.losses import binary_crossentropy
import numpy as np

np.random.seed(116)

intermediate_dim = 500
intermediate_dim1 = 200
latent_dim = 100

decoded_dp = []    # reconstructions of the 9 drug networks
decoded_dp1 = []   # reconstructions of the 6 protein networks


def sampling(args):
    # Reparameterization trick: z = mu + exp(z_log_sigma) * epsilon.
    # Note: sampling reads z_log_sigma as log(sigma), while the KL term
    # below reads it as log(sigma^2).
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=0.1)
    return z_mean + K.exp(z_log_sigma) * epsilon


def FeatureExtraction_d(original_dim, data):
    # Encoder: two dense layers with L1 activity regularization and dropout
    inputs = Input(shape=(original_dim,))
    h = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(inputs)
    h = Dropout(0.2)(h)
    h = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(h)
    h = Dropout(0.2)(h)
    z_mean = Dense(latent_dim)(h)
    z_log_sigma = Dense(latent_dim)(h)
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

    # Decoder layers (the generator part)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(latent_inputs)
    x = Dropout(0.2)(x)
    x = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(x)
    x = Dropout(0.2)(x)
    outputs = Dense(732, activation='sigmoid')(x)

    # Nine decoder heads, one per drug network; all nine wrap the same
    # `outputs` tensor, so they share the same layers and weights.
    decoders = [Model(latent_inputs, outputs, name='decoder%d' % (k + 1))
                for k in range(9)]
    outputs = concatenate([d(encoder(inputs)[2]) for d in decoders], axis=1)

    # Build the model
    vae = Model(inputs, outputs, name='vae_mlp')
    reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()

    history = vae.fit(data, data,
                      epochs=1,
                      batch_size=100,
                      shuffle=True,
                      validation_data=(data, data))
    val_loss = history.history['val_loss']

    # Use the latent mean as the learned feature, then reconstruct each network
    encoder = Model(inputs, z_mean, name='encoder')
    feature = encoder.predict(data, batch_size=100)
    for d in decoders:
        decoded_dp.append(d.predict(feature))
    return feature, decoded_dp


def FeatureExtraction_p(original_dim, data):
    # Same architecture as FeatureExtraction_d, but with six 1915-dimensional
    # decoder heads for the six protein networks.
    inputs = Input(shape=(original_dim,))
    h = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(inputs)
    h = Dropout(0.2)(h)
    h = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(h)
    h = Dropout(0.2)(h)
    z_mean = Dense(latent_dim)(h)
    z_log_sigma = Dense(latent_dim)(h)
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

    # Decoder layers (the generator part)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(latent_inputs)
    x = Dropout(0.2)(x)
    x = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(x)
    x = Dropout(0.2)(x)
    outputs = Dense(1915, activation='sigmoid')(x)

    # Six decoder heads sharing the same layers and weights
    decoders = [Model(latent_inputs, outputs, name='decoder%d' % (k + 1))
                for k in range(6)]
    outputs = concatenate([d(encoder(inputs)[2]) for d in decoders], axis=1)

    # Build the model
    vae = Model(inputs, outputs, name='vae_mlp')
    reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()

    history = vae.fit(data, data,
                      epochs=1,
                      batch_size=100,
                      shuffle=True,
                      validation_data=(data, data))
    val_loss = history.history['val_loss']

    encoder = Model(inputs, z_mean, name='encoder')
    feature = encoder.predict(data, batch_size=100)
    for d in decoders:
        decoded_dp1.append(d.predict(feature))
    return feature, decoded_dp1
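
For reference, the KL term used in both feature extractors is the closed form for a diagonal Gaussian posterior against the unit-Gaussian prior; a NumPy sketch (illustrative only, using the log-variance reading of z_log_sigma):

import numpy as np

def kl_divergence(z_mean, z_log_var):
    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), one value per sample
    kl = 1 + z_log_var - np.square(z_mean) - np.exp(z_log_var)
    return -0.5 * kl.sum(axis=-1)

mu = np.zeros((4, 100))
log_var = np.zeros((4, 100))
print(kl_divergence(mu, log_var))   # zeros: the posterior equals the prior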
