first commit
Your Name committed Mar 30, 2022
0 parents commit c0858c8
Showing 141 changed files with 24,335 additions and 0 deletions.
19 changes: 19 additions & 0 deletions data preprocessing/GetPPMIMatrix.m
@@ -0,0 +1,19 @@
%% Calculate Positive Pointwise Mutual Information Matrix %%

function PPMI = GetPPMIMatrix(M)

    M = ScaleSimMat(M);

    [p, q] = size(M);
    assert(p == q, 'M must be a square matrix!');

    col = sum(M);       % column sums (1 x q)
    row = sum(M, 2);    % row sums (p x 1)
    D   = sum(col);     % total mass of the matrix

    PPMI = log(D * M ./ (row * col));  % pointwise mutual information
    PPMI(PPMI < 0) = 0;                % keep only positive PMI (clamps -Inf from log 0)
    PPMI(isnan(PPMI)) = 0;             % 0/0 entries produce NaN; zero them out

end
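
A minimal NumPy cross-check of the PPMI formula above, for readers working outside MATLAB (not part of the commit; the function name `ppmi` is illustrative, and the input is assumed to be the row-normalized matrix that ScaleSimMat returns):

import numpy as np

def ppmi(M):
    col = M.sum(axis=0)                    # column sums (1 x q)
    row = M.sum(axis=1, keepdims=True)     # row sums (p x 1)
    D = col.sum()                          # total mass
    with np.errstate(divide='ignore', invalid='ignore'):
        P = np.log(D * M / (row * col))    # pointwise mutual information
    P[P < 0] = 0                           # keep only positive PMI (clamps -inf from log 0)
    P[np.isnan(P)] = 0                     # 0/0 entries
    return P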
17 changes: 17 additions & 0 deletions data preprocessing/RandSurf.m
@@ -0,0 +1,17 @@
%% Random Surfing %%
% for more details, please see our paper

function M = RandSurf(A, max_step, alpha)

    num_nodes = length(A);
    A = ScaleSimMat(A);   % row-normalize the similarity matrix

    P0 = eye(num_nodes, num_nodes);   % restart distribution
    P  = P0;
    M  = zeros(num_nodes, num_nodes);

    % P_k = alpha * P_{k-1} * A + (1 - alpha) * P_0;  M accumulates the P_k
    for i = 1 : max_step
        P = alpha * P * A + (1 - alpha) * P0;
        M = M + P;
    end

end
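
The recursion above is P_0 = I, P_k = alpha * P_{k-1} * A + (1 - alpha) * P_0, with M summing the P_k. A NumPy sketch of the same loop (illustrative only; assumes A is already row-normalized, which the MATLAB version does internally via ScaleSimMat):

import numpy as np

def rand_surf(A, max_step, alpha):
    n = A.shape[0]
    P0 = np.eye(n)                               # restart distribution
    P = P0.copy()
    M = np.zeros((n, n))
    for _ in range(max_step):
        P = alpha * (P @ A) + (1 - alpha) * P0   # one surfing step with restart
        M += P                                   # accumulate step distributions
    return M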
9 changes: 9 additions & 0 deletions data preprocessing/ScaleSimMat.m
@@ -0,0 +1,9 @@
%% Scale Similarity Matrix by Row %%

function W = ScaleSimMat(W)

    %scale
    W = W - diag(diag(W));   % diagonal elements must be 0
    D = diag(sum(W), 0);     % degree matrix (column sums; equal to row sums for symmetric W)
    W = pinv(D) * W;         % row-normalize; pinv handles zero-degree nodes

end
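
The same scaling can be sketched in NumPy (illustrative only; `scale_sim_mat` is not in the commit):

import numpy as np

def scale_sim_mat(W):
    W = W - np.diag(np.diag(W))      # zero the diagonal
    deg = W.sum(axis=0)              # column sums; equal to row sums for symmetric W
    inv_deg = np.zeros_like(deg, dtype=float)
    nz = deg != 0
    inv_deg[nz] = 1.0 / deg[nz]      # pinv of the diagonal degree matrix
    return inv_deg[:, None] * W      # left-multiply by pinv(D)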
23 changes: 23 additions & 0 deletions data preprocessing/compute_similarity.m
@@ -0,0 +1,23 @@
clc
clear
Nets = {'proteinDisease', 'drugsideEffect', 'drugDisease'};

for i = 1 : length(Nets)
    tic
    inputID = char(strcat(Nets(i), '.txt'));
    M = load(inputID);
    Sim = 1 - pdist(M, 'jaccard');   % pairwise Jaccard similarity between rows
    Sim = squareform(Sim);
    Sim = Sim + eye(size(M, 1));     % pdist leaves the diagonal at 0; self-similarity is 1
    Sim(isnan(Sim)) = 0;             % all-zero rows yield NaN; zero them out
    outputID = char(strcat('../Sim_', Nets(i), '.txt'));
    dlmwrite(outputID, Sim, '\t');
    toc
end

% % write chemical similarity to network/
% M = load('../data/Similarity_Matrix_Drugs.txt');
% dlmwrite('../network/Sim_mat_Drugs.txt', M, '\t');
% % write sequence similarity to network/
% M = load('../data/Similarity_Matrix_Proteins.txt');
% dlmwrite('../network/Sim_mat_Proteins.txt', M, '\t');
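
The same similarity step in Python for anyone preferring SciPy over MATLAB's pdist (a sketch; assumes the input file sits in the working directory):

import numpy as np
from scipy.spatial.distance import pdist, squareform

M = np.loadtxt('proteinDisease.txt')
Sim = squareform(1 - pdist(M, 'jaccard'))   # pairwise Jaccard similarity
Sim = Sim + np.eye(M.shape[0])              # self-similarity is 1
Sim = np.nan_to_num(Sim)                    # all-zero row pairs can yield NaN in some SciPy versions
np.savetxt('Sim_proteinDisease.txt', Sim, delimiter='\t')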
732 changes: 732 additions & 0 deletions data preprocessing/data/Sim_drugDisease.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/Sim_drugsideEffect.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/Sim_proteinDisease.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim1network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim2network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim3network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim4network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim5network.txt

Large diffs are not rendered by default.

732 changes: 732 additions & 0 deletions data preprocessing/data/drugsim6network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinprotein.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim1network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim2network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim3network.txt

Large diffs are not rendered by default.

1,915 changes: 1,915 additions & 0 deletions data preprocessing/data/proteinsim4network.txt

Large diffs are not rendered by default.

Binary file added data preprocessing/feature/Sim_drugDisease.mat
Binary file not shown.
Binary file added data preprocessing/feature/Sim_drugsideEffect.mat
Binary file not shown.
Binary file added data preprocessing/feature/Sim_proteinDisease.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugdrug.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim1network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim2network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim3network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim4network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim5network.mat
Binary file not shown.
Binary file added data preprocessing/feature/drugsim6network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinprotein.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim1network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim2network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim3network.mat
Binary file not shown.
Binary file added data preprocessing/feature/proteinsim4network.mat
Binary file not shown.
11 changes: 11 additions & 0 deletions data preprocessing/main.m
@@ -0,0 +1,11 @@
clc
clear
Kstep = 3;      % number of random-surfing steps
alpha = 0.98;   % restart weight
str = 'Sim_proteinDisease';
data = load(strcat('../data/', str, '.txt'));
[m, n] = size(data);
Mk = RandSurf(data, Kstep, alpha);   % random surfing over the network
PPMI = GetPPMIMatrix(Mk);            % PPMI representation of the surfed matrix
rep_sim1_drug = PPMI;                % variable name expected by SEMD.py
save(strcat('../code/feature/', str, '.mat'), 'rep_sim1_drug');
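
A quick way to confirm the saved .mat files carry the variable name SEMD.py expects (a sketch; the path is illustrative):

import scipy.io as scio

mat = scio.loadmat('../code/feature/Sim_proteinDisease.mat')
ppmi = mat['rep_sim1_drug']    # same key for every network type
print(ppmi.shape)              # 1915 x 1915 here; 732 x 732 for drug networks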
106 changes: 106 additions & 0 deletions embedding/SEMD.py
@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 12 18:44:19 2022
@author: DELL
"""

from sklearn.metrics import r2_score
import numpy as np
import scipy.io as scio
import os
import csv
from VAE3 import *
import time

startTime = time.time()


def feature_normalization(feat_norm):
    # L2-normalize each column of the feature matrix
    for i in range(feat_norm.shape[1]):
        norm_temp = np.linalg.norm(feat_norm[:, i], ord=2)
        if norm_temp > 0:
            feat_norm[:, i] = feat_norm[:, i] / norm_temp
    return feat_norm


def matrix_hstack(feat):
    # Horizontally stack a list of feature matrices, then column-normalize
    feat_feat = feat[0]
    for i in range(1, len(feat)):
        feat_feat = np.hstack((feat_feat, feat[i]))
    return feature_normalization(feat_feat)


def feiling(interaction, dd):
    # "feiling" (fei ling, "non-zero"): R^2 between the original and the
    # reconstructed matrix, computed over the non-zero entries only
    aa = []
    bb = []
    for i in range(np.shape(interaction)[0]):      # rows (732 drugs)
        for j in range(np.shape(interaction)[1]):  # columns (1915 proteins)
            if interaction[i][j] != 0:
                aa.append(interaction[i][j])
                bb.append(dd[i][j])
    value = r2_score(aa, bb)
    print(value)
    return value


def outputCSVfile(filename, data):
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)


dataFile = r'C:\Users\DELL\Desktop\DTI\dti\code\feature'
files = os.listdir(dataFile)
drugFeature = []
proteinFeature = []

for file_index in files:
    data = scio.loadmat(os.path.join(dataFile, file_index))
    print(file_index)
    data = data['rep_sim1_drug']
    data = data.astype('float32') / np.max(data)
    original_dim = data.shape[0]
    if original_dim == 732:        # 732 x 732 matrices are drug networks
        drugFeature.append(data)
    else:                          # 1915 x 1915 matrices are protein networks
        proteinFeature.append(data)

drugF = matrix_hstack(drugFeature)
drug_feat, decoded_drug = FeatureExtraction_d(drugF.shape[1], drugF)
N_drugF = matrix_hstack(decoded_drug)

# Per-network R^2 between the input and its reconstruction (9 drug networks)
drug = []
N_d = int(np.shape(drugF)[1] / 9)
for i in range(9):
    ii = i * N_d
    drug.append(feiling(drugF[:, ii:ii + N_d], N_drugF[:, ii:ii + N_d]))

proteinF = matrix_hstack(proteinFeature)
prot_feat, decoded_protein = FeatureExtraction_p(proteinF.shape[1], proteinF)
N_proteinF = matrix_hstack(decoded_protein)

# Per-network R^2 between the input and its reconstruction (6 protein networks)
protein = []
N_t = int(np.shape(proteinF)[1] / 6)
for j in range(6):
    jj = j * N_t
    protein.append(feiling(proteinF[:, jj:jj + N_t], N_proteinF[:, jj:jj + N_t]))

# scio.savemat('drug_feat.mat', {'drug_feat': drug_feat})
# scio.savemat('prot_feat.mat', {'prot_feat': prot_feat})

outputCSVfile('drugFeature.txt', drug_feat)
outputCSVfile('proteinFeature.txt', prot_feat)

# endTime = time.time()
# print('The time of code is: %s' % (endTime - startTime))
199 changes: 199 additions & 0 deletions embedding/VAE3.py
@@ -0,0 +1,199 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 12 16:54:38 2022
@author: DELL
"""
from keras import regularizers
from keras.layers import Input, Dense, Lambda, Dropout, concatenate
from keras.models import Model
from keras import backend as K
from keras.losses import binary_crossentropy
import numpy as np

np.random.seed(116)

intermediate_dim = 500
intermediate_dim1 = 200
latent_dim = 100

decoded_dp = []    # reconstructions of the 9 drug networks
decoded_dp1 = []   # reconstructions of the 6 protein networks


def sampling(args):
    # Reparameterization trick: z = mu + exp(z_log_sigma) * epsilon.
    # Note: sampling reads z_log_sigma as log(sigma), while the KL term
    # below reads it as log(sigma^2).
    z_mean, z_log_sigma = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                              mean=0., stddev=0.1)
    return z_mean + K.exp(z_log_sigma) * epsilon


def FeatureExtraction_d(original_dim, data):
    # Encoder: two dense layers with L1 activity regularization and dropout
    inputs = Input(shape=(original_dim,))
    h = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(inputs)
    h = Dropout(0.2)(h)
    h = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(h)
    h = Dropout(0.2)(h)
    z_mean = Dense(latent_dim)(h)
    z_log_sigma = Dense(latent_dim)(h)
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

    # Decoder layers (the generator part)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(latent_inputs)
    x = Dropout(0.2)(x)
    x = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(x)
    x = Dropout(0.2)(x)
    outputs = Dense(732, activation='sigmoid')(x)

    # Nine decoder heads, one per drug network; all nine wrap the same
    # `outputs` tensor, so they share the same layers and weights.
    decoders = [Model(latent_inputs, outputs, name='decoder%d' % (k + 1))
                for k in range(9)]
    outputs = concatenate([d(encoder(inputs)[2]) for d in decoders], axis=1)

    # Build the model
    vae = Model(inputs, outputs, name='vae_mlp')
    reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()

    history = vae.fit(data, data,
                      epochs=1,
                      batch_size=100,
                      shuffle=True,
                      validation_data=(data, data))
    val_loss = history.history['val_loss']

    # Use the latent mean as the learned feature, then reconstruct each network
    encoder = Model(inputs, z_mean, name='encoder')
    feature = encoder.predict(data, batch_size=100)
    for d in decoders:
        decoded_dp.append(d.predict(feature))
    return feature, decoded_dp


def FeatureExtraction_p(original_dim, data):
    # Same architecture as FeatureExtraction_d, but with six 1915-dimensional
    # decoder heads for the six protein networks.
    inputs = Input(shape=(original_dim,))
    h = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(inputs)
    h = Dropout(0.2)(h)
    h = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(h)
    h = Dropout(0.2)(h)
    z_mean = Dense(latent_dim)(h)
    z_log_sigma = Dense(latent_dim)(h)
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_sigma])
    encoder = Model(inputs, [z_mean, z_log_sigma, z], name='encoder')

    # Decoder layers (the generator part)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim1, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(latent_inputs)
    x = Dropout(0.2)(x)
    x = Dense(intermediate_dim, activation='relu',
              activity_regularizer=regularizers.l1(10e-5))(x)
    x = Dropout(0.2)(x)
    outputs = Dense(1915, activation='sigmoid')(x)

    # Six decoder heads sharing the same layers and weights
    decoders = [Model(latent_inputs, outputs, name='decoder%d' % (k + 1))
                for k in range(6)]
    outputs = concatenate([d(encoder(inputs)[2]) for d in decoders], axis=1)

    # Build the model
    vae = Model(inputs, outputs, name='vae_mlp')
    reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()

    history = vae.fit(data, data,
                      epochs=1,
                      batch_size=100,
                      shuffle=True,
                      validation_data=(data, data))
    val_loss = history.history['val_loss']

    encoder = Model(inputs, z_mean, name='encoder')
    feature = encoder.predict(data, batch_size=100)
    for d in decoders:
        decoded_dp1.append(d.predict(feature))
    return feature, decoded_dp1
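
For reference, the KL term used in both feature extractors is the closed form for a diagonal Gaussian posterior against the unit-Gaussian prior; a NumPy sketch (illustrative only, using the log-variance reading of z_log_sigma):

import numpy as np

def kl_divergence(z_mean, z_log_var):
    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), one value per sample
    kl = 1 + z_log_var - np.square(z_mean) - np.exp(z_log_var)
    return -0.5 * kl.sum(axis=-1)

mu = np.zeros((4, 100))
log_var = np.zeros((4, 100))
print(kl_divergence(mu, log_var))   # zeros: the posterior equals the prior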
