In [3]:
import numpy as np
import scipy.io

def getRelatedDiseases(DiseaseID, phenotype_network, top_k=5):
    """Return the diseases whose similarity to ``DiseaseID`` is highest.

    ``phenotype_network`` is indexed as a 2-D array: column 0 holds the
    disease IDs and the remaining columns hold similarity scores. Score
    column ``j`` (counted after dropping the ID column) is interpreted as
    the disease stored in row ``j``, because the positions found in the
    score vector are used directly as row indices to read the IDs back.

    The queried disease itself is not excluded, so it appears in the result
    whenever its own score is among the highest.

    Args:
        DiseaseID: ID to look up in column 0 of ``phenotype_network``.
        phenotype_network: 2-D NumPy array laid out as described above.
        top_k: How many of the highest-scoring diseases to return
            (default 5, the value this function originally hard-coded).

    Returns:
        A tuple ``(relationDiseaseID, relationDiseasePos)``: the IDs and the
        row positions of the selected diseases, ordered by ascending
        similarity (the best match is last). Fewer than ``top_k`` entries
        are returned when the network has fewer score columns.

    Raises:
        ValueError: If ``top_k`` is negative, or if ``DiseaseID`` does not
            occur in column 0. Previously a missing ID silently fell back
            to row 0 and returned the neighbours of an unrelated disease.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative, got {}".format(top_k))

    matches = np.flatnonzero(phenotype_network[:, 0] == DiseaseID)
    if matches.size == 0:
        raise ValueError(
            "DiseaseID {} not found in phenotype_network".format(DiseaseID)
        )
    line_num = matches[0]  # first occurrence, as the original scan did

    similarity_vec = phenotype_network[line_num, 1:]

    pos = np.argsort(similarity_vec)
    # Slice from an explicit start index: ``pos[-0:]`` would return
    # everything instead of nothing when top_k == 0.
    relationDiseasePos = pos[max(len(pos) - top_k, 0):]

    relationDiseaseID = phenotype_network[relationDiseasePos, 0]

    return relationDiseaseID, relationDiseasePos

# Load ppi_network, which stores the similarity relations between genes.
# There are 8919 genes in total, so the network is 8919 x 8919.
ppi_network = scipy.io.loadmat("ppi_network.mat")["ppi_network"]
n = ppi_network.shape[0]  # n is the number of genes (8919)
outDegree = np.sum(ppi_network, axis=1)
# Row-normalise the network so it can be used as a transition matrix.
# A row whose sum is 0 is divided by 1 instead of 0: dividing by zero would
# fill that row with NaN, and the NaN would then spread through every later
# matrix-vector product.
row_totals = outDegree.reshape(-1, 1)
safe_totals = np.where(row_totals == 0, 1, row_totals)
ppi_network = ppi_network / safe_totals

# Load phenotype_network, which stores the similarity relations between
# diseases. There are 5080 diseases in total. Note that the first column
# holds the disease ID, so the network is 5080 x 5081.
phenotype_network = scipy.io.loadmat("phenotype_network.mat")["phenotype_network"]

# Load g_p_network, which stores the gene-disease associations: the entry is
# 1 if gene i is associated with disease j and 0 otherwise. Its dimensions
# are 8919 x 5080.
g_p_network = scipy.io.loadmat("g_p_network.mat")["g_p_network"]

# Set the disease ID. Valid IDs are listed in the first column of
# phenotype_network (they could also be stored separately).
DiseaseID = 100070

relatedDiseaseID, relatedDiseasePos = getRelatedDiseases(DiseaseID, phenotype_network)

# Initialise the priority vector R.
R = np.zeros(n)
# For the input disease ID: take the most similar diseases found above, mark
# every gene associated with at least one of them by setting its entry of R
# to 1, then normalise R so it sums to 1.
# All returned positions are used rather than a hard-coded count of five, so
# a shorter result no longer raises IndexError.
seed_links = g_p_network[:, relatedDiseasePos]
if hasattr(seed_links, "toarray"):
    # In case g_p_network was loaded as a SciPy sparse matrix: densify only
    # the few selected columns so the mask below works on a plain array.
    seed_links = seed_links.toarray()
R[(seed_links != 0).any(axis=1)] = 1
seed_total = np.sum(R)
if seed_total > 0:
    R = R / seed_total

# Print the initial (non-zero) priority of every seed gene.
for i in np.flatnonzero(R):
    print("PR {} is {}".format(i, R[i]))
# Update the scores with a random walk with restart. ppi_network plays the
# role of the adjacency (transition-probability) matrix, d is the restart
# probability, and the walk starts from, and restarts at, the vector R.
# e holds the current score vector.
iterator = 50  # number of update steps
d = 0.1
e = R
walk_weight = 1 - d    # share of the score that follows the network
restart_term = d * R   # share that jumps back to the seed genes
for _ in range(iterator):
    e = walk_weight * np.dot(e, ppi_network) + restart_term


# Print the ten genes most strongly related to the input disease.
ascending_order = np.argsort(e)
related_genes = ascending_order[::-1][:10]  # indices of the 10 largest scores, best first
related_genes_scores = e[related_genes]  # priority scores of those 10 genes
print("Related genes (top 10):")
for rank in range(len(related_genes)):
    print(f"Gene {related_genes[rank]} with score {related_genes_scores[rank]}")





PR 3516 is 1.0
Related genes (top 10):
Gene 3516 with score 0.10119145658905299
Gene 1577 with score 0.09928804821483092
Gene 152 with score 0.0050838536004457055
Gene 501 with score 0.004352984614687516
Gene 368 with score 0.004328286867968305
Gene 146 with score 0.0041040931907015445
Gene 421 with score 0.004091523159882904
Gene 191 with score 0.003853152260652582
Gene 2354 with score 0.003835770445187456
Gene 137 with score 0.003656135491995338
