<a href="https://colab.research.google.com/github/Seung-heon-Baek/Network-Propagation/blob/main/250711_H446_I_HI_NP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!wget --no-check-certificate https://stringdb-static.org/download/protein.links.v11.5/9606.protein.links.v11.5.txt.gz

!gunzip 9606.protein.links.v11.5.txt.gz


--2025-07-11 06:55:06--  https://stringdb-static.org/download/protein.links.v11.5/9606.protein.links.v11.5.txt.gz
Resolving stringdb-static.org (stringdb-static.org)... 49.12.123.75
Connecting to stringdb-static.org (stringdb-static.org)|49.12.123.75|:443... connected.
  Issued certificate has expired.
HTTP request sent, awaiting response... 200 OK
Length: 72718210 (69M) [application/octet-stream]
Saving to: ‘9606.protein.links.v11.5.txt.gz’


2025-07-11 06:55:09 (22.9 MB/s) - ‘9606.protein.links.v11.5.txt.gz’ saved [72718210/72718210]



In [8]:
import pandas as pd
import numpy as np
import networkx as nx
from tqdm import tqdm

# Load STRING PPI network (filtered by combined_score > threshold)
def load_string_network(filepath, score_cutoff=700):
    df = pd.read_csv(filepath, sep=' ')
    df = df[df['combined_score'] >= score_cutoff]

    # Remove '9606.' prefix
    df['protein1'] = df['protein1'].str.replace('9606.', '')
    df['protein2'] = df['protein2'].str.replace('9606.', '')

    G = nx.Graph()
    for _, row in df.iterrows():
        G.add_edge(row['protein1'], row['protein2'], weight=row['combined_score'] / 1000)
    return G


def network_propagation_weighted(G, seed_scores, alpha=0.7, max_iter=20):
    nodes = list(G.nodes)
    node_index = {node: i for i, node in enumerate(nodes)}

    f0 = np.zeros(len(nodes))
    for gene, score in seed_scores.items():
        if gene in node_index:
            f0[node_index[gene]] = score  # logFC 그대로 사용

    A = nx.to_numpy_array(G, nodelist=nodes)
    degree = A.sum(axis=0)
    W = A / (degree + 1e-10)

    f = f0.copy()
    for _ in range(max_iter):
        f = (1 - alpha) * W @ f + alpha * f0

    return {nodes[i]: f[i] for i in range(len(nodes))}

In [12]:
import pandas as pd

deg_df = pd.read_excel("/content/250711_H446_DEG_I_IH.xlsx")

In [16]:
import requests

def map_genes_to_string_ids(gene_list, species=9606):
    url = "https://string-db.org/api/json/get_string_ids"
    params = {
        "identifiers": "\r".join(gene_list),
        "species": species,
        "limit": 1
    }
    response = requests.post(url, data=params)
    data = response.json()

    mapping = {}
    for entry in data:
        mapping[entry['queryItem']] = entry['stringId']
    return mapping

In [33]:
genes = deg_df['gene']
mapping = map_genes_to_string_ids(genes)
print(mapping)

{'IFI27': '9606.ENSP00000483430', 'COL6A2': '9606.ENSP00000300527', 'GABRA5': '9606.ENSP00000382953', 'MYO6': '9606.ENSP00000358994', 'NPAS2': '9606.ENSP00000338283', 'BACE2': '9606.ENSP00000332979', 'SERPING1': '9606.ENSP00000278407', 'HLA-C': '9606.ENSP00000365402', 'FAM189A1': '9606.ENSP00000261275', 'POTED': '9606.ENSP00000299443', 'TNFRSF19': '9606.ENSP00000371693', 'MUM1L1': '9606.ENSP00000338641', 'MAGEB2': '9606.ENSP00000368273', 'UBE2QL1': '9606.ENSP00000382713', 'SATB1': '9606.ENSP00000399518', 'WFDC2': '9606.ENSP00000361761', 'MFNG': '9606.ENSP00000349490', 'HLA-B': '9606.ENSP00000399168', 'PEX5L': '9606.ENSP00000419975', 'AZGP1': '9606.ENSP00000292401', 'GAL': '9606.ENSP00000265643', 'CHST9': '9606.ENSP00000480991', 'PCSK5': '9606.ENSP00000446280', 'ITGA1': '9606.ENSP00000282588', 'ADAMTS12': '9606.ENSP00000422554', 'PAGE4': '9606.ENSP00000218068', 'FAM9B': '9606.ENSP00000318716', 'SERPINB6': '9606.ENSP00000484343', 'USP18': '9606.ENSP00000215794', 'PAGE3': '9606.ENSP000003

In [18]:
mapped_scores = {}
for gene, score in zip(deg_df['gene'], deg_df['logFC']):
    if gene in mapping:
        ens_id = mapping[gene].replace("9606.", "")
        mapped_scores[ens_id] = score

In [37]:
ppi_path = "/content/9606.protein.links.v11.5.txt"
G = load_string_network(ppi_path, score_cutoff=700)

# seed score 생성: DEG의 logFC
# seed_scores = dict(zip(deg_df['gene'], deg_df['logFC']))

# 전파 실행
prop_scores = network_propagation_weighted(G, mapped_scores)

# 상위 결과 출력
top_genes = sorted(prop_scores.items(), key=lambda x: -abs(x[1]))[:100]
for gene, score in top_genes:
    print(f"{gene}: {score:.4f}")

ENSP00000483430: 7.7299
ENSP00000365402: 7.6184
ENSP00000334051: 7.4164
ENSP00000275493: 7.2600
ENSP00000358994: 7.0047
ENSP00000399168: 6.9688
ENSP00000338283: 6.8779
ENSP00000300527: 6.8029
ENSP00000261275: 6.7148
ENSP00000278407: 6.6707
ENSP00000282588: 6.5874
ENSP00000354822: 6.4845
ENSP00000332979: 6.4668
ENSP00000353099: 6.4230
ENSP00000381601: 6.3767
ENSP00000338641: 6.3721
ENSP00000371693: 6.3416
ENSP00000282561: 6.3336
ENSP00000361860: 6.3156
ENSP00000215794: 6.2796
ENSP00000422554: 6.2399
ENSP00000382713: 6.1203
ENSP00000368273: 6.1080
ENSP00000329654: 6.0814
ENSP00000345344: 6.0806
ENSP00000255262: 6.0739
ENSP00000349490: 6.0525
ENSP00000265643: 6.0523
ENSP00000480991: 6.0381
ENSP00000419975: 6.0234
ENSP00000356030: 6.0113
ENSP00000364089: 6.0077
ENSP00000399518: 6.0057
ENSP00000363993: 5.9570
ENSP00000292401: 5.9298
ENSP00000332973: 5.9281
ENSP00000218068: 5.9228
ENSP00000446280: 5.9118
ENSP00000313809: 5.8702
ENSP00000484343: 5.8555
ENSP00000332118: 5.8094
ENSP00000264350:

In [38]:
raw_text = """
ENSP00000483430: 7.7299
ENSP00000365402: 7.6184
ENSP00000334051: 7.4164
ENSP00000275493: 7.2600
ENSP00000358994: 7.0047
ENSP00000399168: 6.9688
ENSP00000338283: 6.8779
ENSP00000300527: 6.8029
ENSP00000261275: 6.7148
ENSP00000278407: 6.6707
ENSP00000282588: 6.5874
ENSP00000354822: 6.4845
ENSP00000332979: 6.4668
ENSP00000353099: 6.4230
ENSP00000381601: 6.3767
ENSP00000338641: 6.3721
ENSP00000371693: 6.3416
ENSP00000282561: 6.3336
ENSP00000361860: 6.3156
ENSP00000215794: 6.2796
ENSP00000422554: 6.2399
ENSP00000382713: 6.1203
ENSP00000368273: 6.1080
ENSP00000329654: 6.0814
ENSP00000345344: 6.0806
ENSP00000255262: 6.0739
ENSP00000349490: 6.0525
ENSP00000265643: 6.0523
ENSP00000480991: 6.0381
ENSP00000419975: 6.0234
ENSP00000356030: 6.0113
ENSP00000364089: 6.0077
ENSP00000399518: 6.0057
ENSP00000363993: 5.9570
ENSP00000292401: 5.9298
ENSP00000332973: 5.9281
ENSP00000218068: 5.9228
ENSP00000446280: 5.9118
ENSP00000313809: 5.8702
ENSP00000484343: 5.8555
ENSP00000332118: 5.8094
ENSP00000264350: 5.8088
ENSP00000316779: 5.8028
ENSP00000265342: 5.7841
ENSP00000414390: 5.7628
ENSP00000255499: 5.7512
ENSP00000344129: 5.7488
ENSP00000387164: 5.7345
ENSP00000376855: 5.7197
ENSP00000360869: 5.6914
ENSP00000347152: 5.6873
ENSP00000364016: 5.6823
ENSP00000342952: 5.6815
ENSP00000329860: 5.6752
ENSP00000243052: 5.6645
ENSP00000262094: 5.6626
ENSP00000333657: 5.6571
ENSP00000261590: 5.6508
ENSP00000370555: 5.6421
ENSP00000270642: 5.6275
ENSP00000340979: 5.6129
ENSP00000366326: 5.6060
ENSP00000267460: 5.5992
ENSP00000482457: 5.5923
ENSP00000480484: 5.5911
ENSP00000356433: 5.5870
ENSP00000368278: 5.5869
ENSP00000386896: 5.5855
ENSP00000344724: 5.5806
ENSP00000276440: 5.5607
ENSP00000320092: 5.5528
ENSP00000291526: 5.5405
ENSP00000262623: 5.5379
ENSP00000294489: 5.5349
ENSP00000374424: 5.5334
ENSP00000303550: 5.5102
ENSP00000280904: 5.5075
ENSP00000362352: 5.4722
ENSP00000257905: 5.4644
ENSP00000407301: 5.4625
ENSP00000329697: 5.4598
ENSP00000258381: 5.4588
ENSP00000217260: 5.4558
ENSP00000424544: 5.4467
ENSP00000242208: 5.4425
ENSP00000483403: 5.4288
ENSP00000405841: 5.4278
ENSP00000274938: 5.3984
ENSP00000433847: 5.3945
ENSP00000216487: 5.3925
ENSP00000393566: 5.3903
ENSP00000347484: 5.3805
ENSP00000359353: 5.3738
ENSP00000340914: 5.3716
ENSP00000375063: 5.3714
ENSP00000386135: 5.3687
ENSP00000333203: 5.3562
ENSP00000381436: 5.3560
ENSP00000356308: 5.3560
ENSP00000347324: 5.3560
"""

# 변환: 딕셔너리로 만들기
prop_scores = {}
for line in raw_text.strip().splitlines():
    pid, score = line.split(":")
    prop_scores[pid.strip()] = float(score.strip())

# 확인
print("prop_scores = {")
for k, v in prop_scores.items():
    print(f"    '{k}': {v:.4f},")
print("}")

prop_scores = {
    'ENSP00000483430': 7.7299,
    'ENSP00000365402': 7.6184,
    'ENSP00000334051': 7.4164,
    'ENSP00000275493': 7.2600,
    'ENSP00000358994': 7.0047,
    'ENSP00000399168': 6.9688,
    'ENSP00000338283': 6.8779,
    'ENSP00000300527': 6.8029,
    'ENSP00000261275': 6.7148,
    'ENSP00000278407': 6.6707,
    'ENSP00000282588': 6.5874,
    'ENSP00000354822': 6.4845,
    'ENSP00000332979': 6.4668,
    'ENSP00000353099': 6.4230,
    'ENSP00000381601': 6.3767,
    'ENSP00000338641': 6.3721,
    'ENSP00000371693': 6.3416,
    'ENSP00000282561': 6.3336,
    'ENSP00000361860': 6.3156,
    'ENSP00000215794': 6.2796,
    'ENSP00000422554': 6.2399,
    'ENSP00000382713': 6.1203,
    'ENSP00000368273': 6.1080,
    'ENSP00000329654': 6.0814,
    'ENSP00000345344': 6.0806,
    'ENSP00000255262': 6.0739,
    'ENSP00000349490': 6.0525,
    'ENSP00000265643': 6.0523,
    'ENSP00000480991': 6.0381,
    'ENSP00000419975': 6.0234,
    'ENSP00000356030': 6.0113,
    'ENSP00000364089': 

In [39]:
# STRING ID → gene 이름 매핑 만들기
string_to_gene = {v.replace('9606.', ''): k for k, v in mapping.items()}

# 변환된 결과 만들기
converted_scores = {}
for pid, score in prop_scores.items():
    if pid in string_to_gene:
        gene_name = string_to_gene[pid]
        print(f"{pid} → {gene_name}")
    else:
        gene_name = pid  # 매핑 없으면 그대로 사용
        print(f"{pid} (no mapping)")

    # 여기에 삽입해야 함!
    converted_scores[gene_name] = score

# 출력
print("\n✅ Converted Scores:")
for name, score in converted_scores.items():
    print(f"{name}: {score:.4f}")

ENSP00000483430 → IFI27
ENSP00000365402 → HLA-C
ENSP00000334051 → GNAL
ENSP00000275493 → EGFR
ENSP00000358994 → MYO6
ENSP00000399168 → HLA-B
ENSP00000338283 → NPAS2
ENSP00000300527 → COL6A2
ENSP00000261275 → FAM189A1
ENSP00000278407 → SERPING1
ENSP00000282588 → ITGA1
ENSP00000354822 → XAF1
ENSP00000332979 → BACE2
ENSP00000353099 → HLA-DRB1
ENSP00000381601 → MX1
ENSP00000338641 → MUM1L1
ENSP00000371693 → TNFRSF19
ENSP00000282561 → GJA1
ENSP00000361860 → TCEAL6
ENSP00000215794 → USP18
ENSP00000422554 → ADAMTS12
ENSP00000382713 → UBE2QL1
ENSP00000368273 → MAGEB2
ENSP00000329654 → WBSCR17
ENSP00000345344 → CTSL1
ENSP00000255262 → NMUR2
ENSP00000349490 → MFNG
ENSP00000265643 → GAL
ENSP00000480991 → CHST9
ENSP00000419975 → PEX5L
ENSP00000356030 → CD55
ENSP00000364089 → PAGE3
ENSP00000399518 → SATB1
ENSP00000363993 → PSMB9
ENSP00000292401 → AZGP1
ENSP00000332973 → SMAD3
ENSP00000218068 → PAGE4
ENSP00000446280 → PCSK5
ENSP00000313809 → AMBN
ENSP00000484343 → SERPINB6
ENSP00000332118 → EPHB3
EN