In [None]:
import pandas as pd 

import numpy as np

importance_path = "simulation/att10/linear/bootstrap/0/explanations/"

# One frame per layer, each holding an 'importance' and a 'layer' column.
layer_importance_list = []
for layer in range(7):

    # Read this layer's DeepLIFT attribution table for every target index i
    # with layer < i <= 7.
    importance_list = [
        pd.read_csv(
            importance_path
            + "PNet_bootstrap_deeplift_target_{}_layer_{}_test.csv".format(i, layer),
            index_col=-1,
        )
        for i in range(layer + 1, 8)
    ]

    # The tables of one layer are expected to share index and columns, so the
    # absolute values are accumulated elementwise. The last two columns
    # (label and prediction) are left out of the accumulation.
    importance_sum = np.abs(importance_list[0].copy())
    for importance in importance_list[1:]:
        importance_sum.iloc[:, :-2] += np.abs(importance.iloc[:, :-2])

    # Collapse over rows: one total per remaining column, tagged with the layer.
    layer_importance = (
        importance_sum.iloc[:, :-2]
        .sum(axis=0)
        .to_frame("importance")
        .assign(layer=layer)
    )
    layer_importance_list.append(layer_importance)

from openxai.binn.data import PnetSimDataSet, ReactomeNetwork, get_layer_maps


# Simulated dataset for the same bootstrap run the attributions come from.
ds = PnetSimDataSet(
    root="simulation/att10/linear/bootstrap/0",
    num_features=1,
)

# File locations of the Reactome hierarchy used to build the layer maps.
reactome_config = {
    "reactome_base_dir": "./biological_knowledge/reactome",
    "relations_file_name": "ReactomePathwaysRelation.txt",
    "pathway_names_file_name": "ReactomePathways.txt",
    "pathway_genes_file_name": "ReactomePathways.gmt",
}
reactome = ReactomeNetwork(reactome_config)

# Layer-to-layer maps for the dataset's nodes.
maps = get_layer_maps(
    genes=list(ds.node_index),
    reactome=reactome,
    n_levels=6,
    direction="root_to_leaf",
    add_unk_genes=False,
)

# Stack the per-layer importance frames into one long table.
layer_importance_df = pd.concat(layer_importance_list, axis=0)

# Node names of layer 0.
is_layer_zero = layer_importance_df["layer"].eq(0)
genes = layer_importance_df.loc[is_layer_zero].index.tolist()

import pandas as pd

# maps: List[pd.DataFrame], length = 7
#   maps[0]: genes → pathway1
#   maps[1]: pathway1 → pathway2
#   ...
#   maps[5]: pathway5 → pathway6
#   maps[6]: pathway6 → outcome
#
# For layer i, the in-degree is the column sum of maps[i-1] (layer i-1 → layer i)
# and the out-degree is the row sum of maps[i] (layer i → layer i+1).
# Layer 0 has no incoming map, so its in-degree is set to 0 for every gene.

in_degrees = [pd.DataFrame(0, index=genes, columns=["in_degree"])]
in_degrees.extend(
    layer_map.sum(axis=0).astype(float).to_frame("in_degree")
    for layer_map in maps[:-1]
)

out_degrees = [
    layer_map.sum(axis=1).astype(float).to_frame("out_degree")
    for layer_map in maps
]

# Both lists now hold one frame per layer (7 in total).
assert len(in_degrees) == len(out_degrees) == 7

# Sanity check: degrees of layer 2 (pathway2).
print("Layer 2 in-degree:\n",  in_degrees[2].sort_values('in_degree').head())
print("Layer 2 out-degree:\n", out_degrees[2].head())

# Stack the layers, then put in- and out-degree side by side (aligned on node name).
in_degrees_df = pd.concat(in_degrees, axis=0)
out_degrees_df = pd.concat(out_degrees, axis=0)
degree_df = pd.concat([in_degrees_df, out_degrees_df], axis=1)

# Attach the degrees to the importance table and add the total degree.
importance_degree_df = pd.concat([layer_importance_df, degree_df], axis=1).assign(
    degree=lambda merged: merged["in_degree"] + merged["out_degree"]
)

import pandas as pd
import numpy as np

def connectivity_corrected_scores(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute per-layer z-scores of importance and degree, and their ratio.

    Within every layer, ``importance`` and ``degree`` are standardised with the
    population standard deviation (``ddof=0``), and
    ``score = Z_importance / Z_degree`` is derived from them.

    Parameters
    ----------
    df : DataFrame
        index: node names
        columns must include ['importance', 'layer', 'degree']

    Returns
    -------
    df_out : DataFrame
        A copy of the input with three extra columns:
        - Z_importance
        - Z_degree
        - score
        Rows keep the order they have in ``df``, even when the layers are
        interleaved or index labels repeat. Rows whose ``layer`` is NaN are
        not part of any group and are omitted. If no row belongs to a layer
        (e.g. ``df`` is empty), an empty frame with the same columns plus the
        three new ones is returned.

    Raises
    ------
    KeyError
        If one of the required columns is missing.

    Notes
    -----
    ``score`` is NaN wherever ``Z_degree`` is exactly 0, and can become very
    large when ``Z_degree`` is merely close to 0.
    """
    required = ("importance", "layer", "degree")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"df is missing required column(s): {missing}")

    # Work on a positional index so the input order can be restored afterwards
    # regardless of duplicated or unsorted index labels.
    work = df.reset_index(drop=True)

    results = []
    # Process each layer separately.
    for _, sub in work.groupby('layer', sort=False):
        imp = sub['importance']
        deg = sub['degree']

        # A constant column has std 0; divide by 1 instead so its z-scores
        # come out as 0 rather than NaN/inf.
        imp_std = imp.std(ddof=0)
        deg_std = deg.std(ddof=0)

        ZA = (imp - imp.mean()) / (imp_std if imp_std != 0 else 1.0)
        Zd = (deg - deg.mean()) / (deg_std if deg_std != 0 else 1.0)

        # Z_degree == 0 would divide by zero; report NaN for those nodes.
        S = ZA / Zd.replace(0, np.nan)

        temp = sub.copy()
        temp['Z_importance'] = ZA
        temp['Z_degree']     = Zd
        temp['score']        = S

        results.append(temp)

    if not results:
        # Nothing to score: pd.concat([]) would raise, so return an empty frame
        # that still carries the documented output columns.
        return df.iloc[0:0].assign(
            Z_importance=np.array([], dtype=float),
            Z_degree=np.array([], dtype=float),
            score=np.array([], dtype=float),
        )

    # Positions are unique, so sorting on them restores the input row order;
    # then map the positions back to the original index labels.
    df_out = pd.concat(results, axis=0).sort_index()
    df_out.index = df.index.take(df_out.index.to_numpy())
    return df_out

# Example usage: score the merged importance/degree table, then write the
# result as CSV into the directory given by `importance_path`.
df_scored = connectivity_corrected_scores(importance_degree_df)

df_scored.to_csv(importance_path + "PNet_bootstrap_deeplift_target_scores.csv")

In [49]:
# Inspect the first layer map (described in the code above as genes → pathway1).
maps[0]

Unnamed: 0,R-HSA-1059683,R-HSA-109688,R-HSA-109704,R-HSA-110056,R-HSA-110312,R-HSA-110314,R-HSA-110320,R-HSA-110328,R-HSA-110329,R-HSA-110330,...,R-HSA-977347,R-HSA-977441,R-HSA-977442,R-HSA-977606,R-HSA-982772,R-HSA-983168,R-HSA-983170,R-HSA-983189,R-HSA-983695,R-HSA-991365
AARS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AARS2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCC1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ABCD4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ADAM10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZBTB17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZFYVE9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZW10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZWILCH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Layer 2 in-degree:
                in_degree
R-HSA-2428928        0.0
R-HSA-166166         0.0
R-HSA-168179         0.0
R-HSA-168188         0.0
R-HSA-5635838        1.0
Layer 2 out-degree:
                out_degree
R-HSA-1059683         1.0
R-HSA-109688          1.0
R-HSA-110056          1.0
R-HSA-110312          1.0
R-HSA-110314          1.0


In [60]:
# Inspect the in-degree table stacked over all layers.
in_degrees_df

Unnamed: 0,in_degree
AARS,0.0
AARS2,0.0
ABCC1,0.0
ABCD4,0.0
ADAM10,0.0
...,...
R-HSA-73894,7.0
R-HSA-74160,6.0
R-HSA-8953854,11.0
R-HSA-8953897,3.0


In [61]:
# Inspect the out-degree table stacked over all layers.
out_degrees_df

Unnamed: 0,out_degree
AARS,1.0
AARS2,1.0
ABCC1,2.0
ABCD4,1.0
ADAM10,12.0
...,...
R-HSA-73894,1.0
R-HSA-74160,1.0
R-HSA-8953854,1.0
R-HSA-8953897,1.0


In [71]:
# Inspect the scored table returned by connectivity_corrected_scores.
df_scored

Unnamed: 0,importance,layer,in_degree,out_degree,degree,Z_importance,Z_degree,score
AARS,0.000000e+00,0,0.0,1.0,1.0,-0.182046,-0.645881,0.281857
AARS2,0.000000e+00,0,0.0,1.0,1.0,-0.182046,-0.645881,0.281857
ABCC1,0.000000e+00,0,0.0,2.0,2.0,-0.182046,-0.572151,0.318179
ABCD4,0.000000e+00,0,0.0,1.0,1.0,-0.182046,-0.645881,0.281857
ADAM10,0.000000e+00,0,0.0,12.0,12.0,-0.182046,0.165157,-1.102262
...,...,...,...,...,...,...,...,...
R-HSA-73894,8.782256e-09,6,7.0,1.0,8.0,-0.449388,0.305132,-1.472764
R-HSA-74160,1.760289e+00,6,6.0,1.0,7.0,1.228060,0.071796,17.104891
R-HSA-8953854,4.387023e-02,6,11.0,1.0,12.0,-0.407582,1.238478,-0.329099
R-HSA-8953897,6.951000e-05,6,3.0,1.0,4.0,-0.449321,-0.628213,0.715237


In [38]:
# NOTE(review): `out_degree_list` is not defined in any cell visible here (the
# degree cell builds `out_degrees`). This cell appears to rely on stale kernel
# state and will presumably raise NameError after Restart & Run All; confirm,
# then remove it or point it at the intended variable.
len(out_degree_list)

9