In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio

import warnings
warnings.filterwarnings('ignore')

In [2]:
def read_subject_list(txt_path):
    """Read a plain-text subject list, one identifier per line.

    Parameters
    ----------
    txt_path : str or path-like
        Text file to read.

    Returns
    -------
    list of str
        Every line with surrounding whitespace stripped, in file order.
        Lines that are empty after stripping are skipped.
    """
    subjects = []
    with open(txt_path, "r") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry:
                subjects.append(entry)
    return subjects


def _build_kl_long_df(KL_mat, human_ids, nonhuman_ids, species, hemisphere, tract_ids, region, value_col):
    """Flatten a 3-D KL array into a long-format DataFrame (shared implementation).

    Parameters
    ----------
    KL_mat : array-like, shape (n_human, n_nonhuman, n_tract)
        One value per (human, non-human, tract) combination.
    human_ids : sequence of str, len n_human
        Labels for axis 0 of ``KL_mat``.
    nonhuman_ids : sequence of str, len n_nonhuman
        Labels for axis 1 of ``KL_mat``.
    species : str
        Label copied into every row, e.g. "human-chimpanzee".
    hemisphere : str
        Label copied into every row, e.g. "LH" or "RH".
    tract_ids : sequence of str, len n_tract
        Labels for axis 2 of ``KL_mat``.
    region : str
        Label copied into every row.
    value_col : str
        Name of the column that receives the values of ``KL_mat``.

    Returns
    -------
    pandas.DataFrame
        One row per element of ``KL_mat`` with columns ``human_id``,
        ``nonhuman_id``, ``species``, ``hemisphere``, ``tract``, ``region``
        and ``value_col``. Rows are ordered with the human index varying
        slowest and the tract index varying fastest. An empty ``KL_mat``
        yields an empty frame that still has all seven columns.

    Raises
    ------
    ValueError
        If ``KL_mat`` is not 3-D, or if the lengths of the id lists do not
        match its shape.
    """
    values = np.asarray(KL_mat)
    if values.ndim != 3:
        raise ValueError(
            f"Shape mismatch for {species}-{hemisphere}: expected a 3-D KL array "
            f"(n_human, n_nonhuman, n_tract), got shape {values.shape}"
        )

    n_h, n_x, n_t = values.shape
    if len(human_ids) != n_h or len(nonhuman_ids) != n_x or len(tract_ids) != n_t:
        raise ValueError(f"Shape mismatch for {species}-{hemisphere}: KL {values.shape}, human {len(human_ids)}, nonhuman {len(nonhuman_ids)}, tract {len(tract_ids)}")

    # Object arrays keep the original Python labels untouched while repeating.
    humans = np.asarray(human_ids, dtype=object)
    nonhumans = np.asarray(nonhuman_ids, dtype=object)
    tracts = np.asarray(tract_ids, dtype=object)
    n_rows = values.size

    # Label layout mirrors a C-order walk over (human, nonhuman, tract):
    # tract cycles fastest, human slowest. ravel(order="C") gives the values in
    # that same logical order even when the array is Fortran-ordered in memory.
    table = pd.DataFrame({
        "human_id": np.repeat(humans, n_x * n_t),
        "nonhuman_id": np.tile(np.repeat(nonhumans, n_t), n_h),
        "species": np.full(n_rows, species, dtype=object),
        "hemisphere": np.full(n_rows, hemisphere, dtype=object),
        "tract": np.tile(tracts, n_h * n_x),
        "region": np.full(n_rows, region, dtype=object),
        value_col: values.astype(float).ravel(order="C"),
    })
    # Let pandas pick column dtypes from the labels, as it would for a list of records.
    return table.infer_objects()


def build_df_deltaKL(KL_mat, human_ids, nonhuman_ids, species, hemisphere, tract_ids, region):
    """
    Long-format table of a 3-D KL array, values stored in column "deltaKL".

    KL_mat: numpy array (n_human × n_nonhuman × n_tract)
    human_ids: list of str (len=n_human)
    nonhuman_ids: list of str (len=n_nonhuman)
    species: str, e.g. "chimpanzee"
    hemisphere: str, "LH" or "RH"
    tract_ids: list of str (len=n_tract)
    region: str

    Returns a DataFrame with one row per (human, nonhuman, tract) element and
    columns human_id, nonhuman_id, species, hemisphere, tract, region, deltaKL.
    Raises ValueError if KL_mat is not 3-D or the id lists do not match its shape.
    """
    return _build_kl_long_df(
        KL_mat, human_ids, nonhuman_ids, species, hemisphere, tract_ids, region, "deltaKL"
    )


def build_df_minKL(KL_mat, human_ids, nonhuman_ids, species, hemisphere, tract_ids, region):
    """
    Long-format table of a 3-D KL array, values stored in column "minKL".

    KL_mat: numpy array (n_human × n_nonhuman × n_tract)
    human_ids: list of str (len=n_human)
    nonhuman_ids: list of str (len=n_nonhuman)
    species: str, e.g. "chimpanzee"
    hemisphere: str, "LH" or "RH"
    tract_ids: list of str (len=n_tract)
    region: str

    Returns a DataFrame with one row per (human, nonhuman, tract) element and
    columns human_id, nonhuman_id, species, hemisphere, tract, region, minKL.
    Raises ValueError if KL_mat is not 3-D or the id lists do not match its shape.
    """
    return _build_kl_long_df(
        KL_mat, human_ids, nonhuman_ids, species, hemisphere, tract_ids, region, "minKL"
    )

In [3]:
# Text files holding one subject identifier per line, one file per species.
human_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/humanlist40.txt"
chimp_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/chimplist46.txt"
macaque_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/macaque_tvb_list.txt"
marmoset_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/marmoset_MBM_list.txt"

# Expected list lengths per the original annotations:
# 40 humans, 46 chimpanzees, 8 macaques, 24 marmosets.
human_ids, chimp_ids, macaque_ids, marmoset_ids = map(
    read_subject_list,
    (human_list_path, chimp_list_path, macaque_list_path, marmoset_list_path),
)

In [4]:
# Region label: written into the "region" column of the result tables and
# embedded in the names of the output CSV files.
region = "A45c"

# Directory that the .mat input files are loaded from.
datapath = '/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/knockout_indi'

In [None]:
# Tract labels for the third axis of the deltaKL arrays.
# NOTE(review): only the length is validated downstream; the order is assumed
# to match the order used when the .mat files were written — confirm.
tract_ids = ['AF', 'ATR', 'SLF3', 'UF']


def _load_deltaKL(species_tag, hemi):
    """Load one deltaKL array for the current ``region`` from ``datapath``.

    Parameters
    ----------
    species_tag : str
        File/variable prefix of the comparison: "c2h", "mac2h" or "mar2h".
    hemi : str
        Hemisphere suffix used in the file and variable names: "L" or "R".

    Returns
    -------
    numpy.ndarray
        The variable ``deltaKL_<species_tag>_<region>_<hemi>`` stored in
        ``<species_tag>_deltaKL_on_human_atlas_<region>.<hemi>.mat``.

    The file name and variable key are derived from ``region`` so the loaded
    data always matches the region label written to the table and the CSV name.
    """
    mat = sio.loadmat(f'{datapath}/{species_tag}_deltaKL_on_human_atlas_{region}.{hemi}.mat')
    return mat[f"deltaKL_{species_tag}_{region}_{hemi}"]


deltaKL_human_chimp_LH = _load_deltaKL("c2h", "L")
deltaKL_human_chimp_RH = _load_deltaKL("c2h", "R")

deltaKL_human_macaque_LH = _load_deltaKL("mac2h", "L")
deltaKL_human_macaque_RH = _load_deltaKL("mac2h", "R")

deltaKL_human_marmoset_LH = _load_deltaKL("mar2h", "L")
deltaKL_human_marmoset_RH = _load_deltaKL("mar2h", "R")

In [6]:
# One long-format table per species pair: left and right hemisphere stacked.
df_chimp = pd.concat([
    build_df_deltaKL(deltaKL_human_chimp_LH, human_ids, chimp_ids, "human-chimpanzee", "LH", tract_ids, region),
    build_df_deltaKL(deltaKL_human_chimp_RH, human_ids, chimp_ids, "human-chimpanzee", "RH", tract_ids, region)
])
df_macaque = pd.concat([
    build_df_deltaKL(deltaKL_human_macaque_LH, human_ids, macaque_ids, "human-macaque", "LH", tract_ids, region),
    build_df_deltaKL(deltaKL_human_macaque_RH, human_ids, macaque_ids, "human-macaque", "RH", tract_ids, region)
])
df_marmoset = pd.concat([
    build_df_deltaKL(deltaKL_human_marmoset_LH, human_ids, marmoset_ids, "human-marmoset", "LH", tract_ids, region),
    build_df_deltaKL(deltaKL_human_marmoset_RH, human_ids, marmoset_ids, "human-marmoset", "RH", tract_ids, region)
])

# Stack the three species pairs and renumber the rows from 0.
df = pd.concat([df_chimp, df_macaque, df_marmoset], ignore_index=True)
print(df.head())
print(df.shape)

# Expected row count: every (human, non-human) pair x 2 hemispheres x n_tract.
# For 40 humans, 46 chimpanzees, 8 macaques, 24 marmosets and 4 tracts this is
# (40*46 + 40*8 + 40*24) * 2 * 4 = 24960.
expected_rows = 2 * len(tract_ids) * len(human_ids) * (
    len(chimp_ids) + len(macaque_ids) + len(marmoset_ids)
)
# Check before writing so a malformed table never reaches disk.
assert df.shape == (expected_rows, 7), f"unexpected table shape {df.shape}, expected ({expected_rows}, 7)"

df.to_csv(f'/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/af_projection/result/deltaKL_indi_{region}.csv', index=False)

  human_id nonhuman_id           species hemisphere tract region   deltaKL
0   100307      Agatha  human-chimpanzee         LH    AF   A45c  0.110992
1   100307      Agatha  human-chimpanzee         LH   ATR   A45c  0.031077
2   100307      Agatha  human-chimpanzee         LH  SLF3   A45c  0.090206
3   100307      Agatha  human-chimpanzee         LH    UF   A45c  0.064082
4   100307      Amanda  human-chimpanzee         LH    AF   A45c  0.510599
(24960, 7)


In [10]:
# Tract labels for the third axis of the minKL arrays (one more entry, 'ALL',
# than the deltaKL list).
# NOTE(review): only the length is validated downstream; the order is assumed
# to match the order used when the .mat files were written — confirm.
tract_ids = ['AF', 'ATR', 'SLF3', 'UF', 'ALL']


def _load_minKL(species_tag, hemi):
    """Load one minKL array for the current ``region`` from ``datapath``.

    Parameters
    ----------
    species_tag : str
        File/variable prefix of the comparison: "c2h", "mac2h" or "mar2h".
    hemi : str
        Hemisphere suffix used in the file and variable names: "L" or "R".

    Returns
    -------
    numpy.ndarray
        The variable ``minKL_<species_tag>_<region>_<hemi>`` stored in
        ``<species_tag>_minKL_on_human_atlas_<region>.<hemi>.mat``.

    The file name and variable key are derived from ``region`` so the loaded
    data always matches the region label written to the table and the CSV name.
    """
    mat = sio.loadmat(f'{datapath}/{species_tag}_minKL_on_human_atlas_{region}.{hemi}.mat')
    return mat[f"minKL_{species_tag}_{region}_{hemi}"]


minKL_human_chimp_LH = _load_minKL("c2h", "L")
minKL_human_chimp_RH = _load_minKL("c2h", "R")

minKL_human_macaque_LH = _load_minKL("mac2h", "L")
minKL_human_macaque_RH = _load_minKL("mac2h", "R")

minKL_human_marmoset_LH = _load_minKL("mar2h", "L")
minKL_human_marmoset_RH = _load_minKL("mar2h", "R")


In [None]:
# One long-format table per species pair: left and right hemisphere stacked.
df_chimp = pd.concat([
    build_df_minKL(minKL_human_chimp_LH, human_ids, chimp_ids, "human-chimpanzee", "LH", tract_ids, region),
    build_df_minKL(minKL_human_chimp_RH, human_ids, chimp_ids, "human-chimpanzee", "RH", tract_ids, region)
])
df_macaque = pd.concat([
    build_df_minKL(minKL_human_macaque_LH, human_ids, macaque_ids, "human-macaque", "LH", tract_ids, region),
    build_df_minKL(minKL_human_macaque_RH, human_ids, macaque_ids, "human-macaque", "RH", tract_ids, region)
])
df_marmoset = pd.concat([
    build_df_minKL(minKL_human_marmoset_LH, human_ids, marmoset_ids, "human-marmoset", "LH", tract_ids, region),
    build_df_minKL(minKL_human_marmoset_RH, human_ids, marmoset_ids, "human-marmoset", "RH", tract_ids, region)
])

# Stack the three species pairs and renumber the rows from 0.
df = pd.concat([df_chimp, df_macaque, df_marmoset], ignore_index=True)
print(df.head())
print(df.shape)

# Expected row count: every (human, non-human) pair x 2 hemispheres x n_tract.
# For 40 humans, 46 chimpanzees, 8 macaques, 24 marmosets and 5 tract entries
# this is (40*46 + 40*8 + 40*24) * 2 * 5 = 31200.
expected_rows = 2 * len(tract_ids) * len(human_ids) * (
    len(chimp_ids) + len(macaque_ids) + len(marmoset_ids)
)
# Check before writing so a malformed table never reaches disk.
assert df.shape == (expected_rows, 7), f"unexpected table shape {df.shape}, expected ({expected_rows}, 7)"

df.to_csv(f'/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/af_projection/result/minKL_lesion_indi_{region}.csv', index=False)

  human_id nonhuman_id           species hemisphere tract region     minKL
0   100307      Agatha  human-chimpanzee         LH    AF   A45c  1.650072
1   100307      Agatha  human-chimpanzee         LH   ATR   A45c  1.570156
2   100307      Agatha  human-chimpanzee         LH  SLF3   A45c  1.629285
3   100307      Agatha  human-chimpanzee         LH    UF   A45c  1.474998
4   100307      Agatha  human-chimpanzee         LH   ALL   A45c  1.539080
(31200, 7)
