In [1]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.io as sio
from statsmodels.stats.multitest import multipletests

import warnings
warnings.filterwarnings('ignore')

# Construct data

In [2]:
def read_subject_list(txt_path):
    """Read subject identifiers from a text file, one per line.

    Parameters
    ----------
    txt_path : path-like
        File to read; opened in text mode with the platform default encoding.

    Returns
    -------
    list of str
        Each line with surrounding whitespace stripped, in file order.
        Lines that are empty after stripping are skipped.
    """
    subjects = []
    with open(txt_path, "r") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry:  # drop blank / whitespace-only lines
                subjects.append(entry)
    return subjects


def build_df(KL_mat, human_ids, nonhuman_ids, species, hemisphere, region):
    """
    Flatten a human × nonhuman KL matrix into a long-format DataFrame.

    One output row is produced per (human, nonhuman) pair, ordered row-major:
    all nonhuman subjects for the first human, then for the second, and so on.

    Parameters
    ----------
    KL_mat : array-like, shape (n_human, n_nonhuman)
        KL values; converted to a float array.
    human_ids : sequence of str, len == n_human
        Labels for the rows of ``KL_mat``.
    nonhuman_ids : sequence of str, len == n_nonhuman
        Labels for the columns of ``KL_mat``.
    species : str
        Label stored in the ``species`` column, e.g. "chimpanzee".
    hemisphere : str
        Label stored in the ``hemisphere`` column, "LH" or "RH".
    region : str
        Label stored in the ``region`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``human_id``, ``nonhuman_id``, ``species``, ``hemisphere``,
        ``region``, ``KL`` with ``n_human * n_nonhuman`` rows. The columns are
        present even when the matrix has zero rows or columns.

    Raises
    ------
    ValueError
        If ``KL_mat`` is not 2-D or its shape does not match the ID lists.
    """
    KL_mat = np.asarray(KL_mat, dtype=float)
    if KL_mat.ndim != 2:
        raise ValueError(f"KL matrix for {species}-{hemisphere} must be 2-D, got shape {KL_mat.shape}")

    n_h, n_x = KL_mat.shape
    if len(human_ids) != n_h or len(nonhuman_ids) != n_x:
        raise ValueError(f"Shape mismatch for {species}-{hemisphere}: KL {KL_mat.shape}, human {len(human_ids)}, nonhuman {len(nonhuman_ids)}")

    n_rows = n_h * n_x
    # Build whole columns at once instead of one dict per matrix cell.
    # Plain Python lists keep the ID objects exactly as supplied.
    return pd.DataFrame({
        "human_id": [h for h in human_ids for _ in range(n_x)],  # each human repeated n_x times
        "nonhuman_id": list(nonhuman_ids) * n_h,                 # nonhuman list cycled n_h times
        "species": [species] * n_rows,
        "hemisphere": [hemisphere] * n_rows,
        "region": [region] * n_rows,
        # order="C" gives logical row-major order regardless of memory layout
        # (slices of MATLAB-loaded arrays are often Fortran-ordered).
        "KL": KL_mat.ravel(order="C"),
    })

In [3]:
# Text files listing the subject IDs of each species, one ID per line.
human_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/humanlist40.txt"
chimp_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/chimplist46.txt"
macaque_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/macaque_tvb_list.txt"
marmoset_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/marmoset_MBM_list.txt"

# Read the four lists in the same order as the paths above.
# Expected subject counts: human 40, chimpanzee 46, macaque 8, marmoset 24.
human_ids, chimp_ids, macaque_ids, marmoset_ids = map(
    read_subject_list,
    (human_list_path, chimp_list_path, macaque_list_path, marmoset_list_path),
)

In [None]:
# ROI number paired with `region`; it is converted to a 0-based index below.
# NOTE(review): presumably 17 is the 1-based atlas index of "A45c" — confirm against the atlas label table.
i = 17
region = "A45c"

roi_index_human = i - 1

datapath = '/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/minKL_indi'


def _load_roi_minKL(mat_dir, source_tag, hemi_letter, roi_index):
    """Load one species/hemisphere minKL array and return the slice for one ROI.

    Parameters
    ----------
    mat_dir : str
        Directory containing the ``*_minKL_on_human_atlas_all_subject.*.mat`` files.
    source_tag : str
        Prefix shared by the file name and the variable name: "c2h", "mac2h" or "mar2h".
    hemi_letter : str
        "L" or "R"; also shared by the file name and the variable name.
    roi_index : int
        0-based index along the third axis of the stored array.

    Returns
    -------
    numpy.ndarray
        ``array[:, :, roi_index]`` of the variable ``minKL_<source_tag>_all_<hemi_letter>``.
    """
    # File name and variable key are built from the same tag/hemisphere so
    # they cannot be mis-paired, as could happen in the pasted version.
    mat_contents = sio.loadmat(f'{mat_dir}/{source_tag}_minKL_on_human_atlas_all_subject.{hemi_letter}.mat')
    return mat_contents[f"minKL_{source_tag}_all_{hemi_letter}"][:, :, roi_index]


# Human–chimpanzee
KL_human_chimp_LH = _load_roi_minKL(datapath, "c2h", "L", roi_index_human)
KL_human_chimp_RH = _load_roi_minKL(datapath, "c2h", "R", roi_index_human)

# Human–macaque
KL_human_macaque_LH = _load_roi_minKL(datapath, "mac2h", "L", roi_index_human)
KL_human_macaque_RH = _load_roi_minKL(datapath, "mac2h", "R", roi_index_human)

# Human–marmoset
KL_human_marmoset_LH = _load_roi_minKL(datapath, "mar2h", "L", roi_index_human)
KL_human_marmoset_RH = _load_roi_minKL(datapath, "mar2h", "R", roi_index_human)

In [None]:
# For each nonhuman species, stack the left- and right-hemisphere long tables.
df_chimp = pd.concat([
    build_df(kl_mat, human_ids, chimp_ids, "human-chimpanzee", hemi, region)
    for hemi, kl_mat in (("LH", KL_human_chimp_LH), ("RH", KL_human_chimp_RH))
])
df_macaque = pd.concat([
    build_df(kl_mat, human_ids, macaque_ids, "human-macaque", hemi, region)
    for hemi, kl_mat in (("LH", KL_human_macaque_LH), ("RH", KL_human_macaque_RH))
])
df_marmoset = pd.concat([
    build_df(kl_mat, human_ids, marmoset_ids, "human-marmoset", hemi, region)
    for hemi, kl_mat in (("LH", KL_human_marmoset_LH), ("RH", KL_human_marmoset_RH))
])

# Combine all species into one table with a fresh 0..n-1 index.
species_frames = [df_chimp, df_macaque, df_marmoset]
df = pd.concat(species_frames, ignore_index=True)
print(df.head())
print(df.shape)  # should be (40*46*2 + 40*8*2 + 40*24*2) = 6240 records

# Save the combined table for this region as CSV, without the index column.
df.to_csv(f'/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/af_projection/result/minKL_indi_{region}.csv', index=False)

  human_id nonhuman_id           species hemisphere region        KL
0   100307      Agatha  human-chimpanzee         LH   A45c  1.539080
1   100307      Amanda  human-chimpanzee         LH   A45c  2.418905
2   100307     Artemus  human-chimpanzee         LH   A45c  3.334183
3   100307      Azalea  human-chimpanzee         LH   A45c  1.959067
4   100307     Barbara  human-chimpanzee         LH   A45c  1.287634
(6240, 6)
