In [1]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.io as sio
from statsmodels.stats.multitest import multipletests

import warnings
warnings.filterwarnings('ignore')

# construct data

In [2]:
def read_subject_list(txt_path):
    """
    Read a text file and return its non-blank lines as a list of strings.

    txt_path: path of the text file to open (read in text mode)

    Each line has surrounding whitespace stripped; lines that are empty
    after stripping are skipped. File order is preserved.
    """
    subs = []
    with open(txt_path, "r") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry:
                subs.append(entry)
    return subs


def build_df_site(KL_mat, human_ids, nonhuman_ids, species, site, hemisphere, region):
    """
    Build a long-format table with one row per (human, non-human) pair.

    KL_mat: 2-D array-like (n_human × n_nonhuman); entry [i, j] becomes the
        "KL" value of the row for human_ids[i] and nonhuman_ids[j]
    human_ids: list of str (len=n_human)
    nonhuman_ids: list of str (len=n_nonhuman)
    species: str, e.g. "human-chimpanzee"
    site: str, copied into the "site" column, e.g. "NCBR"
    hemisphere: str, "LH" or "RH"
    region: str

    Returns a DataFrame with columns human_id, nonhuman_id, species, site,
    hemisphere, region, KL. Rows are ordered with the human index varying
    slowest and the non-human index varying fastest. An empty pairing
    (zero humans or zero non-humans) yields an empty frame that still
    carries all seven columns, so it can be concatenated safely.

    Raises ValueError if KL_mat is not 2-D or if its shape does not match
    the lengths of the two id lists.
    """
    KL_arr = np.asarray(KL_mat, dtype=float)
    if KL_arr.ndim != 2:
        raise ValueError(
            f"KL matrix for {species}-{site}-{hemisphere} must be 2-D, got shape {KL_arr.shape}"
        )

    n_h, n_x = KL_arr.shape
    if len(human_ids) != n_h or len(nonhuman_ids) != n_x:
        # The site is part of the message because several datasets share
        # the same species label and would otherwise be indistinguishable.
        raise ValueError(f"Shape mismatch for {species}-{site}-{hemisphere}: KL {KL_arr.shape}, human {len(human_ids)}, nonhuman {len(nonhuman_ids)}")

    n_rows = n_h * n_x
    # Plain Python lists keep the id objects exactly as they were passed in.
    human_col = [h for h in human_ids for _ in range(n_x)]
    nonhuman_col = list(nonhuman_ids) * n_h

    return pd.DataFrame({
        "human_id": human_col,
        "nonhuman_id": nonhuman_col,
        "species": [species] * n_rows,
        "site": [site] * n_rows,
        "hemisphere": [hemisphere] * n_rows,
        "region": [region] * n_rows,
        # ravel() with the default order="C" walks [i, j] with j fastest,
        # regardless of the array's memory layout, matching the id columns.
        "KL": KL_arr.ravel(),
    })

In [3]:
# Every subject list sits in the same directory, so the prefix is spelled once.
atlas_dir = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas"

human_list_path = f"{atlas_dir}/humanlist40.txt"
chimp_list_path = f"{atlas_dir}/chimplist46.txt"
macaque_list_path = f"{atlas_dir}/macaque_tvb_list.txt"
marmoset_list_path = f"{atlas_dir}/marmoset_MBM_list.txt"
macaque_ucd_list_path = f"{atlas_dir}/macaque_ucdavis_list.txt"
marmoset_minds_list_path = f"{atlas_dir}/marmoset_minds_list.txt"

# One id list per cohort; the trailing numbers are the subject counts
# noted by the original author (not checked here).
human_ids = read_subject_list(human_list_path)                    # 40
chimp_ids = read_subject_list(chimp_list_path)                    # 46
macaque_ids = read_subject_list(macaque_list_path)                # 8
marmoset_ids = read_subject_list(marmoset_list_path)              # 24
macaque_ucd_ids = read_subject_list(macaque_ucd_list_path)        # 19
marmoset_minds_ids = read_subject_list(marmoset_minds_list_path)  # 110

In [5]:
i = 17           # ROI number; presumably 1-based, given the "- 1" below
region = "A45c"  # label written into the "region" column and output file names

roi_index_human = i - 1  # zero-based position along the third array axis

datapath = '/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/minKL_indi'


def _load_roi_slice(file_prefix, key_prefix, hemi):
    """
    Load one minKL .mat file and return the [:, :, roi_index_human] slice
    of the variable named "minKL_<key_prefix>_all_<hemi>".

    file_prefix: leading part of the file name, e.g. "c2h" or "mac2h_ucd"
    key_prefix: middle part of the variable name stored in the file
    hemi: "L" or "R", used in both the file name and the variable name
    """
    contents = sio.loadmat(f'{datapath}/{file_prefix}_minKL_on_human_atlas_all_subject.{hemi}.mat')
    return contents[f"minKL_{key_prefix}_all_{hemi}"][:, :, roi_index_human]


KL_human_chimp_LH = _load_roi_slice("c2h", "c2h", "L")
KL_human_chimp_RH = _load_roi_slice("c2h", "c2h", "R")

KL_human_macaque_LH = _load_roi_slice("mac2h", "mac2h", "L")
KL_human_macaque_RH = _load_roi_slice("mac2h", "mac2h", "R")

KL_human_marmoset_LH = _load_roi_slice("mar2h", "mar2h", "L")
KL_human_marmoset_RH = _load_roi_slice("mar2h", "mar2h", "R")

# The ucd / minds files store their data under the same variable names as
# the mac2h / mar2h files; only the file name carries the site tag.
KL_human_macaque_ucd_LH = _load_roi_slice("mac2h_ucd", "mac2h", "L")
KL_human_macaque_ucd_RH = _load_roi_slice("mac2h_ucd", "mac2h", "R")

KL_human_marmoset_minds_LH = _load_roi_slice("mar2h_minds", "mar2h", "L")
KL_human_marmoset_minds_RH = _load_roi_slice("mar2h_minds", "mar2h", "R")

In [8]:
def _pair_hemispheres(kl_lh, kl_rh, other_ids, species, site):
    """
    Build the LH and RH tables for one dataset and stack them, LH first.

    The original per-hemisphere row indices are kept (no ignore_index).
    """
    per_hemi = [
        build_df_site(kl, human_ids, other_ids, species, site, hemi, region)
        for kl, hemi in ((kl_lh, "LH"), (kl_rh, "RH"))
    ]
    return pd.concat(per_hemi)


df_chimp = _pair_hemispheres(KL_human_chimp_LH, KL_human_chimp_RH,
                             chimp_ids, "human-chimpanzee", "NCBR")
df_macaque = _pair_hemispheres(KL_human_macaque_LH, KL_human_macaque_RH,
                               macaque_ids, "human-macaque", "tvb")
df_marmoset = _pair_hemispheres(KL_human_marmoset_LH, KL_human_marmoset_RH,
                                marmoset_ids, "human-marmoset", "MBM")
df_macaque_ucd = _pair_hemispheres(KL_human_macaque_ucd_LH, KL_human_macaque_ucd_RH,
                                   macaque_ucd_ids, "human-macaque", "ucd")
df_marmoset_minds = _pair_hemispheres(KL_human_marmoset_minds_LH, KL_human_marmoset_minds_RH,
                                      marmoset_minds_ids, "human-marmoset", "minds")

result_dir = '/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/af_projection/result'

# (file-name suffix, tables to stack) for each CSV, in the order they are written.
exports = [
    ("", [df_chimp, df_macaque_ucd, df_marmoset_minds]),
    ("_macaque", [df_macaque, df_macaque_ucd]),
    ("_marmoset", [df_marmoset, df_marmoset_minds]),
]

for suffix, frames in exports:
    # `df` is rebound on each pass; after the loop it holds the last table.
    df = pd.concat(frames, ignore_index=True)
    print(df.head())
    print(df.shape)

    df.to_csv(f'{result_dir}/minKL_indi_{region}_ucd_minds{suffix}.csv', index=False)

  human_id nonhuman_id           species  site hemisphere region        KL
0   100307      Agatha  human-chimpanzee  NCBR         LH   A45c  1.539080
1   100307      Amanda  human-chimpanzee  NCBR         LH   A45c  2.418905
2   100307     Artemus  human-chimpanzee  NCBR         LH   A45c  3.334183
3   100307      Azalea  human-chimpanzee  NCBR         LH   A45c  1.959067
4   100307     Barbara  human-chimpanzee  NCBR         LH   A45c  1.287634
(14000, 7)
  human_id nonhuman_id        species site hemisphere region        KL
0   100307      sub-01  human-macaque  tvb         LH   A45c  2.781829
1   100307      sub-02  human-macaque  tvb         LH   A45c  1.586648
2   100307      sub-03  human-macaque  tvb         LH   A45c  2.309261
3   100307      sub-04  human-macaque  tvb         LH   A45c  2.743090
4   100307      sub-05  human-macaque  tvb         LH   A45c  2.318318
(2160, 7)
  human_id nonhuman_id         species site hemisphere region        KL
0   100307  sub-NIHm14  human-m