In [2]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.io as sio
from statsmodels.stats.multitest import multipletests

import warnings
warnings.filterwarnings('ignore')

# construct data

In [3]:
def read_subject_list(txt_path):
    """
    Read subject IDs from a plain-text file, one ID per line.

    txt_path: path of the text file to read
    Returns a list of str: every line with surrounding whitespace stripped,
    in file order; lines that are empty after stripping are skipped.
    """
    subject_ids = []
    with open(txt_path, "r") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            # Blank / whitespace-only lines carry no subject ID.
            if cleaned:
                subject_ids.append(cleaned)
    return subject_ids


def _build_pairwise_df(value_mat, human_ids, nonhuman_ids, species, hemisphere, value_name):
    """
    Flatten a human × non-human matrix into a long-format DataFrame.

    value_mat: 2-D array-like (n_human × n_nonhuman)
    human_ids: sequence of str (len=n_human), one per matrix row
    nonhuman_ids: sequence of str (len=n_nonhuman), one per matrix column
    species: str label copied into every row, e.g. "chimpanzee"
    hemisphere: str label copied into every row, "LH" or "RH"
    value_name: str, name of the value column (also used in error messages)

    Returns a DataFrame with columns human_id, nonhuman_id, species,
    hemisphere and <value_name>; one row per (human, non-human) pair, ordered
    row-major (every non-human for the first human, then the second, ...).

    Raises ValueError if value_mat is not 2-D or if its shape does not match
    the lengths of the two id sequences.
    """
    value_mat = np.asarray(value_mat)
    if value_mat.ndim != 2:
        # e.g. np.loadtxt returns a 1-D array for a single-row/column file.
        raise ValueError(f"Expected a 2-D matrix for {species}-{hemisphere}: {value_name} has shape {value_mat.shape}")

    n_h, n_x = value_mat.shape
    if len(human_ids) != n_h or len(nonhuman_ids) != n_x:
        raise ValueError(f"Shape mismatch for {species}-{hemisphere}: {value_name} {value_mat.shape}, human {len(human_ids)}, nonhuman {len(nonhuman_ids)}")

    n_pairs = n_h * n_x
    return pd.DataFrame({
        # Each human id is repeated once per non-human column ...
        "human_id": [h for h in human_ids for _ in range(n_x)],
        # ... while the full non-human id list cycles once per human row.
        "nonhuman_id": list(nonhuman_ids) * n_h,
        "species": [species] * n_pairs,
        "hemisphere": [hemisphere] * n_pairs,
        # ravel() walks the matrix in row-major index order, matching the ids above.
        value_name: value_mat.astype(float).ravel(),
    })


def build_df_dice(dice_mat, human_ids, nonhuman_ids, species, hemisphere):
    """
    Build a long-format table of pairwise dice values.

    dice_mat: numpy array (n_human × n_nonhuman)
    human_ids: list of str (len=n_human)
    nonhuman_ids: list of str (len=n_nonhuman)
    species: str, e.g. "chimpanzee"
    hemisphere: str, "LH" or "RH"

    Returns a DataFrame with columns human_id, nonhuman_id, species,
    hemisphere, dice. Raises ValueError on a shape / id-count mismatch.
    """
    return _build_pairwise_df(dice_mat, human_ids, nonhuman_ids, species, hemisphere, "dice")


def build_df_extension_ratio(extension_ratio_mat, human_ids, nonhuman_ids, species, hemisphere):
    """
    Build a long-format table of pairwise extension-ratio values.

    extension_ratio_mat: numpy array (n_human × n_nonhuman)
    human_ids: list of str (len=n_human)
    nonhuman_ids: list of str (len=n_nonhuman)
    species: str, e.g. "chimpanzee"
    hemisphere: str, "LH" or "RH"

    Returns a DataFrame with columns human_id, nonhuman_id, species,
    hemisphere, extension_ratio. Raises ValueError on a shape / id-count mismatch.
    """
    return _build_pairwise_df(extension_ratio_mat, human_ids, nonhuman_ids, species, hemisphere, "extension_ratio")


def build_df_local_corr(local_corr_mat, human_ids, nonhuman_ids, species, hemisphere):
    """
    Build a long-format table of pairwise local-correlation values.

    local_corr_mat: numpy array (n_human × n_nonhuman)
    human_ids: list of str (len=n_human)
    nonhuman_ids: list of str (len=n_nonhuman)
    species: str, e.g. "chimpanzee"
    hemisphere: str, "LH" or "RH"

    Returns a DataFrame with columns human_id, nonhuman_id, species,
    hemisphere, local_corr. Raises ValueError on a shape / id-count mismatch.
    """
    return _build_pairwise_df(local_corr_mat, human_ids, nonhuman_ids, species, hemisphere, "local_corr")

In [5]:
# Directory that holds the subject-list text files; kept in one place so the
# location only has to be changed once (same pattern as `datapath`).
subject_list_dir = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas"

human_list_path = f"{subject_list_dir}/humanlist40.txt"
macaque_list_path = f"{subject_list_dir}/macaque_tvb_list.txt"
marmoset_list_path = f"{subject_list_dir}/marmoset_MBM_list.txt"

# Trailing numbers are the subject counts noted by the original author; they
# are not checked here.
human_ids     = read_subject_list(human_list_path)     # 40
macaque_ids   = read_subject_list(macaque_list_path)   # 8
marmoset_ids  = read_subject_list(marmoset_list_path)  # 24

In [None]:
datapath = '/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/af_registration'


def _load_pairwise_txt(data_dir, metric, species, hemi):
    """
    Load `<data_dir>/<metric>_human_<species>_<hemi>.txt` with np.loadtxt and
    replace NaN / +inf / -inf entries with 0.0.

    metric: file-name prefix, "dice" or "extension_ratio"
    species: "macaque" or "marmoset"
    hemi: "l" or "r"
    Returns the cleaned numpy array.
    """
    values = np.loadtxt(f'{data_dir}/{metric}_human_{species}_{hemi}.txt')
    return np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)


def _load_mean_local_corr(data_dir, species, hemi):
    """
    Load the `localcorr_human_<species>_A45c_<hemi>` variable from
    `<data_dir>/localcorr_human_<species>_<hemi>_40_weighted_A45c.mat`,
    replace NaN / +inf / -inf entries with 0.0, and average over axis 2.

    species: "macaque" or "marmoset"
    hemi: "l" or "r"
    Returns the averaged numpy array (one axis fewer than the stored variable).
    """
    mat_contents = sio.loadmat(f'{data_dir}/localcorr_human_{species}_{hemi}_40_weighted_A45c.mat')
    values = mat_contents[f'localcorr_human_{species}_A45c_{hemi}']
    # NOTE(review): non-finite entries are zeroed *before* averaging, so they
    # pull the mean toward 0 rather than being excluded — confirm this is intended.
    values = np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)
    return np.mean(values, axis=2)


## macaque
dice_human_macaque_LH = _load_pairwise_txt(datapath, 'dice', 'macaque', 'l')
dice_human_macaque_RH = _load_pairwise_txt(datapath, 'dice', 'macaque', 'r')

extension_ratio_human_macaque_LH = _load_pairwise_txt(datapath, 'extension_ratio', 'macaque', 'l')
extension_ratio_human_macaque_RH = _load_pairwise_txt(datapath, 'extension_ratio', 'macaque', 'r')

local_corr_human_macaque_LH = _load_mean_local_corr(datapath, 'macaque', 'l')
local_corr_human_macaque_RH = _load_mean_local_corr(datapath, 'macaque', 'r')

## marmoset
dice_human_marmoset_LH = _load_pairwise_txt(datapath, 'dice', 'marmoset', 'l')
dice_human_marmoset_RH = _load_pairwise_txt(datapath, 'dice', 'marmoset', 'r')

extension_ratio_human_marmoset_LH = _load_pairwise_txt(datapath, 'extension_ratio', 'marmoset', 'l')
extension_ratio_human_marmoset_RH = _load_pairwise_txt(datapath, 'extension_ratio', 'marmoset', 'r')

local_corr_human_marmoset_LH = _load_mean_local_corr(datapath, 'marmoset', 'l')
local_corr_human_marmoset_RH = _load_mean_local_corr(datapath, 'marmoset', 'r')

In [9]:
# For each metric: build one long table per hemisphere, stack LH on top of RH
# per species, then stack macaque on top of marmoset (with a fresh 0..n-1
# index) and write the combined table to a CSV inside `datapath`.

# --- dice ---
df_dice_macaque = pd.concat([
    build_df_dice(mat, human_ids, macaque_ids, "human-macaque", hemi)
    for hemi, mat in (("LH", dice_human_macaque_LH), ("RH", dice_human_macaque_RH))
])
df_dice_marmoset = pd.concat([
    build_df_dice(mat, human_ids, marmoset_ids, "human-marmoset", hemi)
    for hemi, mat in (("LH", dice_human_marmoset_LH), ("RH", dice_human_marmoset_RH))
])
df_dice = pd.concat([df_dice_macaque, df_dice_marmoset], ignore_index=True)
df_dice.to_csv(f'{datapath}/dice.csv', index=False)


# --- extension ratio ---
df_extension_ratio_macaque = pd.concat([
    build_df_extension_ratio(mat, human_ids, macaque_ids, "human-macaque", hemi)
    for hemi, mat in (("LH", extension_ratio_human_macaque_LH), ("RH", extension_ratio_human_macaque_RH))
])
df_extension_ratio_marmoset = pd.concat([
    build_df_extension_ratio(mat, human_ids, marmoset_ids, "human-marmoset", hemi)
    for hemi, mat in (("LH", extension_ratio_human_marmoset_LH), ("RH", extension_ratio_human_marmoset_RH))
])
df_extension = pd.concat([df_extension_ratio_macaque, df_extension_ratio_marmoset], ignore_index=True)
df_extension.to_csv(f'{datapath}/extension_ratio.csv', index=False)


# --- local correlation ---
df_local_corr_macaque = pd.concat([
    build_df_local_corr(mat, human_ids, macaque_ids, "human-macaque", hemi)
    for hemi, mat in (("LH", local_corr_human_macaque_LH), ("RH", local_corr_human_macaque_RH))
])
df_local_corr_marmoset = pd.concat([
    build_df_local_corr(mat, human_ids, marmoset_ids, "human-marmoset", hemi)
    for hemi, mat in (("LH", local_corr_human_marmoset_LH), ("RH", local_corr_human_marmoset_RH))
])
df_local_corr = pd.concat([df_local_corr_macaque, df_local_corr_marmoset], ignore_index=True)
df_local_corr.to_csv(f'{datapath}/local_corr.csv', index=False)