In [1]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this blanket filter also hides any warning raised while the
# mixed model further down is being fitted; consider restricting it to the
# specific categories that are known to be noise.
warnings.filterwarnings('ignore')

In [2]:
# Root results directory: the input matrices are read from its `probtrackx2/`
# subfolder and the assembled per-region CSV is written directly into it.
# NOTE(review): absolute, machine-specific path; it has to be edited before the
# notebook can run on another machine.
datapath = '/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/af_projection/result'

In [None]:
# Left-hemisphere (`_l`) matrices, one per species.
# Downstream cells slice these as `matrix[:, roi]` and pair each resulting
# value with one entry of the species' subject list, i.e. rows are treated as
# subjects and columns as ROIs.
human_af_l_all = np.loadtxt(f'{datapath}/probtrackx2/human_af_l_thr0_norm.txt')
chimp_af_l_all = np.loadtxt(f'{datapath}/probtrackx2/chimp_af_l_thr0_norm.txt')
macaque_af_l_all = np.loadtxt(f'{datapath}/probtrackx2/macaque_tvb_af_l_thr0_norm.txt')
marmoset_af_l_all = np.loadtxt(f'{datapath}/probtrackx2/marmoset_MBM_af_l_thr0_norm.txt')

# Right-hemisphere (`_r`) counterparts, used the same way as the matrices above.
human_af_r_all = np.loadtxt(f'{datapath}/probtrackx2/human_af_r_thr0_norm.txt')
chimp_af_r_all = np.loadtxt(f'{datapath}/probtrackx2/chimp_af_r_thr0_norm.txt')
macaque_af_r_all = np.loadtxt(f'{datapath}/probtrackx2/macaque_tvb_af_r_thr0_norm.txt')
marmoset_af_r_all = np.loadtxt(f'{datapath}/probtrackx2/marmoset_MBM_af_r_thr0_norm.txt')

In [None]:
# Lookup tables used further down to translate a human ROI position into the
# ROI number of another species: the table is indexed with the 0-based human
# position and the looked-up value is decremented by one before it is used as
# a column index.
# np.loadtxt yields floats by default, which is why the looked-up values are
# wrapped in int() where they index into the matrices.
# NOTE(review): only the `_L` tables are loaded, and the columns derived from
# them are applied to the right-hemisphere matrices as well; confirm that the
# same mapping is valid for both hemispheres.
index_human_chimp_L = np.loadtxt(f'/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/index_human_chimp_L.txt')
index_human_macaque_L = np.loadtxt(f'/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/index_human_macaque_L.txt')
index_human_marmoset_L = np.loadtxt(f'/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/index_human_marmoset_L.txt')

## Construct the long-format data table (one row per subject and hemisphere)

In [5]:
def read_subject_list(txt_path):
    """Return the non-blank lines of a text file with whitespace stripped.

    Parameters
    ----------
    txt_path : str or path-like
        Text file to read, one subject ID per line.

    Returns
    -------
    list of str
        The stripped lines in file order. Lines that are empty once
        stripped are skipped.
    """
    subject_ids = []
    with open(txt_path, "r") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                subject_ids.append(cleaned)
    return subject_ids

def build_species_df(list_path, species_name, left_vals, right_vals, region):
    """Assemble the long-format table for one species and one region.

    Parameters
    ----------
    list_path : str or path-like
        Subject-list file, read with ``read_subject_list``.
    species_name : str
        Value written to the ``species`` column of every row.
    left_vals, right_vals : array-like
        Left- and right-hemisphere scores. Each is flattened to 1-D and
        must contain exactly one value per subject, in subject-list order.
    region : str
        Value written to the ``region`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``subject_id``, ``species``, ``hemisphere``, ``region`` and
        ``connectivity_score``. All ``"LH"`` rows come first, followed by
        all ``"RH"`` rows, under a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the number of subjects and the two score vectors differ in length.
    """
    subject_ids = read_subject_list(list_path)
    lh_scores = np.asarray(left_vals).reshape(-1)
    rh_scores = np.asarray(right_vals).reshape(-1)

    # All three lengths must agree, otherwise scores cannot be paired with IDs.
    if len(subject_ids) != len(lh_scores) or len(lh_scores) != len(rh_scores):
        raise ValueError(
            f"[{species_name}] length mismatch: "
            f"subjects={len(subject_ids)}, L={len(lh_scores)}, R={len(rh_scores)}"
        )

    # One frame per hemisphere; scalar columns are broadcast to every row.
    hemisphere_frames = [
        pd.DataFrame({
            "subject_id": subject_ids,
            "species": species_name,
            "hemisphere": hemi_label,
            "region": region,
            "connectivity_score": hemi_scores,
        })
        for hemi_label, hemi_scores in (("LH", lh_scores), ("RH", rh_scores))
    ]

    return pd.concat(hemisphere_frames, ignore_index=True)

In [6]:
# Target ROI: `i` is its 1-based position in the human matrices and `region`
# is the label written to the `region` column and to the output file name.
i = 20
region = "A44v"

# Subject-ID list file for each species.
human_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/humanlist40.txt"
chimp_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/chimplist46.txt"
macaque_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/macaque_tvb_list.txt"
marmoset_list_path = "/Users/yufanwang/Desktop/MarmosetWM_Project/revision_NC/indi_bp_atlas/marmoset_MBM_list.txt"

# 0-based ROI position per species. The non-human positions are read from the
# human -> species lookup tables and shifted by one, like `i` itself.
roi_index_human = i - 1
roi_index_chimp, roi_index_macaque, roi_index_marmoset = (
    lookup[roi_index_human] - 1
    for lookup in (index_human_chimp_L, index_human_macaque_L, index_human_marmoset_L)
)

# The looked-up positions are floats, so convert them before indexing.
col_chimp, col_macaque, col_marmoset = map(
    int, (roi_index_chimp, roi_index_macaque, roi_index_marmoset)
)

# Extract the ROI column (one value per subject) from both hemispheres.
# NOTE(review): the right-hemisphere matrices are indexed with the columns
# derived from the `_L` lookup tables; confirm the mapping holds for both sides.
value_human_L, value_human_R = human_af_l_all[:, roi_index_human], human_af_r_all[:, roi_index_human]
value_chimp_L, value_chimp_R = chimp_af_l_all[:, col_chimp], chimp_af_r_all[:, col_chimp]
value_macaque_L, value_macaque_R = macaque_af_l_all[:, col_macaque], macaque_af_r_all[:, col_macaque]
value_marmoset_L, value_marmoset_R = marmoset_af_l_all[:, col_marmoset], marmoset_af_r_all[:, col_marmoset]

# Stack the per-species tables in the order human, chimpanzee, macaque, marmoset.
species_inputs = [
    (human_list_path,    "human",      value_human_L,    value_human_R),
    (chimp_list_path,    "chimpanzee", value_chimp_L,    value_chimp_R),
    (macaque_list_path,  "macaque",    value_macaque_L,  value_macaque_R),
    (marmoset_list_path, "marmoset",   value_marmoset_L, value_marmoset_R),
]
df = pd.concat(
    [
        build_species_df(list_path, species_label, left_scores, right_scores, region)
        for list_path, species_label, left_scores, right_scores in species_inputs
    ],
    ignore_index=True,
)

print(df.head())
print(df.shape)  # expected: 2 * (40+46+8+24) = 236 rows

# Persist the assembled table next to the other results, one file per region.
df.to_csv(f'{datapath}/af_projection_{region}.csv', index=False)

  subject_id species hemisphere region  connectivity_score
0     100307   human         LH   A44v            0.251221
1     100408   human         LH   A44v            0.877148
2     103414   human         LH   A44v            0.969813
3     105115   human         LH   A44v            0.255724
4     106016   human         LH   A44v            0.887377
(236, 5)


## Construct and fit the MixedLM model: species * hemisphere

In [7]:
# Fix the category order so that "human", the first level, is the reference
# level for the `species[T.*]` terms reported in the summary.
species_levels = ["human", "chimpanzee", "macaque", "marmoset"]
df["species"] = pd.Categorical(df["species"], categories=species_levels)

# Fixed effects: species, hemisphere and their interaction.
# Observations are grouped by subject, so the two hemispheres of one subject
# share a group.
model = smf.mixedlm(
    "connectivity_score ~ species * hemisphere",
    data=df,
    groups=df["subject_id"],
)
result = model.fit()

print(result.summary())

                      Mixed Linear Model Regression Results
Model:                   MixedLM      Dependent Variable:      connectivity_score
No. Observations:        236          Method:                  REML              
No. Groups:              118          Scale:                   0.0222            
Min. group size:         2            Log-Likelihood:          74.1097           
Max. group size:         2            Converged:               Yes               
Mean group size:         2.0                                                     
---------------------------------------------------------------------------------
                                       Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------------------------------
Intercept                               0.508    0.026 19.249 0.000  0.457  0.560
species[T.chimpanzee]                  -0.303    0.036 -8.391 0.000 -0.374 -0.232
species[T.macaque]                    