In [38]:
import os
import glob
import numpy as np
import pandas as pd

from scipy.spatial.distance import pdist, squareform
from scipy.stats import pearsonr,spearmanr

import matplotlib.pyplot as plt
import seaborn as sns
bahav_data_dir = '/Users/li/Desktop/task-debate/behavdata'

sub_list_num = list(range(13,51))
sub_list_num.remove(21)

time_points = list(range(0,2986,2)) + [2986]  # every TR minute
all_subject_data = []
for sub in sub_list_num:
    file_path = os.path.join(bahav_data_dir, 'during_scan', 'combined_6runs_per_TR_filter', f'subject_{sub}_TR_rate.csv')
    
    df = pd.read_csv(file_path)
    sub_data = df[df['time'].isin(time_points)]
    all_subject_data.append(list(sub_data['rate']))

attitude = pd.DataFrame(all_subject_data)
print(attitude.shape)

(37, 1494)


In [88]:

attitude_change_point = attitude.diff(axis=1)
attitude_change_point = attitude_change_point.drop(attitude_change_point.columns[0], axis=1)
df_binary = (attitude_change_point != 0).astype(int)
subs_change_points = np.array(df_binary)
subs_change_points.shape

(37, 1493)

In [90]:

remove_times = np.array([125,  249,  385,  548,  741,  945, 1231, 1492])

# 计算需要删除的时间点的索引
# 假设时间点范围是从 0 到 1000，找到对应列索引
remove_indices = remove_times[remove_times < subs_change_points.shape[1]]  # 确保索引在有效范围内

# 删除这些列的数据
subs_change_points = np.delete(subs_change_points, remove_indices, axis=1)

# 输出处理后的 subs_change_points 的形状
print(subs_change_points.shape)

(37, 1485)


In [92]:

change_point_counts = np.sum(subs_change_points, axis=1)

# 计算描述统计数据
desc_stats = {
    'Mean': np.mean(change_point_counts),
    'Std': np.std(change_point_counts),
    'Min': np.min(change_point_counts),
    'Max': np.max(change_point_counts),
    'Median': np.median(change_point_counts),
    '25th Percentile': np.percentile(change_point_counts, 25),
    '75th Percentile': np.percentile(change_point_counts, 75)
}

# 转换为 pandas DataFrame，方便查看
desc_stats_df = pd.DataFrame([desc_stats])

print(desc_stats_df)


        Mean      Std  Min  Max  Median  25th Percentile  75th Percentile
0  17.108108  12.8332    1   55    12.0              8.0             28.0


In [112]:

num_subjects = subs_change_points.shape[0]
dice_matrix = np.zeros((num_subjects, num_subjects))

# 计算每对被试的Dice系数
for i in range(num_subjects):
    for j in range(i, num_subjects):
        intersection = np.sum(np.logical_and(subs_change_points[i], subs_change_points[j]))
        union = np.sum(np.logical_or(subs_change_points[i], subs_change_points[j]))
        # 计算Dice系数
        dice_coefficient = 2 * intersection / (np.sum(subs_change_points[i]) + np.sum(subs_change_points[j]))
        dice_matrix[i, j] = dice_coefficient
        dice_matrix[j, i] = dice_coefficient

# 计算原始数据的平均Dice系数（只计算上三角，不包括对角线）
original_mean_dice = np.mean(dice_matrix[np.triu_indices(num_subjects, k=1)])

# 随机化检验
n_permutations = 1000  # 设置随机化次数
permutation_means = []

# 执行随机化检验
for _ in range(n_permutations):
    # 创建一个副本，不影响原始数据
    permuted_data = np.copy(subs_change_points)
    
    # 对每个被试独立打乱其变化点顺序
    for i in range(num_subjects):
        np.random.shuffle(permuted_data[i])  # 对每个被试的数据进行独立打乱
    
    # 重新计算随机化数据的Dice系数矩阵
    # 重新计算随机化数据的Dice系数矩阵
    permuted_dice_matrix = np.zeros((num_subjects, num_subjects))
    for i in range(num_subjects):
        for j in range(i, num_subjects):
            intersection = np.sum(np.logical_and(permuted_data[i], permuted_data[j]))
            union = np.sum(np.logical_or(permuted_data[i], permuted_data[j]))
            permuted_dice_matrix[i, j] = 2 * intersection / (np.sum(permuted_data[i]) + np.sum(permuted_data[j]))
            permuted_dice_matrix[j, i] = permuted_dice_matrix[i, j]
    
    # 计算随机化后的平均Dice系数
    permuted_mean_dice = np.mean(permuted_dice_matrix[np.triu_indices(num_subjects, k=1)])
    permutation_means.append(permuted_mean_dice)

# 计算p值
permutation_means = np.array(permutation_means)
p_value = np.mean(permutation_means >= original_mean_dice)

# 输出原始均值和p值
print(f"Original Mean Dice Coefficient: {original_mean_dice}")
print(f"P-value from Randomization Test: {p_value}")


Original Mean Dice Coefficient: 0.01905419617094492
P-value from Randomization Test: 0.0


In [120]:
num_subjects = subs_change_points.shape[0]
dice_matrix = np.zeros((num_subjects, num_subjects))

# 计算每对被试的Dice系数
for i in range(num_subjects):
    for j in range(i, num_subjects):
        intersection = np.sum(np.logical_and(subs_change_points[i], subs_change_points[j]))
        union = np.sum(np.logical_or(subs_change_points[i], subs_change_points[j]))
        # 计算Dice系数
        dice_coefficient = 2 * intersection / (np.sum(subs_change_points[i]) + np.sum(subs_change_points[j]))
        dice_matrix[i, j] = dice_coefficient
        dice_matrix[j, i] = dice_coefficient

# 计算原始数据的中位数Dice系数（只计算上三角，不包括对角线）
original_median_dice = np.median(dice_matrix[np.triu_indices(num_subjects, k=1)])

# 随机化检验
n_permutations = 1000  # 设置随机化次数
permutation_medians = []

# 执行随机化检验
for _ in range(n_permutations):
    # 创建一个副本，不影响原始数据
    permuted_data = np.copy(subs_change_points)
    
    # 对每个被试独立打乱其变化点顺序
    for i in range(num_subjects):
        np.random.shuffle(permuted_data[i])  # 对每个被试的数据进行独立打乱
    
    # 重新计算随机化数据的Dice系数矩阵
    permuted_dice_matrix = np.zeros((num_subjects, num_subjects))
    for i in range(num_subjects):
        for j in range(i, num_subjects):
            intersection = np.sum(np.logical_and(permuted_data[i], permuted_data[j]))
            union = np.sum(np.logical_or(permuted_data[i], permuted_data[j]))
            permuted_dice_matrix[i, j] = 2 * intersection / (np.sum(permuted_data[i]) + np.sum(permuted_data[j]))
            permuted_dice_matrix[j, i] = permuted_dice_matrix[i, j]
    
    # 计算随机化后的中位数Dice系数
    permuted_median_dice = np.median(permuted_dice_matrix[np.triu_indices(num_subjects, k=1)])
    permutation_medians.append(permuted_median_dice)

# 计算p值
permutation_medians = np.array(permutation_medians)
p_value = np.mean(permutation_medians >= original_median_dice)

# 输出原始中位数和p值
print(f"Original Median Dice Coefficient: {original_median_dice}")
print(f"P-value from Randomization Test: {p_value}")


Original Median Dice Coefficient: 0.0
P-value from Randomization Test: 1.0


In [114]:

# 计算描述统计数据
desc_stats = {
    'Mean': np.mean(permutation_means),
    'Std': np.std(permutation_means),
    'Min': np.min(permutation_means),
    'Max': np.max(permutation_means),
    'Median': np.median(permutation_means),
    '25th Percentile': np.percentile(permutation_means, 25),
    '75th Percentile': np.percentile(permutation_means, 75)
}

# 转换为 pandas DataFrame，方便查看
desc_stats_df = pd.DataFrame([desc_stats])

print(desc_stats_df)


       Mean       Std       Min       Max    Median  25th Percentile  \
0  0.008885  0.000862  0.006172  0.012223  0.008831         0.008288   

   75th Percentile  
0         0.009449  


In [100]:

num_subjects = subs_change_points.shape[0]
hamming_matrix = np.zeros((num_subjects, num_subjects))

# 计算每对被试的汉明距离
for i in range(num_subjects):
    for j in range(i+1, num_subjects):
        # 计算被试i和被试j之间的汉明距离
        hamming_distance = np.sum(subs_change_points[i] != subs_change_points[j])  # 不同位置的数量
        hamming_matrix[i, j] = hamming_distance
        hamming_matrix[j, i] = hamming_distance  # 对称矩阵

# 计算原始数据的平均汉明距离
original_mean_hamming = np.mean(hamming_matrix[np.triu_indices(num_subjects, k=1)])
print(f"Original Mean Hamming Distance: {original_mean_hamming}")

# 随机化检验
n_permutations = 1000  # 设置随机化次数
permutation_means = []

# 执行随机化检验
for _ in range(n_permutations):
    permuted_data = np.copy(subs_change_points)
    
    # 随机打乱每个被试的变化点
    for i in range(num_subjects):
        np.random.shuffle(permuted_data[i])  # 随机打乱每个被试的变化点顺序
    
    # 重新计算随机化数据的汉明距离矩阵
    permuted_hamming_matrix = np.zeros((num_subjects, num_subjects))
    for i in range(num_subjects):
        for j in range(i+1, num_subjects):
            permuted_hamming_distance = np.sum(permuted_data[i] != permuted_data[j])
            permuted_hamming_matrix[i, j] = permuted_hamming_distance
            permuted_hamming_matrix[j, i] = permuted_hamming_distance
    
    # 计算随机化后的平均汉明距离
    permuted_mean_hamming = np.mean(permuted_hamming_matrix[np.triu_indices(num_subjects, k=1)])
    permutation_means.append(permuted_mean_hamming)

# 计算p值
permutation_means = np.array(permutation_means)
p_value = np.mean(permutation_means >= original_mean_hamming)

print(f"P-value from Randomization Test: {p_value}")


Original Mean Hamming Distance: 33.38438438438438
P-value from Randomization Test: 1.0


In [108]:
original_mean_dice

0.01905419617094492

In [110]:

mean_dice_counts = dice_matrix[np.triu_indices(num_subjects, k=1)]

# 计算描述统计数据
desc_stats = {
    'Mean': np.mean(mean_dice_counts),
    'Std': np.std(mean_dice_counts),
    'Min': np.min(mean_dice_counts),
    'Max': np.max(mean_dice_counts),
    'Median': np.median(mean_dice_counts),
    '25th Percentile': np.percentile(mean_dice_counts, 25),
    '75th Percentile': np.percentile(mean_dice_counts, 75)
}

# 转换为 pandas DataFrame，方便查看
desc_stats_df = pd.DataFrame([desc_stats])

print(desc_stats_df)


       Mean       Std  Min       Max  Median  25th Percentile  75th Percentile
0  0.019054  0.035401  0.0  0.285714     0.0              0.0         0.037393


In [116]:
proportion_zeros = np.sum(np.array(mean_dice_counts) == 0) / len(mean_dice_counts)
proportion_zeros

0.7102102102102102

In [118]:
len(mean_dice_counts)

666