In [1]:
import scipy.io
import numpy as np
import networkx as nx
import os
import json
from scipy.stats import ttest_ind, ttest_rel
from tqdm import tqdm

In [2]:
# 储存文件路径
folder_path = '/Users/shenxiaoyu/Desktop/FC'

file_paths = [
    'shamfc_s1.mat', 'shamfc_s2.mat', 'shamfc_s3.mat',
    'tacs6fc_s1.mat', 'tacs6fc_s2.mat', 'tacs6fc_s3.mat',
    'tacs10fc_s1.mat', 'tacs10fc_s2.mat', 'tacs10fc_s3.mat',
    'tdcsfc_s1.mat', 'tdcsfc_s2.mat', 'tdcsfc_s3.mat'
]


# preprocessing brain data
def data_preprocessing(matrix, fc_threshold=0.3, zfc_threshold=1.5):
    # matrix = np.where(np.isinf(matrix), np.nan, matrix)  # Replace infinite values with NaN
    
    # Apply threshold to create a binary adjacency matrix
    threshold = zfc_threshold if 'zfc' in file_path.lower() else fc_threshold
    binary_weighted_matrix = np.where(np.abs(matrix) >= threshold, matrix, 0)
    
    # Remove self-loops
    np.fill_diagonal(binary_weighted_matrix, 0)
    
    return binary_weighted_matrix


# Function to save metrics
def save_metrics(metrics, filename='metrics.json'):
    with open(filename, 'w') as f:
        json.dump(metrics, f)

# Function to load metrics
def load_metrics(filename='metrics.json'):
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            return json.load(f)
    return {}

def clear_specific_property_from_metrics(property_name, filename='metrics.json'):
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            metrics = json.load(f)
        for graph_id in metrics:
            if property_name in metrics[graph_id]:
                del metrics[graph_id][property_name]
        with open(filename, 'w') as f:
            json.dump(metrics, f)
    else:
        print("Metrics file not found.")

# Uncomment the line below if you want to clear a specific computed result
#clear_specific_property_from_metrics('rich_club_coefficient')


# Function to delete the metrics file
def delete_metrics_file(filename='metrics.json'):
    if os.path.exists(filename):
        os.remove(filename)

# Uncomment the line below if you want to empty all previously computed results
#delete_metrics_file()

# 创建一个dict来存储每种条件下的graph
graphs = {
    'sham': {'s1': [], 's2': [], 's3': []},
    'tacs6': {'s1': [], 's2': [], 's3': []},
    'tacs10': {'s1': [], 's2': [], 's3': []},
    'tdcs': {'s1': [], 's2': [], 's3': []}
}

# 读取文件里的matrix和提取ZFC
for file_name in file_paths:
    file_path = os.path.join(folder_path, file_name)
    mat = scipy.io.loadmat(file_path)
    key = [k for k in mat.keys() if not k.startswith('__')][0]  # 提取主要数据的key
    matrix = mat[key]
    
    # condition是刺激类型, time_point对应刺激前中后
    base_name = file_name.replace('.mat', '')
    parts = base_name.split('_')
    if len(parts) == 2:
        condition_time, time_point = parts
        if 'zfc' in condition_time:
            condition = condition_time.replace('zfc', '')
            matrix_type = 'zfc'
        else:
            condition = condition_time.replace('fc', '')
            matrix_type = 'fc'
    else:
        condition, matrix_type, time_point = parts

    
    if matrix_type == 'fc':
        # 将其转换为graph
        for i in range(matrix.shape[0]):
            adj_matrix_binary = data_preprocessing(matrix[i])
            G = nx.from_numpy_matrix(adj_matrix_binary)
            graphs[condition][time_point].append(G)   

In [3]:
# WhitakerLab. (n.d.). Scona: Structural Covariance Networks. Retrieved [date], from https://whitakerlab.github.io/scona/_modules/scona/graph_measures.html
def participation_coefficient(G, module_partition):
    """ Calculate the participation coefficient for each node in the graph. """
    pc_dict = {}

    for m in module_partition.keys():
        M = set(module_partition[m])
        for v in M:
            degree = float(nx.degree(G=G, nbunch=v))
            if degree == 0:
                pc_dict[v] = 0
                continue

            wm_degree = float(sum([1 for u in M if (u, v) in G.edges()]))
            pc_dict[v] = 1 - ((float(wm_degree) / float(degree))**2)

    return pc_dict

In [4]:
# 定义所需要计算的graph property
def compute_graph_metrics(G, existing_metrics=None):
    
    metrics = existing_metrics if existing_metrics else {}
    
    if 'degree_centrality' not in metrics:
        metrics['degree_centrality'] = np.mean(list(nx.degree_centrality(G).values()))
        
    if 'betweenness_centrality' not in metrics:
        metrics['betweenness_centrality'] = np.mean(list(nx.betweenness_centrality(G, weight='weight').values()))
        
    if 'eigenvector_centrality' not in metrics:
        metrics['eigenvector_centrality'] = np.mean(list(nx.eigenvector_centrality(G, max_iter=10000, weight='weight').values()))
        
    if 'clustering_coefficient' not in metrics:
        metrics['clustering_coefficient'] = nx.average_clustering(G, weight='weight')
        
    if 'modularity' not in metrics:
        communities = list(nx.community.greedy_modularity_communities(G, weight='weight'))
        metrics['modularity'] = nx.algorithms.community.quality.modularity(G, communities, weight='weight')
        
    if 'rich_club_coefficient' not in metrics:
        try:
            rich_club = nx.rich_club_coefficient(G)
            avg_rich_club_coefficient = np.mean([v for v in rich_club.values() if v is not None])
        except ZeroDivisionError:
            avg_rich_club_coefficient = None
        metrics['rich_club_coefficient'] = avg_rich_club_coefficient
        
    if 'participation_coefficient' not in metrics:
        communities = list(nx.community.greedy_modularity_communities(G, weight='weight'))
        module_partition = {i: list(c) for i, c in enumerate(communities)}
        pc = participation_coefficient(G, module_partition)
        metrics['participation_coefficient'] = np.mean(list(pc.values()))
                                                           
    if 'connected_components' not in metrics:
        metrics['connected_components'] = nx.number_connected_components(G)
                                                           
    if 'minimum_spanning_tree' not in metrics:
        metrics['minimum_spanning_tree'] = nx.minimum_spanning_tree(G, weight='weight', ignore_nan=True).size(weight='weight') if nx.is_connected(G) else None
                                                           
    if 'cut_vertices' not in metrics:
        metrics['cut_vertices'] = len(list(nx.articulation_points(G)))
                                                           
    if 'biconnected_components' not in metrics:
        metrics['biconnected_components'] = len(list(nx.biconnected_components(G)))
    
    if 'maximum_spanning_tree' not in metrics:
        metrics['maximum_spanning_tree'] = nx.maximum_spanning_tree(G, weight='weight', ignore_nan=True).size(weight='weight') if nx.is_connected(G) else None
                                                           
        
    return metrics


# Store the computed metrics
metrics = load_metrics()


total_graphs = sum(len(graphs[condition][time_point]) for condition in graphs for time_point in graphs[condition])
with tqdm(total=total_graphs, desc="Overall Progress") as pbar:
    for condition in graphs:
        for time_point in graphs[condition]:
            for idx, G in enumerate(graphs[condition][time_point]):
                graph_id = f"{condition}_{time_point}_{idx}"
                if graph_id not in metrics:
                    metrics[graph_id] = compute_graph_metrics(G, metrics.get(graph_id, {}))
                pbar.update(1)
                
                

# Save the computed metrics to avoid redundant computation
save_metrics(metrics)

Overall Progress: 100%|█████████████████████| 252/252 [2:57:49<00:00, 42.34s/it]


In [5]:
def compare_analysis(metrics, condition1, time_point1, condition2, time_point2, graph_property, paired=False):
    data1 = [metrics[f"{condition1}_{time_point1}_{i}"][graph_property] for i in range(20) if f"{condition1}_{time_point1}_{i}" in metrics and metrics[f"{condition1}_{time_point1}_{i}"].get(graph_property) is not None]
    data2 = [metrics[f"{condition2}_{time_point2}_{i}"][graph_property] for i in range(20) if f"{condition2}_{time_point2}_{i}" in metrics and metrics[f"{condition2}_{time_point2}_{i}"].get(graph_property) is not None]
    
    # Filter out None values
    data1 = [x for x in data1 if x is not None]
    data2 = [x for x in data2 if x is not None]
    
    if len(data1) > 0 and len(data2) > 0:
        if paired:
            t_stat, p_value = ttest_rel(data1, data2)
        else:
            t_stat, p_value = ttest_ind(data1, data2)
    else:
        t_stat, p_value = None, None
    return t_stat, p_value


In [6]:
# 1. 验证四组s1相似性 
print("S_1:")
for condition in ['tacs6', 'tacs10', 'tdcs']:
    for graph_property in ['degree_centrality', 'betweenness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'modularity', 'rich_club_coefficient', 'participation_coefficient', 'connected_components', 'minimum_spanning_tree', 'cut_vertices', 'biconnected_components', 'maximum_spanning_tree']:
        t_stat, p_value = compare_analysis(metrics, 'sham', 's1', condition, 's1', graph_property)
        print(f"Sham S1 vs {condition} S1 - {graph_property}: T-statistic: {t_stat}, P-value: {p_value}")

S_1:
Sham S1 vs tacs6 S1 - degree_centrality: T-statistic: -1.5158222907222518, P-value: 0.1378394915305021
Sham S1 vs tacs6 S1 - betweenness_centrality: T-statistic: -0.06518869725539353, P-value: 0.9483655082561846
Sham S1 vs tacs6 S1 - eigenvector_centrality: T-statistic: 0.8196114675389528, P-value: 0.4175488714071829
Sham S1 vs tacs6 S1 - clustering_coefficient: T-statistic: -1.1491839808690936, P-value: 0.25766211106588716
Sham S1 vs tacs6 S1 - modularity: T-statistic: -0.7018969506731698, P-value: 0.4870204950708683
Sham S1 vs tacs6 S1 - rich_club_coefficient: T-statistic: 0.34610849959614215, P-value: 0.7320463417649827
Sham S1 vs tacs6 S1 - participation_coefficient: T-statistic: 0.2407976902740493, P-value: 0.8110064089051046
Sham S1 vs tacs6 S1 - connected_components: T-statistic: nan, P-value: nan
Sham S1 vs tacs6 S1 - minimum_spanning_tree: T-statistic: 0.39114022822619876, P-value: 0.6978781805829348
Sham S1 vs tacs6 S1 - cut_vertices: T-statistic: 1.0000000000000002, P-v

  t_stat, p_value = ttest_ind(data1, data2)


In [7]:
# 2. 验证真实刺激的s2与sham组的对比
print("S_2:")
for condition in ['tacs6', 'tacs10', 'tdcs']:
    for graph_property in ['degree_centrality', 'betweenness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'modularity', 'rich_club_coefficient', 'participation_coefficient', 'connected_components', 'minimum_spanning_tree', 'cut_vertices', 'biconnected_components', 'maximum_spanning_tree']:
        # 比较 s2
        t_stat, p_value = compare_analysis(metrics, 'sham', 's2', condition, 's2', graph_property)
        print(f"{condition} S2 vs Sham S2 - {graph_property}: T-statistic: {t_stat}, P-value: {p_value}")

S_2:
tacs6 S2 vs Sham S2 - degree_centrality: T-statistic: -2.5000370465293886, P-value: 0.016851970127870666
tacs6 S2 vs Sham S2 - betweenness_centrality: T-statistic: 0.11258502048982325, P-value: 0.9109522208280199
tacs6 S2 vs Sham S2 - eigenvector_centrality: T-statistic: 0.6479644464692832, P-value: 0.5209040924038947
tacs6 S2 vs Sham S2 - clustering_coefficient: T-statistic: -1.8350002986651084, P-value: 0.07434260827546142
tacs6 S2 vs Sham S2 - modularity: T-statistic: -1.0395522433712003, P-value: 0.3051174874745034
tacs6 S2 vs Sham S2 - rich_club_coefficient: T-statistic: 2.8740280701322103, P-value: 0.007144631606282836
tacs6 S2 vs Sham S2 - participation_coefficient: T-statistic: -0.26949411366644777, P-value: 0.7890075065486146
tacs6 S2 vs Sham S2 - connected_components: T-statistic: 0.0, P-value: 1.0
tacs6 S2 vs Sham S2 - minimum_spanning_tree: T-statistic: 0.742582204954819, P-value: 0.46255285680288805
tacs6 S2 vs Sham S2 - cut_vertices: T-statistic: nan, P-value: nan
ta

  t_stat, p_value = ttest_ind(data1, data2)


In [8]:
# 2. 验证真实刺激的s3与sham组的对比
print("S_3:")
for condition in ['tacs6', 'tacs10', 'tdcs']:
    for graph_property in ['degree_centrality', 'betweenness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'modularity', 'rich_club_coefficient', 'participation_coefficient', 'connected_components', 'minimum_spanning_tree', 'cut_vertices', 'biconnected_components', 'maximum_spanning_tree']:        
        # 比较 s3
        t_stat, p_value = compare_analysis(metrics, 'sham', 's3', condition, 's3', graph_property)
        print(f"{condition} S3 vs Sham S3 - {graph_property}: T-statistic: {t_stat}, P-value: {p_value}")

S_3:
tacs6 S3 vs Sham S3 - degree_centrality: T-statistic: -1.9688671644728732, P-value: 0.05629320572296763
tacs6 S3 vs Sham S3 - betweenness_centrality: T-statistic: -0.6569802725854058, P-value: 0.5151531538461004
tacs6 S3 vs Sham S3 - eigenvector_centrality: T-statistic: -0.4604495883441444, P-value: 0.6478177528874999
tacs6 S3 vs Sham S3 - clustering_coefficient: T-statistic: -1.2995440505988882, P-value: 0.20158617487909292
tacs6 S3 vs Sham S3 - modularity: T-statistic: -0.08164400854907589, P-value: 0.9353583278529278
tacs6 S3 vs Sham S3 - rich_club_coefficient: T-statistic: 1.2176336616883658, P-value: 0.23285427196484332
tacs6 S3 vs Sham S3 - participation_coefficient: T-statistic: -1.3568620780160368, P-value: 0.1828310590323425
tacs6 S3 vs Sham S3 - connected_components: T-statistic: 0.0, P-value: 1.0
tacs6 S3 vs Sham S3 - minimum_spanning_tree: T-statistic: 2.3270524338734013, P-value: 0.025703302893485346
tacs6 S3 vs Sham S3 - cut_vertices: T-statistic: 2.0323471117426513,

  t_stat, p_value = ttest_ind(data1, data2)
