In [1]:
import scipy.io
import numpy as np
import networkx as nx
import os
import json
import time   #预估运行时间(可删除)
from scipy.stats import ttest_ind, ttest_rel

NameError: name 'nx' is not defined

In [2]:
# 储存文件路径
folder_path = '/Users/shenxiaoyu/Desktop/FC'

file_paths = [
    'shamfc_s1.mat', 'shamfc_s2.mat', 'shamfc_s3.mat',
    'tacs6fc_s1.mat', 'tacs6fc_s2.mat', 'tacs6fc_s3.mat',
    'tacs10fc_s1.mat', 'tacs10fc_s2.mat', 'tacs10fc_s3.mat',
    'tdcsfc_s1.mat', 'tdcsfc_s2.mat', 'tdcsfc_s3.mat'
]


# preprocessing brain data
def data_preprocessing(matrix, fc_threshold=0.5, zfc_threshold=1.5):
    # matrix = np.where(np.isinf(matrix), np.nan, matrix)  # Replace infinite values with NaN
    
    # Apply threshold to create a binary adjacency matrix
    threshold = zfc_threshold if 'zfc' in file_path.lower() else fc_threshold
    binary_weighted_matrix = np.where(matrix >= fc_threshold, matrix, 0)
    
    # Remove self-loops
    np.fill_diagonal(binary_weighted_matrix, 0)
    
    return binary_weighted_matrix


# Function to save metrics
def save_metrics(metrics, filename='metrics.json'):
    with open(filename, 'w') as f:
        json.dump(metrics, f)

# Function to load metrics
def load_metrics(filename='metrics.json'):
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            return json.load(f)
    return {}

def clear_specific_property_from_metrics(property_name, filename='metrics.json'):
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            metrics = json.load(f)
        for graph_id in metrics:
            if property_name in metrics[graph_id]:
                del metrics[graph_id][property_name]
        with open(filename, 'w') as f:
            json.dump(metrics, f)
    else:
        print("Metrics file not found.")

# Clear specific property before recomputing
#clear_specific_property_from_metrics('rich_club_coefficient')

#clear_specific_property_from_metrics('participation_coefficient')
# Function to delete the metrics file
def delete_metrics_file(filename='metrics.json'):
    if os.path.exists(filename):
        os.remove(filename)

# Delete the metrics file before recomputing
delete_metrics_file()

# 创建一个dict来存储每种条件下的graph
graphs = {
    'sham': {'s1': [], 's2': [], 's3': []},
    'tacs6': {'s1': [], 's2': [], 's3': []},
    'tacs10': {'s1': [], 's2': [], 's3': []},
    'tdcs': {'s1': [], 's2': [], 's3': []}
}

# 读取文件里的matrix和提取ZFC
for file_name in file_paths:
    file_path = os.path.join(folder_path, file_name)
    mat = scipy.io.loadmat(file_path)
    key = [k for k in mat.keys() if not k.startswith('__')][0]  # 提取主要数据的key
    matrix = mat[key]
    
    # condition是刺激类型, time_point对应刺激前中后
    base_name = file_name.replace('.mat', '')
    parts = base_name.split('_')
    if len(parts) == 2:
        condition_time, time_point = parts
        if 'zfc' in condition_time:
            condition = condition_time.replace('zfc', '')
            matrix_type = 'zfc'
        else:
            condition = condition_time.replace('fc', '')
            matrix_type = 'fc'
    else:
        condition, matrix_type, time_point = parts

    
    if matrix_type == 'fc':
        # 将其转换为graph
        for i in range(matrix.shape[0]):
            adj_matrix_binary = data_preprocessing(matrix[i])
            G = nx.from_numpy_matrix(adj_matrix_binary)
            graphs[condition][time_point].append(G)   

In [3]:
# WhitakerLab. (n.d.). Scona: Structural Covariance Networks. Retrieved [date], from https://whitakerlab.github.io/scona/_modules/scona/graph_measures.html
def participation_coefficient(G, module_partition):
    """ Calculate the participation coefficient for each node in the graph. """
    pc_dict = {}

    for m in module_partition.keys():
        M = set(module_partition[m])
        for v in M:
            degree = float(nx.degree(G=G, nbunch=v))
            if degree == 0:
                pc_dict[v] = 0
                continue

            wm_degree = float(sum([1 for u in M if (u, v) in G.edges()]))
            pc_dict[v] = 1 - ((float(wm_degree) / float(degree))**2)

    return pc_dict

In [4]:
# 定义所需要计算的graph property
def compute_graph_metrics(G, existing_metrics=None):
    
    metrics = existing_metrics if existing_metrics else {}
    
    if 'degree_centrality' not in metrics:
        metrics['degree_centrality'] = np.mean(list(nx.degree_centrality(G).values()))
        
    if 'betweenness_centrality' not in metrics:
        metrics['betweenness_centrality'] = np.mean(list(nx.betweenness_centrality(G, weight='weight').values()))
        
    if 'eigenvector_centrality' not in metrics:
        metrics['eigenvector_centrality'] = np.mean(list(nx.eigenvector_centrality(G, max_iter=10000, weight='weight').values()))
        
    if 'clustering_coefficient' not in metrics:
        metrics['clustering_coefficient'] = nx.average_clustering(G, weight='weight')
        
    if 'modularity' not in metrics:
        communities = list(nx.community.greedy_modularity_communities(G, weight='weight'))
        metrics['modularity'] = nx.algorithms.community.quality.modularity(G, communities, weight='weight')
        
    if 'rich_club_coefficient' not in metrics:
        try:
            rich_club = nx.rich_club_coefficient(G)
            avg_rich_club_coefficient = np.mean([v for v in rich_club.values() if v is not None])
        except ZeroDivisionError:
            avg_rich_club_coefficient = None
        metrics['rich_club_coefficient'] = avg_rich_club_coefficient
        
    if 'participation_coefficient' not in metrics:
        communities = list(nx.community.greedy_modularity_communities(G, weight='weight'))
        module_partition = {i: list(c) for i, c in enumerate(communities)}
        pc = participation_coefficient(G, module_partition)
        metrics['participation_coefficient'] = np.mean(list(pc.values()))
        
    if 'path_length' not in metrics:
        components = list(nx.connected_components(G))
        component_lengths = []
        for component in components:
            subgraph = G.subgraph(component)
            if len(component) > 1:  # Ensure the component has more than one node
                length = nx.average_shortest_path_length(subgraph, weight='weight')
                component_lengths.append(length)
        avg_length = np.mean(component_lengths)
        metrics['path_length'] = avg_length
                                                           
    if 'connected_components' not in metrics:
        metrics['connected_components'] = nx.number_connected_components(G)
                                                           
    if 'minimum_spanning_tree' not in metrics:
        if nx.is_connected(G):
            metrics['minimum_spanning_tree'] = nx.minimum_spanning_tree(G, weight='weight').size(weight='weight')
        else:
            mst_edges = nx.minimum_spanning_edges(G, weight='weight', data=False)
            mst_weight = sum(G[u][v]['weight'] for u, v in mst_edges)
            metrics['minimum_spanning_tree'] = mst_weight 
                                                           
    if 'cut_vertices' not in metrics:
        metrics['cut_vertices'] = len(list(nx.articulation_points(G)))
                                                           
    if 'biconnected_components' not in metrics:
        metrics['biconnected_components'] = len(list(nx.biconnected_components(G)))
    
        
    return metrics


# Store the computed metrics
metrics = load_metrics()

# 预估运行时间(可删除)
metrics_start_time = time.time()
total_graphs = sum(len(graphs[condition][time_point]) for condition in graphs for time_point in graphs[condition])
processed_graphs = 0

# Compute metrics for all graphs
for condition in graphs:
    for time_point in graphs[condition]:
        for idx, G in enumerate(graphs[condition][time_point]):
            graph_id = f"{condition}_{time_point}_{idx}"
            if graph_id not in metrics:
                metrics[graph_id] = compute_graph_metrics(G, metrics.get(graph_id, {}))
                
                
                # 预估运行时间(可删除)
                processed_graphs += 1
                elapsed_time = time.time() - metrics_start_time
                estimated_total_time = (elapsed_time / processed_graphs) * total_graphs
                remaining_time = estimated_total_time - elapsed_time
                print(f"Processed {processed_graphs}/{total_graphs} graphs. Estimated remaining time: {remaining_time:.2f} seconds.")

# Save the computed metrics
save_metrics(metrics)

Processed 1/252 graphs. Estimated remaining time: 1532.85 seconds.
Processed 2/252 graphs. Estimated remaining time: 1401.05 seconds.
Processed 3/252 graphs. Estimated remaining time: 1406.11 seconds.
Processed 4/252 graphs. Estimated remaining time: 1873.95 seconds.
Processed 5/252 graphs. Estimated remaining time: 1912.39 seconds.
Processed 6/252 graphs. Estimated remaining time: 1857.24 seconds.
Processed 7/252 graphs. Estimated remaining time: 1876.78 seconds.
Processed 8/252 graphs. Estimated remaining time: 1776.72 seconds.
Processed 9/252 graphs. Estimated remaining time: 1722.76 seconds.
Processed 10/252 graphs. Estimated remaining time: 1671.85 seconds.
Processed 11/252 graphs. Estimated remaining time: 1662.14 seconds.
Processed 12/252 graphs. Estimated remaining time: 1663.43 seconds.
Processed 13/252 graphs. Estimated remaining time: 1832.96 seconds.
Processed 14/252 graphs. Estimated remaining time: 1775.60 seconds.
Processed 15/252 graphs. Estimated remaining time: 1744.8

Processed 122/252 graphs. Estimated remaining time: 1047.19 seconds.
Processed 123/252 graphs. Estimated remaining time: 1036.08 seconds.
Processed 124/252 graphs. Estimated remaining time: 1027.49 seconds.
Processed 125/252 graphs. Estimated remaining time: 1016.49 seconds.
Processed 126/252 graphs. Estimated remaining time: 1009.70 seconds.
Processed 127/252 graphs. Estimated remaining time: 1000.79 seconds.
Processed 128/252 graphs. Estimated remaining time: 994.02 seconds.
Processed 129/252 graphs. Estimated remaining time: 984.10 seconds.
Processed 130/252 graphs. Estimated remaining time: 974.17 seconds.
Processed 131/252 graphs. Estimated remaining time: 965.14 seconds.
Processed 132/252 graphs. Estimated remaining time: 957.13 seconds.
Processed 133/252 graphs. Estimated remaining time: 951.43 seconds.
Processed 134/252 graphs. Estimated remaining time: 941.05 seconds.
Processed 135/252 graphs. Estimated remaining time: 929.91 seconds.
Processed 136/252 graphs. Estimated remain

Processed 243/252 graphs. Estimated remaining time: 73.07 seconds.
Processed 244/252 graphs. Estimated remaining time: 64.96 seconds.
Processed 245/252 graphs. Estimated remaining time: 57.23 seconds.
Processed 246/252 graphs. Estimated remaining time: 49.09 seconds.
Processed 247/252 graphs. Estimated remaining time: 40.87 seconds.
Processed 248/252 graphs. Estimated remaining time: 32.65 seconds.
Processed 249/252 graphs. Estimated remaining time: 24.49 seconds.
Processed 250/252 graphs. Estimated remaining time: 16.33 seconds.
Processed 251/252 graphs. Estimated remaining time: 8.16 seconds.
Processed 252/252 graphs. Estimated remaining time: 0.00 seconds.


In [5]:
def compare_analysis(metrics, condition1, time_point1, condition2, time_point2, graph_property, paired=False):
    data1 = [metrics[f"{condition1}_{time_point1}_{i}"][graph_property] for i in range(20) if f"{condition1}_{time_point1}_{i}" in metrics and metrics[f"{condition1}_{time_point1}_{i}"].get(graph_property) is not None]
    data2 = [metrics[f"{condition2}_{time_point2}_{i}"][graph_property] for i in range(20) if f"{condition2}_{time_point2}_{i}" in metrics and metrics[f"{condition2}_{time_point2}_{i}"].get(graph_property) is not None]
    
    # Filter out None values
    data1 = [x for x in data1 if x is not None]
    data2 = [x for x in data2 if x is not None]
    
    if len(data1) > 0 and len(data2) > 0:
        if paired:
            t_stat, p_value = ttest_rel(data1, data2)
        else:
            t_stat, p_value = ttest_ind(data1, data2)
    else:
        t_stat, p_value = None, None
    return t_stat, p_value


In [6]:
# 1. 验证四组s1相似性 
print("S_1:")
for condition in ['tacs6', 'tacs10', 'tdcs']:
    for graph_property in ['degree_centrality', 'betweenness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'modularity', 'rich_club_coefficient', 'participation_coefficient', 'path_length', 'connected_components', 'minimum_spanning_tree', 'cut_vertices', 'biconnected_components', 'path_length']:
        t_stat, p_value = compare_analysis(metrics, 'sham', 's1', condition, 's1', graph_property)
        print(f"Sham S1 vs {condition} S1 - {graph_property}: T-statistic: {t_stat}, P-value: {p_value}")

S_1:
Sham S1 vs tacs6 S1 - degree_centrality: T-statistic: -1.356238985597168, P-value: 0.18302745841402832
Sham S1 vs tacs6 S1 - betweenness_centrality: T-statistic: 0.5166641707294843, P-value: 0.6083846645652429
Sham S1 vs tacs6 S1 - eigenvector_centrality: T-statistic: 0.580435842100867, P-value: 0.5650471838419071
Sham S1 vs tacs6 S1 - clustering_coefficient: T-statistic: -1.8530498345070596, P-value: 0.07165263344467601
Sham S1 vs tacs6 S1 - modularity: T-statistic: 0.029343902043755518, P-value: 0.9767438889378435
Sham S1 vs tacs6 S1 - rich_club_coefficient: T-statistic: 0.2224669619051357, P-value: 0.8264551600320729
Sham S1 vs tacs6 S1 - participation_coefficient: T-statistic: -0.4278435467158241, P-value: 0.6711800982587
Sham S1 vs tacs6 S1 - path_length: T-statistic: -0.2283851480391083, P-value: 0.8205714169551995
Sham S1 vs tacs6 S1 - connected_components: T-statistic: 1.7650809469888495, P-value: 0.08558613728311856
Sham S1 vs tacs6 S1 - minimum_spanning_tree: T-statistic

In [7]:
# 2. 验证真实刺激的s2与sham组的对比
print("S_2:")
for condition in ['tacs6', 'tacs10', 'tdcs']:
    for graph_property in ['degree_centrality', 'betweenness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'modularity', 'rich_club_coefficient', 'participation_coefficient', 'path_length', 'connected_components', 'minimum_spanning_tree', 'cut_vertices', 'biconnected_components', 'path_length']:
        # 比较 s2
        t_stat, p_value = compare_analysis(metrics, 'sham', 's2', condition, 's2', graph_property)
        print(f"{condition} S2 vs Sham S2 - {graph_property}: T-statistic: {t_stat}, P-value: {p_value}")

S_2:
tacs6 S2 vs Sham S2 - degree_centrality: T-statistic: -1.610131470067859, P-value: 0.11564616832232982
tacs6 S2 vs Sham S2 - betweenness_centrality: T-statistic: 1.3131529559045905, P-value: 0.19700604505660846
tacs6 S2 vs Sham S2 - eigenvector_centrality: T-statistic: -1.2256022785251577, P-value: 0.227890320941281
tacs6 S2 vs Sham S2 - clustering_coefficient: T-statistic: -1.5010242810287713, P-value: 0.14161353425746148
tacs6 S2 vs Sham S2 - modularity: T-statistic: 0.13560666418630687, P-value: 0.8928480550199416
tacs6 S2 vs Sham S2 - rich_club_coefficient: T-statistic: 1.6205502359686261, P-value: 0.1187450945476144
tacs6 S2 vs Sham S2 - participation_coefficient: T-statistic: -0.5020911578964928, P-value: 0.6185004893962899
tacs6 S2 vs Sham S2 - path_length: T-statistic: 0.07019983560539199, P-value: 0.9444027469419529
tacs6 S2 vs Sham S2 - connected_components: T-statistic: 1.0713214601305328, P-value: 0.29078221808837657
tacs6 S2 vs Sham S2 - minimum_spanning_tree: T-stati

In [8]:
# 2. 验证真实刺激的s3与sham组的对比
print("S_3:")
for condition in ['tacs6', 'tacs10', 'tdcs']:
    for graph_property in ['degree_centrality', 'betweenness_centrality', 'eigenvector_centrality', 'clustering_coefficient', 'modularity', 'rich_club_coefficient', 'participation_coefficient', 'path_length', 'connected_components', 'minimum_spanning_tree', 'cut_vertices', 'biconnected_components']:        
        # 比较 s3
        t_stat, p_value = compare_analysis(metrics, 'sham', 's3', condition, 's3', graph_property)
        print(f"{condition} S3 vs Sham S3 - {graph_property}: T-statistic: {t_stat}, P-value: {p_value}")

S_3:
tacs6 S3 vs Sham S3 - degree_centrality: T-statistic: -1.4410785163725837, P-value: 0.15775262634705217
tacs6 S3 vs Sham S3 - betweenness_centrality: T-statistic: 0.33476293924078254, P-value: 0.7396447915062847
tacs6 S3 vs Sham S3 - eigenvector_centrality: T-statistic: -0.6784223827779217, P-value: 0.5016147550072982
tacs6 S3 vs Sham S3 - clustering_coefficient: T-statistic: -1.1327823043353418, P-value: 0.26440275718427203
tacs6 S3 vs Sham S3 - modularity: T-statistic: 2.064614185387995, P-value: 0.04583312153398437
tacs6 S3 vs Sham S3 - rich_club_coefficient: T-statistic: -0.7499614061201181, P-value: 0.46246422303375323
tacs6 S3 vs Sham S3 - participation_coefficient: T-statistic: -2.0955052339757057, P-value: 0.04284239934843639
tacs6 S3 vs Sham S3 - path_length: T-statistic: -1.5923230720460275, P-value: 0.11959771153456851
tacs6 S3 vs Sham S3 - connected_components: T-statistic: 2.1629533091032114, P-value: 0.036902118690777294
tacs6 S3 vs Sham S3 - minimum_spanning_tree: T