In [None]:
import pickle


data_path = '/home/lh/HOGO/data/synthetic_data/degroot/times_size2000_m8_1000graphs.pkl'

with open(data_path, "rb") as f:
    data = pickle.load(f)

In [3]:
import os
import numpy as np
import pandas as pd

def read_and_process_times_data(directory):
    results = []

    # 遍历指定目录下的所有子目录
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            # 检查文件名是否以特定前缀开头
            if file.startswith("times_size100000"):
                file_path = os.path.join(subdir, file)
                # 加载数据，假设文件中的数据是以 numpy 的保存格式存储的
                data = np.load(file_path, allow_pickle=True)
                
                # 计算时间和步骤的平均值
                mean_time = np.mean([item[0] for item in data])
                mean_steps = np.mean([item[1] for item in data])

                # 获取子文件夹的名称作为标识符
                folder_name = os.path.basename(subdir)
                
                # 保存结果
                results.append([folder_name, mean_time, mean_steps])
    
    # 将结果转换为 pandas DataFrame
    df = pd.DataFrame(results, columns=["Folder Name", "Average Time", "Average Steps"])
    return df

# 调用函数
directory = "/home/lh/HOGO/data/synthetic_data"
df = read_and_process_times_data(directory)
latex_code = df.to_latex(index=False, header=True)

print(latex_code)

\begin{tabular}{lrr}
\toprule
Folder Name & Average Time & Average Steps \\
\midrule
fj & 63.668618 & 14.000000 \\
degroot & 66.949459 & 15.000000 \\
hk & 211.973091 & 39.055000 \\
mepo & 45.016729 & 10.000000 \\
hk_hetero & 172.207748 & 32.520000 \\
\bottomrule
\end{tabular}



In [6]:
import os
import pandas as pd

def count_lines(file_path):
    with open(file_path, 'r') as file:
        return sum(1 for line in file)

def gather_data(directory):
    data = []
    for root, dirs, files in os.walk(directory):
        for folder in dirs:
            
            folder_path = os.path.join(root, folder)
            edges_file = next((f for f in os.listdir(folder_path) if f.endswith('_edges.txt')), None)
            opinions_file = next((f for f in os.listdir(folder_path) if f.endswith('opinion.txt')), None)
            if edges_file and opinions_file:
                edges_count = count_lines(os.path.join(folder_path, edges_file))
                opinions_count = count_lines(os.path.join(folder_path, opinions_file))
                data.append({'Folder': folder, 'Edges': edges_count, 'Messages': opinions_count})

    return pd.DataFrame(data)

# Define the path to the raw data
raw_data_path = '/home/lh/HOGO/data/raw_data'

# Gather data and create DataFrame
df = gather_data(raw_data_path)

# Convert the DataFrame to LaTeX format
latex_output = df.to_latex(index=False)
print(latex_output)


\begin{tabular}{lrr}
\toprule
Folder & Edges & Messages \\
\midrule
巴以冲突 & 89235 & 49897 \\
德里 & 5271 & 20026 \\
美国大选 & 2482 & 10883 \\
上海疫情 & 21143 & 18502 \\
鼠头鸭脖 & 4253 & 8506 \\
台湾大选 & 29377 & 7566 \\
无标度 & 1491 & 500 \\
小世界 & 2500 & 500 \\
twitter-delhi2013 & 5271 & 20026 \\
\bottomrule
\end{tabular}



In [1]:
import os
import torch

def load_and_transform_data(pt_file_path):
    """
    读取 .pt 文件并转换 x 和 y 张量的值从 [-1, 1] 到 [0, 1]。
    参数:
    pt_file_path (str): .pt 文件的路径。
    """
    # 加载 .pt 文件
    data = torch.load(pt_file_path)

    # 检查 x 和 y 是否包含负数，并进行转换
    if torch.any(data.x < 0) or torch.any(data.y < 0):
        data.x = (data.x + 1) / 2
        data.y = (data.y + 1) / 2

    # 覆盖原文件
    torch.save(data, pt_file_path)
    print(f"Processed and saved: {pt_file_path}")

def process_all_pt_files(root_dir):
    """
    遍历根目录下所有子目录，处理其中的所有 .pt 文件。
    参数:
    root_dir (str): 根目录的路径。
    """
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.pt'):
                pt_file_path = os.path.join(subdir, file)
                load_and_transform_data(pt_file_path)

root_dir = '/home/lh/UniGO/data/real_data'
process_all_pt_files(root_dir)

Processed and saved: /home/lh/UniGO/data/real_data/covid/datax0.pt
Processed and saved: /home/lh/UniGO/data/real_data/covid/dataxm.pt
Processed and saved: /home/lh/UniGO/data/real_data/covid/dataxt.pt
Processed and saved: /home/lh/UniGO/data/real_data/delhi/datax0.pt
Processed and saved: /home/lh/UniGO/data/real_data/delhi/dataxm.pt
Processed and saved: /home/lh/UniGO/data/real_data/delhi/dataxt.pt
Processed and saved: /home/lh/UniGO/data/real_data/election/datax0.pt
Processed and saved: /home/lh/UniGO/data/real_data/election/dataxm.pt
Processed and saved: /home/lh/UniGO/data/real_data/election/dataxt.pt
Processed and saved: /home/lh/UniGO/data/real_data/food/datax0.pt
Processed and saved: /home/lh/UniGO/data/real_data/food/dataxm.pt
Processed and saved: /home/lh/UniGO/data/real_data/food/dataxt.pt
Processed and saved: /home/lh/UniGO/data/real_data/IsraelHamas/datax0.pt
Processed and saved: /home/lh/UniGO/data/real_data/IsraelHamas/dataxm.pt
Processed and saved: /home/lh/UniGO/data/rea

In [3]:
import pickle

data_path = "/home/lh/UniGO/data/processed_data_batch_1.pkl"

with open(data_path, "rb") as f:
    data = pickle.load(f)

data

[Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=-1, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=-1, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=-1, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=-1, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=-1, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=11, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_step=11, cluster_node_indices=[1000], cluster_ptr=[4]),
 Data(x=[36, 1000, 15], edge_index=[2, 1996], y=[36, 1000, 50], convergence_