## Transform Query Graph

In [None]:
import os

def convert_graph_format(graph_data):
    """
    将原始查询图格式转换为新格式
    原格式：v <id> <label>, e <id1> <id2> <label>
    新格式：v <id> <label>, e <id1> <id2>
    """
    lines = graph_data.strip().split("\n")
    result = []
    
    for line in lines:
        parts = line.strip().split()
        if parts[0] == 'v':  # 顶点信息保持不变
            result.append(line)
        elif parts[0] == 'e':  # 边信息中移除标签
            new_edge = f"e {parts[1]} {parts[2]}"
            result.append(new_edge)
    
    return "\n".join(result)

dir = "/home/cc/haibin2/amazon/6/query_graph/sparse_6/"

result_dir = "amazon/" 
name = "Q_1"

# 文件路径
input_path = dir + name
output_path = result_dir + name

# 从文件读取
with open(input_path, "r") as f:
    query_data = f.read()

# 执行转换
converted_graph = convert_graph_format(query_data)

# 保存结果
with open(output_path, "w") as f:
    f.write(converted_graph)

print(f"转换后的图已保存至 {output_path}")
print("\n转换结果预览:")
print(converted_graph[:200] + "..." if len(converted_graph) > 200 else converted_graph)

转换后的图已保存至 livejournal/Q_1

转换结果预览:
v 0 4
v 1 5
v 2 1
v 3 3
v 4 2
v 5 2
e 0 1
e 0 4
e 1 5
e 2 5
e 3 4
e 4 5


## Transform DataGraph

In [5]:
import os

def convert_query_graph_format(graph_data):
    """
    将原始查询图格式转换为新格式
    原格式：v <id> <label>, e <id1> <id2> <label>
    新格式：v <id> <label>, e <id1> <id2>
    """
    lines = graph_data.strip().split("\n")
    result = []
    
    for line in lines:
        parts = line.strip().split()
        if parts[0] == 'v':  # 顶点信息保持不变
            result.append(line)
        elif parts[0] == 'e':  # 边信息中移除标签
            new_edge = f"e {parts[1]} {parts[2]}"
            result.append(new_edge)
    
    return "\n".join(result)

def convert_data_graph_format(graph_data):
    """
    将原始数据图格式转换为新格式
    - 移除边的标签
    - 确保先输出所有顶点，再输出所有边
    原格式：v <id> <label>, e <id1> <id2> <label>（可能是乱序的）
    新格式：v <id> <label>（所有顶点）, e <id1> <id2>（所有边）
    """
    lines = graph_data.strip().split("\n")
    vertices = []
    edges = []
    
    # 分离顶点和边
    for line in lines:
        parts = line.strip().split()
        if not parts:  # 跳过空行
            continue
            
        if parts[0] == 'v':  # 顶点信息
            vertices.append(line)
        elif parts[0] == 'e':  # 边信息
            new_edge = f"e {parts[1]} {parts[2]}"  # 移除边的标签
            edges.append(new_edge)
    
    # 合并结果，先顶点后边
    result = vertices + edges
    return "\n".join(result)

# 查询图处理
def process_query_graph():
    dir_path = "/home/cc/haibin2/amazon/6/data_graph/"
    result_dir = "amazon_Calig/"
    name = "data.graph"
    
    # 文件路径
    input_path = dir_path + name
    output_path = result_dir + name
    
    # 创建输出目录（如果不存在）
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # 从文件读取
    with open(input_path, "r") as f:
        query_data = f.read()
    
    # 执行转换
    converted_graph = convert_query_graph_format(query_data)
    
    # 保存结果
    with open(output_path, "w") as f:
        f.write(converted_graph)
    
    print(f"转换后的查询图已保存至 {output_path}")
    print("\n转换结果预览:")
    print(converted_graph[:200] + "..." if len(converted_graph) > 200 else converted_graph)

# 数据图处理
def process_data_graph():
    # data_dir =  "/home/cc/haibin2/amazon/6/data_graph/"  # 修改为你的数据图路径
    # data_result_dir = "amazon/"
    # data_name = "insertion.graph"  # 修改为你的数据图文件名
    
    dir_path = "/home/cc/haibin2/CSM-Benchmark/queryGraph/amazon/"
    result_dir = "amazon_Calig/"
    name = "insertion.graph"

    # 文件路径
    data_input_path = dir_path + name
    data_output_path = result_dir + name
    
    # 创建输出目录（如果不存在）
    os.makedirs(os.path.dirname(data_output_path), exist_ok=True)
    
    # 如果文件存在，转换它
    if os.path.exists(data_input_path):
        # 从文件读取
        with open(data_input_path, "r") as f:
            data_graph_content = f.read()
        
        # 执行转换
        converted_data_graph = convert_data_graph_format(data_graph_content)
        
        # 保存结果
        with open(data_output_path, "w") as f:
            f.write(converted_data_graph)
        
        print(f"转换后的数据图已保存至 {data_output_path}")
        print("\n转换结果预览:")
        print(converted_data_graph[:200] + "..." if len(converted_data_graph) > 200 else converted_data_graph)
    else:
        print(f"错误：找不到数据图文件 {data_input_path}")

if __name__ == "__main__":
    # 执行查询图转换
    # process_query_graph()
    
    # 执行数据图转换
    process_data_graph()

转换后的数据图已保存至 amazon_Calig/insertion.graph

转换结果预览:
e 257263 282429
e 33 328478
e 60460 267609
e 57526 123267
e 156368 156371
e 137624 259843
e 55302 168993
e 317898 329845
e 235430 270764
e 17316 172801
e 46931 46933
e 138288 176398
e 8132 30780
e 635...


## Transform a query graph


In [None]:
import os

def batch_process_query_graphs(input_dir, output_dir):
    """
    批量处理查询图文件，从输入目录读取文件，转换后保存到输出目录。
    
    Args:
        input_dir (str): 输入目录路径，包含sparse/dense/tree等子文件夹
        output_dir (str): 输出目录路径，将保持与输入相同的子文件夹结构
    """
    # 遍历输入目录下的所有文件和子文件夹
    for root, dirs, files in os.walk(input_dir):
        for name in files:
            # 构建输入和输出路径
            input_path = os.path.join(root, name)
            # 计算相对路径以保持相同的子文件夹结构
            rel_path = os.path.relpath(input_path, input_dir)
            output_path = os.path.join(output_dir, rel_path)
            
            # 创建输出目录（如果不存在）
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            
            try:
                # 从文件读取
                with open(input_path, "r") as f:
                    query_data = f.read()
                
                # 执行转换
                converted_graph = convert_query_graph_format(query_data)
                
                # 保存结果
                with open(output_path, "w") as f:
                    f.write(converted_graph)
                
                print(f"转换后的查询图已保存至 {output_path}")
                # print("\n转换结果预览:")
                # preview = converted_graph[:200] + "..." if len(converted_graph) > 200 else converted_graph
                print(preview)
                print("-" * 50)
                
            except Exception as e:
                print(f"处理文件 {input_path} 时出错: {str(e)}")
                continue

# 使用示例
if __name__ == "__main__":
    input_directory = "/home/cc/haibin2/CSM-Benchmark/queryGraph/amazon"  # 替换为你的输入目录
    output_directory = "/home/cc/haibin2/CSM-Benchmark/queryGraph/CaLiG_only"  # 替换为你的输出目录
    batch_process_query_graphs(input_directory, output_directory)

处理文件 /home/cc/haibin2/CSM-Benchmark/queryGraph/amazon/Readme.md 时出错: list index out of range
转换后的查询图已保存至 /home/cc/haibin2/CSM-Benchmark/queryGraph/CaLiG_only/6_self/tree/Q_21

转换结果预览:
v 0 4
v 1 1
v 2 4
v 3 0
v 4 4
v 5 0
e 0 3
e 0 5
e 1 3
e 1 4
e 2 4
--------------------------------------------------
转换后的查询图已保存至 /home/cc/haibin2/CSM-Benchmark/queryGraph/CaLiG_only/6_self/tree/Q_7

转换结果预览:
v 0 5
v 1 5
v 2 5
v 3 1
v 4 0
v 5 4
e 0 5
e 1 3
e 2 0
e 2 3
e 4 5
--------------------------------------------------
转换后的查询图已保存至 /home/cc/haibin2/CSM-Benchmark/queryGraph/CaLiG_only/6_self/tree/Q_9

转换结果预览:
v 0 1
v 1 5
v 2 1
v 3 5
v 4 3
v 5 0
e 0 1
e 0 5
e 1 3
e 2 4
e 2 5
--------------------------------------------------
转换后的查询图已保存至 /home/cc/haibin2/CSM-Benchmark/queryGraph/CaLiG_only/6_self/tree/Q_1

转换结果预览:
v 0 5
v 1 2
v 2 3
v 3 5
v 4 3
v 5 5
e 0 2
e 1 0
e 1 4
e 2 5
e 3 4
--------------------------------------------------
转换后的查询图已保存至 /home/cc/haibin2/CSM-Benchmark/queryGraph/CaLiG_only/6_self/tree/Q

## Transform delete graph stream

In [None]:
import os

def convert_query_graph_format(graph_data):
    """
    将原始查询图格式转换为新格式
    原格式：v <id> <label>, e <id1> <id2> <label>
    新格式：v <id> <label>, e <id1> <id2>
    """
    lines = graph_data.strip().split("\n")
    result = []
    
    for line in lines:
        parts = line.strip().split()
        if parts[0] == 'v':  # 顶点信息保持不变
            result.append(line)
        elif parts[0] == 'e':  # 边信息中移除标签
            new_edge = f"e {parts[1]} {parts[2]}"
            result.append(new_edge)
    
    return "\n".join(result)

def convert_data_graph_format(graph_data):
    """
    将原始数据图格式转换为新格式
    - 移除边的标签
    - 确保先输出所有顶点，再输出所有边
    原格式：v <id> <label>, e <id1> <id2> <label>（可能是乱序的）
    新格式：v <id> <label>（所有顶点）, e <id1> <id2>（所有边）
    """
    lines = graph_data.strip().split("\n")
    vertices = []
    edges = []
    
    # 分离顶点和边
    for line in lines:
        parts = line.strip().split()
        if not parts:  # 跳过空行
            continue
            
        if parts[0] == 'v':  # 顶点信息
            vertices.append(line)
        elif parts[0] == 'e':  # 边信息
            new_edge = f"e {parts[1]} {parts[2]}"  # 移除边的标签
            edges.append(new_edge)
    
    # 合并结果，先顶点后边
    result = vertices + edges
    return "\n".join(result)

def convert_deletion_format(deletion_data):
    """
    将原始删除操作格式转换为新格式
    原格式：-v <id> <label>, -e <id1> <id2> <label>
    新格式：e <-id1-1> <-id2-1>
    """
    lines = deletion_data.strip().split("\n")
    result = []
    
    for line in lines:
        parts = line.strip().split()
        if parts[0] == '-e':  # 转换删除的边
            # 取负值并减1（-id-1）
            id1 = -int(parts[1]) - 1
            id2 = -int(parts[2]) - 1
            new_edge = f"e {id1} {id2}"
            result.append(new_edge)
        elif parts[0] == 'e':  # 保留普通边
            new_edge = f"e {parts[1]} {parts[2]}"
            result.append(new_edge)
    
    return "\n".join(result)

def process_insertion_graph():
    """处理普通数据图，去除边的标签并确保顶点在前，边在后"""
    data_dir = "/home/cc/haibin2/amazon/6/data_graph/"
    data_result_dir = "amazon/"
    data_name = "data.graph"  # 修改为你的数据图文件名
    
    # 创建输出目录（如果不存在）
    os.makedirs(data_result_dir, exist_ok=True)
    
    # 文件路径
    data_input_path = os.path.join(data_dir, data_name)
    data_output_path = os.path.join(data_result_dir, data_name)
    
    # 如果文件存在，转换它
    if os.path.exists(data_input_path):
        # 从文件读取
        with open(data_input_path, "r") as f:
            data_graph_content = f.read()
        
        # 执行转换
        converted_data_graph = convert_data_graph_format(data_graph_content)
        
        # 保存结果
        with open(data_output_path, "w") as f:
            f.write(converted_data_graph)
        
        print(f"转换后的数据图已保存至 {data_output_path}")
        print("\n转换结果预览:")
        print(converted_data_graph[:200] + "..." if len(converted_data_graph) > 200 else converted_data_graph)
    else:
        print(f"错误：找不到数据图文件 {data_input_path}")

def process_deletion_graph():
    """处理删除操作图，转换为负索引格式"""
    data_dir = "/home/cc/haibin2/amazon/6/data_graph/"
    data_result_dir = "amazon/"
    data_name = "deletion.graph"  # 修改为你的删除操作文件名
    
    # 创建输出目录（如果不存在）
    os.makedirs(data_result_dir, exist_ok=True)
    
    # 文件路径
    data_input_path = os.path.join(data_dir, data_name)
    data_output_path = os.path.join(data_result_dir, data_name)
    
    # 如果文件存在，转换它
    if os.path.exists(data_input_path):
        # 从文件读取
        with open(data_input_path, "r") as f:
            deletion_content = f.read()
        
        # 执行转换
        converted_deletion = convert_deletion_format(deletion_content)
        
        # 保存结果
        with open(data_output_path, "w") as f:
            f.write(converted_deletion)
        
        print(f"转换后的删除操作已保存至 {data_output_path}")
        print("\n转换结果预览:")
        print(converted_deletion[:200] + "..." if len(converted_deletion) > 200 else converted_deletion)
    else:
        print(f"错误：找不到删除操作文件 {data_input_path}")

def process_query_graph():
    """处理查询图，去除边的标签"""
    query_dir = "/home/cc/haibin2/amazon/6/query_graph/sparse_6/"
    query_result_dir = "amazon/"
    query_name = "Q_1"  # 修改为你的查询图文件名
    
    # 创建输出目录（如果不存在）
    os.makedirs(query_result_dir, exist_ok=True)
    
    # 文件路径
    query_input_path = os.path.join(query_dir, query_name)
    query_output_path = os.path.join(query_result_dir, query_name)
    
    # 如果文件存在，转换它
    if os.path.exists(query_input_path):
        # 从文件读取
        with open(query_input_path, "r") as f:
            query_content = f.read()
        
        # 执行转换
        converted_query = convert_query_graph_format(query_content)
        
        # 保存结果
        with open(query_output_path, "w") as f:
            f.write(converted_query)
        
        print(f"转换后的查询图已保存至 {query_output_path}")
        print("\n转换结果预览:")
        print(converted_query[:200] + "..." if len(converted_query) > 200 else converted_query)
    else:
        print(f"错误：找不到查询图文件 {query_input_path}")

if __name__ == "__main__":
    print("开始处理图文件...")
    
    # 处理查询图
    process_query_graph()
    
    # 处理数据图
    process_insertion_graph()
    
    # 处理删除操作
    process_deletion_graph()
    
    print("所有转换完成！")