# 初始化

In [None]:
import os
import time
import threading
from http.server import HTTPServer, SimpleHTTPRequestHandler
from local_packages import *
from dotenv import load_dotenv
from queue import Queue
import concurrent.futures
import random
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

# Path of the .env file, expected in the current working directory.
# NOTE(review): load_dotenv is imported above but never called in this cell —
# confirm the environment variables are loaded elsewhere (e.g. by AgentOpener).
dotenv_path = os.path.join(os.getcwd(), '.env')
model_path = r'D:\Joining\Models\Text2Vec_base_zh'  # fill in the path of the embedding model
# Project root: the directory that contains the .env file.
project_root = os.path.dirname(dotenv_path)

# Shared helper objects used by the pipeline functions below.
# NOTE(review): AgentOpener, JSProcessor and Embedder are presumably provided by
# the star import from local_packages — confirm.
agentopener=AgentOpener(service_type='qwen')
service=agentopener.service  # used below through service.ask_once(sys_prompt, user_prompt)
js=JSProcessor()  # used below for parse_list / parse_dict / read_json / write_json
embedder=Embedder(model_path)  # used below for embed_list / partition_by_similarity

## 设置工作目录
期待输出的json能够在这个目录下遵循以下结构：
- /root_folder
    - raw_data.txt
    - raw_data.json
    - /step_1_process
        - /step_1_processed_entities
            - entity_para_index.json
            - entity_partitions.json
            - entity_recog_01.json
            - step_1_processed.json
        - /step_1_unprocessed

    - /step_2_process
        - /graph_structure
            - entities.json
            - relations.json
            - nodes.json
            - edges.json
        - /graph_picture
            - template.html
            - nodes_and_edges.html
            - style.css

- model_path

In [None]:
import os

# Root folder that holds every artifact produced by the pipeline
root_folder = 'Task6_Text2KG_light'

# Raw input, as plain text and as the paragraph-split JSON
raw_data_txt_path = os.path.join(root_folder, 'raw_data.txt')
raw_data_json_path = os.path.join(root_folder, 'raw_data.json')

# Step 1: entity recognition artifacts
step_1_process_folder = os.path.join(root_folder, 'step_1_process')
step_1_processed_entities_folder = os.path.join(step_1_process_folder, 'step_1_processed_entities')
entity_para_index_path = os.path.join(step_1_processed_entities_folder, 'entity_para_index.json')
entity_partitions_path = os.path.join(step_1_processed_entities_folder, 'entity_partitions.json')
entity_recog_01_path = os.path.join(step_1_processed_entities_folder, 'entity_recog_01.json')
step_1_processed_json_path = os.path.join(step_1_processed_entities_folder, 'step_1_processed.json')
step_1_unprocessed_folder = os.path.join(step_1_process_folder, 'step_1_unprocessed')

# Step 2: graph structure artifacts
step_2_process_folder = os.path.join(root_folder, 'step_2_process')
graph_structure_folder = os.path.join(step_2_process_folder, 'graph_structure')
entities_json_path = os.path.join(graph_structure_folder, 'entities.json')
relations_json_path = os.path.join(graph_structure_folder, 'relations.json')
nodes_json_path = os.path.join(graph_structure_folder, 'nodes.json')
edges_json_path = os.path.join(graph_structure_folder, 'edges.json')

# Step 2: rendered graph page
graph_picture_folder = os.path.join(step_2_process_folder, 'graph_picture')
html_path = os.path.join(graph_picture_folder, 'nodes_and_edges.html')

# Create every directory of the layout (existing ones are left alone)
directories = [
    root_folder,
    step_1_process_folder,
    step_1_processed_entities_folder,
    step_1_unprocessed_folder,
    step_2_process_folder,
    graph_structure_folder,
    graph_picture_folder,
]
for directory in directories:
    os.makedirs(directory, exist_ok=True)

# Create every file of the layout that does not exist yet
files = [
    raw_data_txt_path,
    raw_data_json_path,
    entity_para_index_path,
    entity_partitions_path,
    entity_recog_01_path,
    step_1_processed_json_path,
    entities_json_path,
    relations_json_path,
    nodes_json_path,
    edges_json_path,
    html_path,
]

# Placeholder content per file suffix: empty JSON object, empty text, bare HTML page
_placeholder_by_suffix = (
    ('.json', '{}'),
    ('.txt', ''),
    ('.html', '<html></html>'),
)

for file in files:
    if os.path.exists(file):
        continue  # never overwrite existing data
    with open(file, 'w', encoding='utf-8') as f:
        for _suffix, _placeholder in _placeholder_by_suffix:
            if file.endswith(_suffix):
                f.write(_placeholder)
                break

print("所有目录和文件已成功创建。")


# 主函数

In [None]:

import json
import os
import time
import traceback
import queue
from queue import Queue, Empty
from threading import Thread, Lock
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import AutoModel, AutoTokenizer
import torch
import numpy as np

class ParseError(Exception):
    """Pipeline error that carries a numeric code next to its message.

    Attributes set by the constructor:
        code: value the callers branch on (this file uses 400, 429, 1000, 1001).
        message: human-readable description; it is also the exception text.
    """

    def __init__(self, code, message):
        super().__init__(message)
        self.code = code
        self.message = message

def step0_str_to_json(input_str, merge_limit=200, max_length=400):
    """Split raw text into numbered paragraph records.

    Lines are merged, separated by a single space, for as long as the merged
    length stays below ``merge_limit``. Any chunk longer than ``max_length`` is
    cut at the last space before the limit, or hard-cut at the limit when the
    chunk has no space (typical for Chinese text).

    The length cap is applied after every line. Previously it only ran at the
    end of each blank-line-separated group, so an oversized line followed by
    another line of the same group was emitted unsplit. Blank lines are skipped
    so they no longer add stray spaces to the merged text.

    Args:
        input_str: The raw text.
        merge_limit: Lines are appended to the current chunk while the combined
            length (not counting the joining space) is below this value.
        max_length: Upper bound for the length of an emitted chunk.

    Returns:
        A dict ``{'paragraph_1': {'text': ...}, 'paragraph_2': ...}`` numbered
        from 1 in document order. Empty input gives an empty dict.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    merged_paragraphs = []
    temp_paragraph = ""

    # Blank-line-separated groups are split further into single lines.
    for paragraph in input_str.strip().split('\n\n'):
        for sub_paragraph in paragraph.split('\n'):
            line = sub_paragraph.strip()
            if not line:
                continue  # a blank line carries no text

            if not temp_paragraph:
                temp_paragraph = line
            elif len(temp_paragraph) + len(line) < merge_limit:
                temp_paragraph += " " + line
            else:
                merged_paragraphs.append(temp_paragraph)
                temp_paragraph = line

            # Enforce the cap right away so no oversized chunk can be flushed.
            while len(temp_paragraph) > max_length:
                split_index = temp_paragraph.rfind(' ', 0, max_length)
                if split_index <= 0:
                    split_index = max_length  # no usable space: hard cut
                merged_paragraphs.append(temp_paragraph[:split_index])
                temp_paragraph = temp_paragraph[split_index:].strip()

    # Flush whatever is left after the last line.
    if temp_paragraph:
        merged_paragraphs.append(temp_paragraph)

    return {
        f'paragraph_{i}': {'text': paragraph}
        for i, paragraph in enumerate(merged_paragraphs, 1)
    }

def step0_read_txt_to_str(file_path):
    """Return the whole content of the UTF-8 text file at ``file_path``."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        content = handle.read()
    return content

def step1_parse_single_file(original_single_dict: dict, target_keys: list) -> dict:
    """Ask the LLM service for the entities mentioned in one paragraph.

    Args:
        original_single_dict: Paragraph record; its ``'text'`` value is sent to
            the model (``'未提供'`` is used when the key is missing).
        target_keys: List whose first element is the key under which the parsed
            entity list is returned.

    Returns:
        ``{target_keys[0]: parsed_list}``.

    Raises:
        ParseError: With code 1001 when ``js.parse_list`` returns a falsy value;
            with code 400 or 429 when the request or parsing raised and the
            error text contains ``'Error code: 400'`` / ``'Error code: 429'``;
            with code 1000 for any other exception.
    """
    text = str(original_single_dict.get('text', '未提供'))
    key = target_keys[0]
    sys_prompt = f'''
    你是一个文本分析师，对于用户输入的文本，我要求你输出一个python列表，其中的值都是知识点、实体或概念，以如下结构的一个python列表返回，由中括号所表示：
    ['entity1','entity2','entity3',...]
    注意：务必精炼，省略一切无关内容
    '''
    user_prompt = f'''
    用户输入的文本为{text}
    '''
    try:
        msg = service.ask_once(sys_prompt,user_prompt)
        parse_success = js.parse_list(msg)
    except Exception as e:
        # Map service / parser failures onto the codes the worker branches on.
        error_message = str(e)
        if 'Error code: 400' in error_message:
            raise ParseError(400, "发生400错误，跳过当前处理") from e
        elif 'Error code: 429' in error_message:
            raise ParseError(429, "发生429错误，等待30秒后继续执行") from e
        else:
            raise ParseError(1000, f"发生未知错误{e}") from e

    # Raised outside the try block on purpose: inside it, the generic handler
    # above re-wrapped this error as code 1000 and the caller's retry on code
    # 1001 could never trigger.
    if not parse_success:
        raise ParseError(1001, f"解析失败：书名 {key}")

    print('1 success')
    return {key: parse_success}

def step1_worker(task_key, original_single_dict, lock, result_dict):
    """Extract the entities of one paragraph and store them in ``result_dict``.

    Up to three attempts are made. A ParseError with code 1001 is retried
    immediately; code 429 or a message containing 'Throttling.RateQuota' is
    retried after a 20 second pause; any other ParseError ends the task
    without storing a result.

    Args:
        task_key: Key of the paragraph; also the key written to ``result_dict``.
        original_single_dict: Paragraph record handed to step1_parse_single_file.
        lock: Lock guarding writes to ``result_dict``.
        result_dict: Shared dict that receives ``{task_key: parsed_result}``.
    """
    keys_for_parser = [task_key]
    for attempt in range(3):
        try:
            parsed = step1_parse_single_file(original_single_dict, keys_for_parser)
        except ParseError as err:
            if err.code == 1001:
                continue  # unparsable reply: simply ask again
            if err.code == 429 or 'Throttling.RateQuota' in str(err):
                print(f"429 或 Throttling.RateQuota 错误: {task_key}，将在20秒后重试... (尝试 {attempt + 1})")
                time.sleep(20)
                continue
            return  # any other error code: give up on this task
        with lock:
            result_dict[task_key] = parsed
        return
# Multithreaded extraction of the raw entities
def step1_multi_thread_parse(original_dict: dict, thread_number: int) -> dict:
    """Run step1_worker for every paragraph on a thread pool and save the result.

    Args:
        original_dict: Mapping of paragraph key to paragraph record.
        thread_number: Maximum number of pool threads.

    Returns:
        The dict filled by the workers; the same data is written as JSON to
        ``step_1_processed_json_path``.
    """
    collected = {}
    guard = threading.Lock()

    with ThreadPoolExecutor(max_workers=thread_number) as pool:
        key_by_future = {}
        for task_key, value_dict in original_dict.items():
            submitted = pool.submit(step1_worker, task_key, value_dict, guard, collected)
            key_by_future[submitted] = task_key

        for finished in as_completed(key_by_future):
            finished_key = key_by_future[finished]
            try:
                finished.result(timeout=30)
            except Exception:
                print(f"任务 {finished_key} 发生错误: {traceback.format_exc()}")

    # Write the JSON file while holding the same lock the workers used
    with guard, open(step_1_processed_json_path, 'w', encoding='utf-8') as out_file:
        json.dump(collected, out_file, ensure_ascii=False, indent=4)

    return collected
# Order a paragraph dict by paragraph number
def sort_by_para_index(input_dict):
    """Return a copy of ``input_dict`` ordered by the number at the end of each key.

    The number is the integer after the last underscore of the key, e.g. 12 for
    'paragraph_12'.
    """
    def _para_number(key):
        return int(key.split('_')[-1])

    ordered_keys = sorted(input_dict, key=_para_number)
    return {key: input_dict[key] for key in ordered_keys}
# Flatten the elements of every sub-dict into one list
def merge_elements_to_single_list(input_dict):
    """Concatenate every value of every sub-dict of ``input_dict``.

    Each value of ``input_dict`` is a dict whose values are iterables; their
    elements are collected in iteration order, duplicates included.
    """
    return [
        element
        for sub_dict in input_dict.values()
        for element_list in sub_dict.values()
        for element in element_list
    ]
# For every key of similar_keys_dict, look up the paragraphs of merged_dict that mention it
def find_related_paragraphs(merged_dict, similar_keys_dict):
    """Map each main entity to the paragraphs that mention it or one of its similar keys.

    Args:
        merged_dict: ``{para_key: {"entities": [...], "text": str}}``.
        similar_keys_dict: ``{main_key: {"Similar_keys": [...]}}``.

    Returns:
        ``{main_key: {"related_paragraphs": {para_key: text}}}``, where a
        paragraph is included when its entities contain the main key or any of
        its similar keys.
    """
    def _mentions_any(entities, candidates):
        for candidate in candidates:
            if candidate in entities:
                return True
        return False

    lookup = {}
    for main_key, similar_keys_info in similar_keys_dict.items():
        hits = {}
        for para_key, para_info in merged_dict.items():
            entities = para_info["entities"]
            text = para_info["text"]
            candidates = [main_key] + similar_keys_info["Similar_keys"]
            if _mentions_any(entities, candidates):
                hits[para_key] = text
        lookup[main_key] = {"related_paragraphs": hits}
    return lookup
# Multithreaded generation of the entity definitions
def process_related_paragraphs(num_threads, related_paragraphs_dict):
    """Ask the LLM service for a definition of every entity and save them to entities.json.

    Args:
        num_threads: Number of worker threads that consume the task queue.
        related_paragraphs_dict: ``{entity: {"related_paragraphs": {para_key: text}}}``;
            the texts are concatenated and given to the model as context.

    Side effects:
        Prints progress, then writes the collected ``{entity: parsed_dict}``
        mapping to ``entities.json`` inside ``graph_structure_folder``.
    """
    # Thread-safe queue feeding the workers
    task_queue = queue.Queue()

    # Shared result mapping and the lock that guards it
    result_dict = {}
    result_dict_lock = threading.Lock()

    def parse_dict_with_timeout(k, v, timeout=20, max_retries=3):
        """Request and parse the definition of entity ``k``; return True on success.

        ``timeout`` is accepted but not used inside this function.
        """
        related_content = ''.join(v['related_paragraphs'].values())
        sys_prompt = f'''
        你是一个基于原文的概念分析师，不会私自解读，你总是只返回提供给你的那一个概念的定义
        '''
        user_prompt = f'''
        请为我输出：{k}这个概念的定义，基于{related_content}忠实地返回,不许私自解读,请返回一个json字典，以:
        {{'概念':'...','定义':'...'}}的格式返回
        '''
        for attempt in range(max_retries):
            # Reset per attempt: the error branch below prints this value, and
            # it was previously unbound when ask_once itself raised on the
            # first attempt (which aborted the remaining retries).
            return_dict = None
            try:
                return_dict = service.ask_once(sys_prompt,user_prompt)
                if 'Throttling.RateQuota' in return_dict:
                    raise Exception('Throttling.RateQuota encountered')
                parse_success = js.parse_dict(return_dict)
                if parse_success:
                    with result_dict_lock:
                        result_dict[k] = parse_success
                        print('success')
                    return True
            except Exception as e:
                if "429" in str(e) or 'Throttling.RateQuota' in str(e):
                    # Rate limited: back off longer on every attempt.
                    print(f"429 或 Throttling.RateQuota 错误: {k}，将在{40*(attempt+1)}秒后重试... (尝试 {attempt + 1})")
                    time.sleep(40*(attempt+1))
                else:
                    print(f"处理 {k} 时出错: {e}, {return_dict}, 尝试 {attempt + 1}")
        return False

    def worker():
        """Consume tasks until the ``None`` sentinel is received."""
        while True:
            item = task_queue.get()
            if item is None:
                break
            k, v = item
            try:
                # Run the request on a helper thread so waiting for the result
                # can be bounded by a timeout.
                with ThreadPoolExecutor(max_workers=1) as executor:
                    future = executor.submit(parse_dict_with_timeout, k, v)
                    success = future.result(timeout=30)
                    if not success:
                        print(f"Failed to process {k} after some attempts")
            except Exception as e:
                print(f"Timeout or error processing {k}: {e}")
            finally:
                task_queue.task_done()

    threads = []
    for i in range(num_threads):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    for k, v in related_paragraphs_dict.items():
        task_queue.put((k, v))

    # Block until every queued task has been marked done
    task_queue.join()

    # Stop the workers: one sentinel per thread
    for i in range(num_threads):
        task_queue.put(None)
    for t in threads:
        t.join()

    # Save result_dict as JSON
    entities_json_path = os.path.join(graph_structure_folder, "entities.json")
    print(result_dict)
    js.write_json(result_dict, entities_json_path)

    print("所有任务已完成，结果已保存。")
# Build the entity-pair structure used for relation extraction
def process_entities(entities_dict, index_dict):
    """Collect, for every pair of co-occurring entities, the paragraphs they share.

    For each main entity and each of its related paragraphs found in
    ``index_dict``, every other entity of that paragraph forms a pair with the
    main entity. A pair is unordered for de-duplication, so (A, B) and (B, A)
    are the same pair.

    The output is deterministic: a pair keeps the orientation in which it was
    first seen (main entity first) and its paragraphs keep first-seen order.
    Previously both orders came from ``frozenset``/``set`` iteration and could
    change between runs because of string hash randomisation.

    Args:
        entities_dict: ``{main_entity: {"related_paragraphs": {para_key: text}}}``.
        index_dict: ``{para_key: {"entities": [...], "text": str}}``.

    Returns:
        ``{(entity_a, entity_b): {"paragraphs": [{"paragraph_key": ..., "text": ...}]}}``.
        The same content is also printed.
    """
    pair_orientation = {}  # frozenset pair -> (first, second) as first seen
    pair_paragraphs = {}   # frozenset pair -> {para_key: text}, insertion ordered

    for main_entity, data in entities_dict.items():
        related_paragraphs = data.get("related_paragraphs", {})

        for para_key in related_paragraphs:
            if para_key not in index_dict:
                continue
            para_info = index_dict[para_key]

            for entity in para_info["entities"]:
                if entity == main_entity:
                    continue
                pair_id = frozenset([main_entity, entity])
                if pair_id not in pair_orientation:
                    pair_orientation[pair_id] = (main_entity, entity)
                    pair_paragraphs[pair_id] = {}
                # Keyed by paragraph key, so a paragraph is listed once per pair.
                pair_paragraphs[pair_id][para_key] = para_info["text"]

    final_result_dict = {
        pair_orientation[pair_id]: {
            "paragraphs": [
                {"paragraph_key": para_key, "text": para_text}
                for para_key, para_text in paragraphs.items()
            ]
        }
        for pair_id, paragraphs in pair_paragraphs.items()
    }

    for entity_pair, details in final_result_dict.items():
        print(f"Entity Pair: {entity_pair}")
        for paragraph in details["paragraphs"]:
            print(f"  Paragraph Key: {paragraph['paragraph_key']}")
            print(f"  Text: {paragraph['text']}")
        print()
    return final_result_dict
# Multithreaded relation generation (the model call is currently disabled here)
def process_relations_multithreaded(related_paragraphs_dict, num_threads=15):
    """Record one relation entry per entity pair with worker threads and save them to relations.json.

    Args:
        related_paragraphs_dict: ``{(entity_1, entity_2): {"paragraphs": [{"text": ...}, ...]}}``;
            each key is unpacked into exactly two entities.
        num_threads: Number of worker threads that consume the task queue.

    Side effects:
        Prints progress and success/failure counts, then writes the results,
        keyed by ``str(pair)``, to ``relations.json`` inside
        ``graph_structure_folder``.

    NOTE(review): in this version no model call is made. ``parse_relation``
    builds both prompts but never sends them, so every pair is recorded with
    empty ``relation_type`` / ``relation_name`` / ``relation_explaination``.
    """
    task_queue = queue.Queue()
    result_dict_lock = threading.Lock()  # guards the counters and final_result_dict_2
    success_count = 0
    failure_count = 0
    final_result_dict_2 = {}

    def parse_relation(k, v, attempt=1, timeout=30):
        """Store the relation entry for pair ``k``; return True on success, False otherwise.

        ``timeout`` is only forwarded to the recursive retry calls.
        """
        nonlocal success_count
        entity_1, entity_2 = k
        # Concatenate the text of every paragraph shared by the pair
        related_context = ''.join(para_dict['text'] for para_dict in v['paragraphs'])
        sys_prompt = f'''
        你是一名忠实的基于原文的概念分析师，对于不是强相关的概念你会坚决地指出他们无关。你将获得一个概念对 (A, B)、他们的原文，以及一组关系类别。请为每对概念确定关系类型，并表示为有向边 (to/from)、无向边 (and) 或无边 (no)。关系类别及其边表示如下，请你从下面选择一个最恰当的：
        "A 依赖于 B"
        "A 控制 B"
        "A 影响 B"
        "A 约束 B"
        "A 支持 B"
        "A 授权 B"
        "A 将信息或资源传递给 B"
        "A 引导 B"
        "A 转换为 B"
        "A 触发 B"
        "A 继承 B"
        "A 是 B 的流程步骤"
        "A 对 B 提供反馈"
        "A from B",
        "A 被 B 依赖",
        "A 被 B 控制",
        "A 被 B 影响",
        "A 被 B 约束",
        "A 被 B 支持",
        "A 被 B 授权",
        "A 被 B 传递信息或资源",
        "A 被 B 引导",
        "A 被 B 转换",
        "A 被 B 触发",
        "A 被 B 继承",
        "A 是 B 的流程步骤",
        "A 被 B 提供反馈"
        
        "A 与 B 是对比关系"
        "A 与 B 是并列关系"
        "A 与 B 是相似关系"
        "A 与 B 是互补关系"
        "A 与 B 是对称关系"
        "A 与 B 相关"

        无边 (no)：
        "独立的概念"
        "A 与 B 是独立的分类"
        "A 与 B 是独立的类型"
        不相关关系
        "A 与 B 是不相关的"
        
        以{{'关系类型':'A ... B','关系名称':'...','说明':'...'}}的形式返回，若无关则关系类型写'无关'
        '''
        user_prompt = f'''
        请你从上述关系中选择一个指出A:{entity_1}与B:{entity_2}之间的关系，根据相关内容：{related_context}，以：{{'关系类型':'A ... B','关系名称':'...','说明':'...'}}的字典格式返回；注意，关系类型务必写 A xx B, 若无关则关系类型写'无关'
        '''
        try:
            # NOTE(review): parse_success is hard-coded; the prompts above are
            # not sent anywhere, so the success branch always runs.
            parse_success=True
            if parse_success:
                with result_dict_lock:
                    final_result_dict_2[tuple(k)] = {
                        'entity_pair': (entity_1, entity_2),
                        'relation_type': '',
                        'relation_name': '',
                        'relation_explaination': ''
                    }
                with result_dict_lock:
                    success_count += 1
                print(f"Success: {entity_1} 和 {entity_2} 的关系已成功解析")
                return True
        except ParseError as e:
            if "Error code: 429" in str(e) and attempt <= 3:  # retried while attempt <= 3
                print(f"429 错误，等待40秒后重试 (第 {attempt} 次)")
                time.sleep(40)
                return parse_relation(k, v, attempt + 1, timeout)
            else:
                print(f"Error processing {k}: {e}, attempt {attempt}")
        except Exception as e:
            if ("Error code: 429" in str(e) or 'Throttling.RateQuota' in str(e)) and attempt <= 2:  # retried while attempt <= 2
                print(f"429 或 Throttling.RateQuota 错误，等待{40*(attempt+1)}秒后重试 (第 {attempt} 次)")
                time.sleep(40*(attempt+1))
                return parse_relation(k, v, attempt + 1, timeout)
            else:            
                print(f"Error processing {k}: {e}, attempt {attempt}")
        return False
        
    def worker():
        """Consume tasks until the ``None`` sentinel is received, counting failures."""
        nonlocal success_count, failure_count
        while True:
            item = task_queue.get()
            if item is None:
                break
            k, v = item
            try:
                # Run the task on a helper thread so waiting for its result can time out
                with ThreadPoolExecutor(max_workers=1) as executor:
                    future = executor.submit(parse_relation, k, v)
                    success = future.result(timeout=30)
                    if not success:
                        with result_dict_lock:
                            failure_count += 1
                        print(f"Failed to process {k} after 2 attempts")
            except Exception as e:
                with result_dict_lock:
                    failure_count += 1
                print(f"Timeout or error processing {k}: {e}")
            finally:
                task_queue.task_done()

    def main():
        """Start the workers, queue every pair, wait for completion and write the JSON file."""
        nonlocal success_count, failure_count
        threads = []
        for i in range(num_threads):
            t = threading.Thread(target=worker)
            t.start()
            threads.append(t)

        # Report the number of pending tasks
        total_tasks = len(related_paragraphs_dict)
        print(f"待处理任务总数: {total_tasks}")

        for k, v in related_paragraphs_dict.items():
            task_queue.put((k, v))

        # Block until every queued task has been marked done
        task_queue.join()

        # Stop the workers: one sentinel per thread
        for i in range(num_threads):
            task_queue.put(None)
        for t in threads:
            t.join()

        # Print the completion summary
        print(f"总成功任务数: {success_count}")
        print(f"总失败任务数: {failure_count}")

        # Write the results to a JSON file; tuple keys are turned into strings first
        graph_dict = {}
        for k, v in final_result_dict_2.items():
            graph_dict[str(k)] = v

        relations_json_path = os.path.join(graph_structure_folder, "relations.json")
        js.write_json(graph_dict, relations_json_path)

        print(f"结果已保存到: {relations_json_path}")

    # Run the inner main function
    main()
# Main entry point of the pipeline
def txt_to_kg(file_path,all_relations, thread_number_1=180,thread_number_2=180,thread_number_3=180,threshold = 0.8 ):
    """Run the whole text-to-knowledge-graph pipeline on one text file.

    Steps, in order: split the text into paragraphs, extract entities per
    paragraph, embed and partition the entities, collect the paragraphs related
    to each entity, generate entity definitions, build entity pairs and their
    relations, convert both to node/edge JSON files, and fill the HTML template.
    Intermediate results are written to and re-read from the module-level paths.

    Args:
        file_path: Path of the UTF-8 text file to process.
        all_relations: Mapping of relation type to a list of relation names;
            an edge gets the first type whose list contains its relation name.
        thread_number_1: Thread count for the entity extraction step.
        thread_number_2: Thread count for the entity definition step.
        thread_number_3: Thread count for the relation step.
        threshold: Passed to ``embedder.partition_by_similarity``.

    Returns:
        None. The outputs are the JSON and HTML files.

    NOTE(review): ``template.html`` must already exist in ``graph_picture_folder``;
    nothing in this function creates it.
    """

    file_content = step0_read_txt_to_str(file_path)
#    print(file_content)
    json_output = step0_str_to_json(file_content)

    js.write_json(json_output, raw_data_json_path)
#    print(f"JSON 数据已写入 {raw_data_json_path}")

    # Step 1: extract entities per paragraph (also writes step_1_processed.json)
    step1_multi_thread_parse(json_output,thread_number=thread_number_1)
    
    # Re-read the result, store it sorted by paragraph number, and flatten all entities
    original_dict = js.read_json(step_1_processed_json_path)
    sorted_dict = sort_by_para_index(original_dict)
    js.write_json(sorted_dict,step_1_processed_json_path)
    merged_list = merge_elements_to_single_list(original_dict)
        
    # Simplify the structure: {para_key: {para_key: [...]}} -> {para_key: [...]}
    simplified_dict = {k: v[k] for k, v in sorted_dict.items()}

    entities_dict=simplified_dict
    text_dict=js.read_json(raw_data_json_path)

    # Merge entities and text per paragraph; paragraphs without entities get an empty list
    merged_dict = {}
    for key in text_dict:
        merged_dict[key] = {
            "entities": entities_dict.get(key, []),
            "text": text_dict[key]["text"]
        }

    # Print and save the merged dict
    print(merged_dict)
    js.write_json(merged_dict,entity_recog_01_path)
    given_list=merged_list

    # NOTE(review): json_file_path is assigned but not used afterwards in this function.
    json_file_path=os.path.join(step_1_process_folder,'mapping_embedding_dict.json')
    embedded_dict=embedder.embed_list(given_list)
    print(embedded_dict)
    # Partition the embedded entities by similarity
    partitions = embedder.partition_by_similarity(embedded_dict, threshold)

    print(partitions)

    js.write_json(partitions,entity_partitions_path)

    partition_dict=js.read_json(entity_partitions_path)

    index_dict=js.read_json(entity_recog_01_path)

    # Build the entity -> related paragraphs dict
    related_paragraphs_dict = find_related_paragraphs(index_dict, partition_dict)
    js.write_json(related_paragraphs_dict,entity_para_index_path)
    # Generate entity definitions (writes entities.json)
    process_related_paragraphs(num_threads=thread_number_2,related_paragraphs_dict=related_paragraphs_dict)
    entities_dict=js.read_json(entity_para_index_path)
    index_dict=js.read_json(entity_recog_01_path) 
    # Build entity pairs, then their relations (writes relations.json)
    final_result_dict=process_entities(entities_dict, index_dict)
    process_relations_multithreaded(final_result_dict, num_threads=thread_number_3)

    # Read the generated JSON files
    dict1 = js.read_json(entities_json_path)
    dict2 = js.read_json(relations_json_path)

    # Convert dict1 into the node format, keyed by the concept name
    new_dict1 = {}
    for key, value in dict1.items():
        try:
            # The value is used as a dict directly; no JSON decoding happens here
            value_dict = value
            # Extract the concept and its definition
            node_name = value_dict['概念']
            node_def = value_dict['定义']
            new_dict1[str(node_name)] = {'name': node_name, 'node_content': node_def}
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError for key {key}, value: {value}: {e}")
        except KeyError as e:
            print(f"KeyError for key {key}, value: {value}: {e}")

    # Convert dict2 into the edge format; only pairs whose two entities are both nodes are kept
    new_dict2 = {}
    relation_id = 1
    for key, value in dict2.items():
        node1 = value["entity_pair"][0]
        node2 = value["entity_pair"][1]
        if node1 in new_dict1 and node2 in new_dict1:
            relation_name = value['relation_name']
            # Stays '' when no list in all_relations contains the relation name
            relation_type = ''
            for r_type, r_list in all_relations.items():
                if relation_name in r_list:
                    relation_type = r_type
                    break
            
            new_dict2[relation_id] = {
                'begin_node': new_dict1[node1]['name'],
                'end_node': new_dict1[node2]['name'],
                'relation_name': relation_name,
                'relation_explaination': value['relation_explaination'],
                'relation_type': relation_type
            }
            relation_id += 1

    # Write the node and edge JSON files
    js.write_json(new_dict1, nodes_json_path)
    js.write_json(new_dict2, edges_json_path)

    import networkx as nx

    html_template_path = os.path.join(graph_picture_folder, "template.html")
    output_html_path = os.path.join(graph_picture_folder, "nodes_and_edges.html")

    # Read the JSON data back
    nodes=js.read_json(nodes_json_path)
    edges=js.read_json(edges_json_path)

    # Create an undirected graph
    G = nx.Graph()

    # Add the nodes
    for node_id in nodes.keys():
        G.add_node(node_id)

    # Add the edges
    for edge_id, edge_info in edges.items():
        G.add_edge(edge_info['begin_node'], edge_info['end_node'], relation_name=edge_info['relation_name'], relation_explaination=edge_info['relation_explaination'])

    # Compute node positions with the spring layout algorithm
    pos = nx.spring_layout(G)

    # Convert the positions to a dict of float pairs
    # NOTE(review): positions is not used afterwards in this function.
    positions = {node: (float(x), float(y)) for node, (x, y) in pos.items()}

    # Serialise the node and edge data as JSON strings
    nodes_data = json.dumps([{'id': node_id, 'name': node_info['name'], 'content': node_info['node_content']} for node_id, node_info in nodes.items()])
    links_data = json.dumps([{'source': edge_info['begin_node'], 'target': edge_info['end_node'], 'name': edge_info['relation_name'], 'explain': edge_info['relation_explaination']} for edge_id, edge_info in edges.items()])

    # Read the HTML template
    with open(html_template_path, 'r', encoding='utf-8') as f:
        html_template = f.read()

    # Replace the placeholders of the template
    html_content = html_template.replace('{{ nodes }}', nodes_data).replace('{{ links }}', links_data)

    # Save the generated HTML content
    with open(output_html_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"HTML文件已生成：{output_html_path}")
    return


In [None]:
# Relation type -> relation names. txt_to_kg gives an edge the first type, in
# the order listed here, whose list contains the edge's relation name.
# NOTE(review): "A 是 B 的流程步骤" appears under both "A to B" and "A from B";
# because of the first-match lookup it always resolves to "A to B" — confirm
# whether the second entry was meant to be a different label.
relations = {
    "A to B": [
        "A to B",
        "A 依赖于 B",
        "A 控制 B",
        "A 影响 B",
        "A 约束 B",
        "A 支持 B",
        "A 授权 B",
        "A 将信息或资源传递给 B",
        "A 引导 B",
        "A 转换为 B",
        "A 触发 B",
        "A 继承 B",
        "A 是 B 的流程步骤",
        "A 对 B 提供反馈"
    ],
    "A from B": [
        "A 被 B 依赖",
        "A 被 B 控制",
        "A 被 B 影响",
        "A 被 B 约束",
        "A 被 B 支持",
        "A 被 B 授权",
        "A 被 B 传递信息或资源",
        "A 被 B 引导",
        "A 被 B 转换",
        "A 被 B 触发",
        "A 被 B 继承",
        "A 是 B 的流程步骤",
        "A 被 B 提供反馈"
    ],
    "A and B": [
        "A and B",
        "A 与 B 是对比关系",
        "A 与 B 是并列关系",
        "A 与 B 是相似关系",
        "A 与 B 是互补关系",
        "A 与 B 是对称关系",
        "A 与 B 相关"
    ],
    "A no B": [
        "无关",
        "不相关",
        "A no B",
        "独立的概念",
        "A 与 B 是独立的分类",
        "A 与 B 是独立的类型",
        "不相关关系",
        "A 与 B 是不相关的"
    ]
}

# Run the pipeline on the raw text file with small thread pools
txt_to_kg(raw_data_txt_path,relations, thread_number_1=10, thread_number_2=10, thread_number_3=5, threshold = 0.8 )