<a href="https://colab.research.google.com/github/IKKEM-Lin/colab/blob/main/gen_turtle_v3_20231123.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reaction 数据转化为三元组（turtle）



In [None]:
# Install the third-party packages imported below (notebook shell commands,
# one `pip install` per package).
! pip install pubchempy
! pip install rdflib
! pip install requests
! pip install loguru
! pip install networkx

import pubchempy as pcp
from rdflib import Namespace, Literal, URIRef, Graph as RDFGraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
import networkx as nx
from networkx import Graph as NXGraph
import requests
from loguru import logger

import copy
import json
import uuid
import collections
import hashlib
import os
import re


# Substance lookup table; get_IUPAC_name_final reads the "cid" field of its
# entries.  The timeout keeps a stalled connection from hanging the notebook
# and raise_for_status() stops an HTTP error page from being parsed as data.
url = 'https://raw.githubusercontent.com/IKKEM-Lin/colab/main/substance_map_20231103.json'
response = requests.get(url, timeout=60)
response.raise_for_status()
substance_map = response.json()

# Article records; get_article_detail matches them on their "id" field.
url2 = 'https://raw.githubusercontent.com/IKKEM-Lin/colab/main/all_article_20231123.json'
articles_response = requests.get(url2, timeout=60)
articles_response.raise_for_status()
all_articles = articles_response.json()

def get_article_detail(article_id):
    """Return the article record whose ``"id"`` equals *article_id*.

    Every record of the module-level ``all_articles`` list is inspected and
    the first match is returned.  An empty dict is returned when nothing
    matches.
    """
    matches = [record for record in all_articles if record["id"] == article_id]
    if not matches:
        return {}
    # A falsy first match collapses to an empty dict as well.
    return matches[0] or {}

### 1. 公共函数，用于统一化合物的名称，https://pubchem.deno.dev 会缓存所请求的数据



In [26]:
# common function

def get_IUPAC_name_final(name, mapping_dict):
    """Resolve a substance name through the module-level ``substance_map``.

    Returns the ``"cid"`` field of the entry stored under *name*.  When the
    map has no (truthy) entry for *name*, the name itself is returned
    unchanged.  *mapping_dict* is accepted but not used.

    An earlier revision resolved names through the remote
    https://pubchem.deno.dev/iupac endpoint; that lookup is disabled here.
    """
    entry = substance_map.get(name)
    return entry["cid"] if entry else name

def get_spieces_class_operations(key, mapping_dict, IUPAC_name = ""):
    """Derive the identifier string and class URI for a species.

    When *IUPAC_name* consists of digits only it is used as the identifier
    as-is and the URI is placed under the ``obo:`` scheme.  Any other value
    is replaced by the MD5 hex digest of its UTF-8 bytes and the URI is
    placed under ``spi:``.

    *key* and *mapping_dict* are accepted but not used.  An earlier revision
    looked up CHEBI identifiers through https://pubchem.deno.dev/chebi; that
    lookup is disabled here.

    Returns:
        A ``(identifier, URIRef)`` tuple.
    """
    if re.match(r"\d+$", IUPAC_name):
        identifier = IUPAC_name
        scheme = "obo:"
    else:
        identifier = hashlib.md5(IUPAC_name.encode("UTF-8")).hexdigest()
        scheme = "spi:"
    return identifier, URIRef(scheme + identifier)



### 2. 数据模型，定义了所有三元组及谓词

In [27]:
# data model

class GraphModel:
    """Turtle snippet templates and predicate names for the reaction graph.

    ``properties`` maps a short key to a predicate name; the template
    attributes are ``str.format`` patterns whose predicate placeholders use
    those keys.  A few placeholders have no entry in ``properties`` (for
    example ``amount``, ``yield_``, ``state``, ``product``) and must be
    supplied by the caller when formatting.
    """

    def __init__(self):
        self.properties = dict()
        self.__define_properties()
        # Reaction details (article metadata and reaction conditions)
        self.reaction_detail = """
            <react:{reaction_id}>  <{doi}> "{doi_value}" ; \n
                                    <{title}> "{title_value}" ; \n
                                    <{journal}> "{journal_value}" ; \n
                                    <{pubdate}> "{pubdate_value}" ; \n
                                    <{author}> "{author_value}" ; \n
                                    <{rid}> "{rid_value}" ; \n
                                    <{reaction_type}> "{reaction_type_value}" ; \n
                                    <{pressure}> "{pressure_value}" ; \n
                                    <{temperature}> "{temperature_value}" ; \n
                                    <{thermal_effect}> "{thermal_effect_value}" ; \n
                                    <{standard_enthalpy}> "{standard_enthalpy_value}" ; \n
                                    <{time}> "{time_value}" . \n
        """

        # Link from a reaction to one of its catalysts
        self.reaction_cata = """
            <react:{reaction_id}> <{has_catalyst}> <cata:{cid}> . \n
        """

        # Catalyst attributes
        self.cata_detail = """
                <cata:{id}> <{cid}> "{cid_value}" ; \n
                            <{is_catalyst_of}> <react:{reaction_id}> ;
                            <{name}> "{name_value}" ; \n
                            <{ratio}> "{ratio_value}" ; \n
                            <{iupac_name}> "{iupac_name_value}" ; \n
                            <{ssa}> "{ssa_value}" ; \n
                            <{tof}> "{tof_value}" ; \n
                            <{shape}> "{shape_value}" ; \n
                            <{stability}> "{stability_value}" ; \n
                            <{size}> "{size_value}" ; \n
                            <{BET}> "{BET_value}" ; \n
                            <{support}> "{support_value}" ; \n
                            <{synthetic_method}> "{synthetic_method_value}" ; \n
                            <{complex_method}> "{complex_method_value}" ; \n
                            <{pore_size}> "{pore_size_value}" ; \n
                            <{pore_volume}> "{pore_volume_value}" ; \n
                            <{precursor}> "{precursor_value}" ; \n
                            <{loading}> "{loading_value}" .
        """

        # Reaction <-> reactant links (both directions)
        self.reaction_reactant = """\n
            <react:{reaction_id}> <{has_reactant}> <{reactant_value}> . \n
            <{reactant_value}> <{is_reactant_of}> <react:{reaction_id}> . \n
        """

        # Reified statement carrying the attributes of a reactant edge
        self.reactant_detail = """\n
            <state:{reaction_id}_reactant_{reactant_id}> a rdf:Statement ; \n
                    rdf:object <react:{reaction_id}> ; \n
                    rdf:predicate <{is_reactant_of}> ; \n
                    rdf:subject <{reactant_value}> ; \n
                    <{amount}> "{amount_value}" ; \n
                    <{ratio}> "{ratio_value}" ; \n
                    <{gas_ratio}> "{gas_ratio_value}" ; \n
                    <{p_initial}> "{p_initial_value}" ; \n
                    <{p_final}> "{p_final_value}" ; \n
                    <{conversion}> "{conversion_value}" . \n
        """

        # Reaction <-> product links (both directions)
        self.reaction_product = """ \n
            <react:{reaction_id}> <{has_product}> <{product_class}> .  \n
            <{product_class}> a <{product_class}> ;  \n
            <{is_product_of}> <react:{reaction_id}> .
        """

        # Reified statement carrying the attributes of a product edge
        self.product_detail = """ \n
            <{state}> a rdf:Statement ; \n
            rdf:object <{product}> ; \n
            rdf:predicate <{has_product}> ; \n
            rdf:subject <react:{reaction_id}> ; \n
            <{selectivity}> "{selectivity_value}" ; \n
            <{selectivity_other}> "{selectivity_other_value}" ; \n
            <{yield_}> "{yield_value}" ; \n
            <{yield_other}> "{yield_other_value}" . \n
        """

        # Reaction <-> solvent links (both directions)
        self.reaction_solvent = """ \n
            <react:{reaction_id}> <{has_solvent}> <{solvent_class}> .  \n
            <{solvent_class}> a <{solvent_class}> ;  \n
            <{is_solvent_of}> <react:{reaction_id}> .
        """

        # Reified statement carrying the attributes of a solvent edge
        self.solvent_detail = """ \n
            <{state}> a rdf:Statement ; \n
            rdf:object <{solvent}> ; \n
            rdf:predicate <{has_solvent}> ; \n
            rdf:subject <react:{reaction_id}> ; \n
            <{concentration}> "{concentration_value}" . \n
        """

    @staticmethod
    def _escape_literal(text):
        """Escape *text* for use inside a double-quoted Turtle string literal."""
        # The backslash is replaced first so that the escapes introduced by
        # the following replacements are not doubled.
        return (
            text.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

    def add_spi_propertity(self, name, IUPAC_name, class_URI):
        """Return the Turtle triples that name a species.

        Args:
            name: Display name or formula.  If it contains any upper-case
                character it is emitted with the ``has_formula`` predicate,
                otherwise with ``has_name``.
            IUPAC_name: Value emitted with the ``has_IUPAC_name`` predicate.
            class_URI: Subject of the triples; converted with ``str``.

        Returns:
            The formatted snippet.  Backslashes, double quotes, line feeds
            and carriage returns in *name* and *IUPAC_name* are escaped so
            that the literals stay syntactically valid.
        """
        if any(char.isupper() for char in name):
            template = """  \n
                <{class_URI}> <{has_formula}> "{name}" ;  \n
                <{has_IUPAC_name}> "{IUPAC_name}" .  \n
            """
        else:
            template = """ \n
                <{class_URI}> <{has_name}> "{name}" ;  \n
                <{has_IUPAC_name}> "{IUPAC_name}" .  \n
            """
        return template.format(
            has_formula=self.properties["has_formula"],
            has_name=self.properties["has_name"],
            has_IUPAC_name=self.properties["has_IUPAC_name"],
            class_URI=str(class_URI),
            name=self._escape_literal(name),
            IUPAC_name=self._escape_literal(IUPAC_name),
        )

    def __define_properties(self):
        """Populate ``self.properties`` with every predicate name."""
        self.properties.update({
            # Basic information about the reaction's source article
            "doi": "react:doi",
            "title": "react:title",
            "journal": "react:journal",
            "pubdate": "react:pubdate",
            "author": "react:author",
            "reaction_type": "react:type",
            # Reaction attributes
            "rid": "react:id",
            "pressure": "react:pressure",
            "temperature": "react:temperature",
            "time": "react:time",
            "thermal_effect": "react:thermal_effect",
            "standard_enthalpy": "react:standard_enthalpy",
            # Reactants
            "is_reactant_of": "spi:is_reactant_of",
            "has_reactant": "react:has_reactant",
            "reactant_amount": "react:amount",
            "reactant_ratio": "react:ratio",
            "gas_ratio": "react:gas_ratio",
            "p_initial": "react:p_initial",
            "p_final": "react:p_final",
            "conversion": "react:conversion",
            # Products
            "is_product_of": "spi:is_product_of",
            "has_product": "react:has_product",
            "selectivity": "react:selectivity",
            "selectivity_other": "react:selectivity_other",
            "yield": "react:yield",
            "yield_other": "react:yield_other",
            # Catalyst links
            "is_catalyst_of": "cata:is_catalyst_of",
            "has_catalyst": "react:has_catalyst",
            # Intermediates
            "is_intermediate_of": "spi:is_intermediate_of",
            "has_intermediate": "react:has_intermediate",
            # Solvents
            "is_solvent_of": "sol:is_solvent_of",
            "has_solvent": "react:has_solvent",
            "concentration": "sol:concentration",
            # Species naming
            "has_name": "spi:has_name",
            "has_formula": "spi:has_formula",
            "has_IUPAC_name": "spi:has_IUPAC_name",
            # Catalyst attributes
            "cid": "cata:id",
            "name": "cata:name",
            "ratio": "cata:ratio",
            "iupac_name": "cata:iupac_name",
            "ssa": "cata:ssa",
            "tof": "cata:tof",
            "shape": "cata:shape",
            "stability": "cata:stability",
            "size": "cata:size",
            "BET": "cata:BET",
            "support": "cata:support",
            "synthetic_method": "cata:synthetic_method",
            "complex_method": "cata:complex_method",
            "pore_size": "cata:pore_size",
            "pore_volume": "cata:pore_volume",
            "precursor": "cata:precursor",
            "loading": "cata:loading",
        })

### 3. 导入reaction json数据并转化为三元组（中间体尚未处理！）

In [28]:
class GraphOperations:
    def __init__(self):
        """Create the graph model, empty bookkeeping containers and the header template.

        ``update_query`` is a ``str.format`` pattern: a run of ``@prefix``
        declarations followed by an ``{expression}`` placeholder.
        """
        header_parts = (
            "@prefix cata: <http://www.semanticweb.org/kg/relay-cataysis/catalyst/>. \n",
            "@prefix obo: <http://purl.obolibrary.org/obo/>. \n",
            "@prefix owl: <http://www.w3.org/2002/07/owl#>. \n",
            "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>. \n",
            "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>. \n",
            "@prefix chebi: <http://purl.obolibrary.org/obo/chebi/>. \n",
            "@prefix inter: <http://www.semanticweb.org/kg/relay-cataysis/intermediate/>. \n",
            "@prefix react: <http://www.semanticweb.org/kg/relay-cataysis/reaction/>. \n",
            "@prefix state: <http://www.semanticweb.org/kg/relay-cataysis/statement/>. \n",
            "@prefix xml: <http://www.w3.org/XML/1998/namespace>. \n",
            "@prefix xsd: <http://www.w3.org/2001/XMLSchema#>. \n",
            "@prefix spi: <http://www.semanticweb.org/kg/relay-cataysis/spieces/>. \n",
            # The cata prefix is declared a second time, exactly as before.
            "@prefix cata: <http://www.semanticweb.org/kg/relay-cataysis/catalyst/>. \n",
            "{expression} \n",
        )
        self.graph_dict = GraphModel()
        self.no_class_list = []
        self.mapping_dict = {}
        self.update_query = "".join(header_parts)

    def define_reaction(self, data):
        """Normalise one raw reaction record into the dictionaries used for insertion.

        Args:
            data: Raw record.  The top-level ``id``, ``snippet_id`` and
                ``article_id`` fields are read, plus the nested ``data``
                payload (``type``, ``condition``, ``catalysts``,
                ``reactants``, ``products``, ``solvents`` ...).

        Returns:
            A dict with the keys ``reaction_dict``, ``reactant_info_dict``,
            ``product_info_dict``, ``solvent_info_dict``,
            ``intermediate_dict``, ``intermediate_list`` and
            ``catalyst_info_dict``.  The species and catalyst dicts are keyed
            by the resolved name, so a later entry with the same key replaces
            an earlier one.  Intermediates are not processed yet and are
            returned empty.
        """
        reaction_dict = collections.OrderedDict()

        # Basic identifiers of the record
        system_reaction_id = data.get("id", "")
        system_snippet_id = data.get("snippet_id", "")
        system_article_id = data.get("article_id", "")
        system_base_id = ""
        if system_reaction_id:
            system_base_id = f"{system_article_id}_{system_snippet_id}_{system_reaction_id}"

        # "or {}" keeps explicit nulls in the input from crashing the lookups.
        reaction_data = data.get("data") or {}
        article_data = get_article_detail(system_article_id)
        condition_data = reaction_data.get("condition") or {}

        reaction_dict["doi"] = (article_data.get("doi") or "").strip()
        reaction_dict["reaction_type"] = reaction_data.get("type", "")
        reaction_dict["pressure"] = condition_data.get("pressure", "")
        reaction_dict["time"] = condition_data.get("time", "")
        reaction_dict["temperature"] = condition_data.get("temperature", "")
        reaction_dict["thermal_effect"] = reaction_data.get("thermal_effect", "")
        reaction_dict["standard_enthalpy"] = reaction_data.get("standard_enthalpy", "")

        # Convert the catalyst list into a dict keyed by resolved name
        catalyst_info_dict = {}
        last_catalyst_id = ""
        for catalyst_info in reaction_data.get("catalysts") or []:
            catalyst_dict = {
                "mf": catalyst_info.get("mf", ""),
                "amount": catalyst_info.get("amount", ""),
                "ratio": catalyst_info.get("ratio", ""),
            }
            catalyst_dict.update(catalyst_info.get("catalyst_props") or {})

            cata_name = catalyst_info.get("name", "")
            cata_iupac_name = catalyst_info.get("iupac_name", "")
            if not cata_iupac_name:
                cata_iupac_name = get_IUPAC_name_final(cata_name or catalyst_dict["mf"], self.mapping_dict)
            # The catalyst ID is derived from the name plus the attributes
            # collected so far (before id/name/iupac_name are added).
            catalyst_str = json.dumps(catalyst_dict)
            catalyst_dict["id"] = str(
                uuid.uuid3(uuid.NAMESPACE_X500, cata_iupac_name + catalyst_str)).replace("-", "")
            catalyst_dict["name"] = cata_name
            catalyst_dict["iupac_name"] = cata_iupac_name
            catalyst_info_dict[cata_iupac_name] = catalyst_dict
            last_catalyst_id = catalyst_dict["id"]

        # Reactants
        reactant_info_dict = {}
        final_pressure_alias = {}
        for reactant_info in reaction_data.get("reactants") or []:
            reactant_iupac_name, reactant_dict = self._build_species_entry(reactant_info)
            gas_props = reactant_info.get("gas_props") or {}
            reactant_props = reactant_info.get("reactant_props") or {}
            reactant_dict["gas_ratio"] = gas_props.get("ratio", "")
            reactant_dict["p_initial"] = gas_props.get("p_initial", "")
            # Legacy (misspelt) key, kept unchanged for existing consumers.
            reactant_dict["p_finial"] = gas_props.get("p_finial", "")
            reactant_dict["space_velocity"] = gas_props.get("space_velocity", "")
            reactant_dict["conversion_rate"] = reactant_props.get("conversion_rate", "")
            reactant_info_dict[reactant_iupac_name] = reactant_dict
            final_pressure_alias[reactant_iupac_name] = (
                gas_props.get("p_final") or reactant_dict["p_finial"]
            )

        # Products
        product_info_dict = {}
        for product_info in reaction_data.get("products") or []:
            product_iupac_name, product_dict = self._build_species_entry(product_info)
            product_props = product_info.get("product_props") or {}
            product_dict["yield"] = product_props.get("yield", "")
            product_dict["selectivity"] = product_props.get("selectivity", "")
            product_info_dict[product_iupac_name] = product_dict

        # Solvents
        solvent_info_dict = {}
        for solvent_info in reaction_data.get("solvents") or []:
            solvent_iupac_name, solvent_dict = self._build_species_entry(solvent_info)
            liquid_props = solvent_info.get("liquid_props") or {}
            solvent_dict["concentration"] = liquid_props.get("concentration", "")
            solvent_info_dict[solvent_iupac_name] = solvent_dict

        # Intermediates are not processed yet; empty placeholders are returned.
        intermediates = []
        intermediate_dict = {}

        # Use the system ID when available, otherwise hash the content
        reaction_dict["id"] = system_base_id
        if not system_base_id:
            reaction_str = (
                last_catalyst_id
                + "_" + json.dumps(reactant_info_dict)
                + "_" + json.dumps(product_info_dict)
                + "_" + json.dumps(intermediates)
                + "_" + json.dumps(solvent_info_dict)
                + json.dumps(reaction_dict)
            )
            reaction_dict["id"] = str(uuid.uuid3(uuid.NAMESPACE_X500, reaction_str)).replace("-", "")

        # insert_reaction reads the final pressure under "p_final".  The
        # correctly spelt key is added only after the content hash above so
        # that previously generated fallback IDs stay the same.
        for reactant_key, final_pressure in final_pressure_alias.items():
            reactant_info_dict[reactant_key]["p_final"] = final_pressure

        # Remaining article and bookkeeping attributes
        reaction_dict["title"] = article_data.get("title", "")
        reaction_dict["journal"] = article_data.get("journal", "")
        reaction_dict["pubdate"] = article_data.get("pub_date", "")
        reaction_dict["authors"] = ', '.join(article_data.get("authors") or [])
        # NOTE(review): these are read from the nested payload, whereas the
        # ID prefix above uses the top-level fields - confirm both exist.
        reaction_dict["article_id"] = reaction_data.get("article_id", 0)
        reaction_dict["snippet_id"] = reaction_data.get("snippet_id", "")
        reaction_dict["name"] = reaction_data.get("name", "")
        reaction_dict["create_at"] = reaction_data.get("create_at", "")
        reaction_dict["updated_at"] = reaction_data.get("updated_at", "")

        return {
            "reaction_dict": reaction_dict,
            "reactant_info_dict": reactant_info_dict,
            "product_info_dict": product_info_dict,
            "solvent_info_dict": solvent_info_dict,
            "intermediate_dict": intermediate_dict,
            "intermediate_list": intermediates,
            "catalyst_info_dict": catalyst_info_dict,
        }

    def _build_species_entry(self, species_info):
        """Build the fields shared by reactants, products and solvents.

        Returns:
            ``(iupac_name, entry)`` where *entry* holds ``mf``, ``amount``,
            ``phase``, ``ratio``, ``name`` and ``iupac_name`` (in that order).
            When the input has no ``iupac_name`` it is resolved with
            ``get_IUPAC_name_final`` from the name, or from ``mf`` when the
            name is empty.
        """
        entry = {
            "mf": species_info.get("mf", ""),
            "amount": species_info.get("amount", ""),
            "phase": species_info.get("phase", ""),
            "ratio": species_info.get("ratio", ""),
        }
        species_name = species_info.get("name", "")
        entry["name"] = species_name
        iupac_name = species_info.get("iupac_name", "")
        if not iupac_name:
            iupac_name = get_IUPAC_name_final(species_name or entry["mf"], self.mapping_dict)
        entry["iupac_name"] = iupac_name
        return iupac_name, entry

    def insert_reaction(self, params):
        reaction_dict = params.get("reaction_dict", {})
        reactant_info_dict = params.get("reactant_info_dict", {})
        product_info_dict = params.get("product_info_dict", {})
        solvent_info_dict = params.get("solvent_info_dict", {})
        intermediate_list = params.get("intermediates", [])
        catalyst_info_dict = params.get("catalyst_info_dict", {})
        intermediate_dict = params.get("intermediate_dict", {})

        result = ""

        reaction_id = reaction_dict.get("id", "")

        # 添加反应节点
        reaction_detail = self.graph_dict.reaction_detail.format(**{
                **self.graph_dict.properties,
                **dict(
                    reaction_id=reaction_id,
                    doi_value=reaction_dict.get("doi", "").replace('"', r'\"'),
                    title_value=reaction_dict.get("title", "").replace('"', r'\"'),
                    journal_value=reaction_dict.get("journal", ""),
                    pubdate_value=reaction_dict.get("pubdate", ""),
                    author_value=reaction_dict.get("authors", ""),
                    rid_value=reaction_id,
                    reaction_type_value=reaction_dict.get("reaction_type", ""),
                    pressure_value=reaction_dict.get("pressure", "").replace('"', r'\"'),
                    temperature_value=reaction_dict.get("temperature", "").replace('"', r'\"'),
                    thermal_effect_value=reaction_dict.get("thermal_effect", "").replace('"', r'\"'),
                    standard_enthalpy_value=reaction_dict.get("standard_enthalpy", ""),
                    time_value=reaction_dict.get("time", "")
                )
            }
        )
        result = f"{result} \n {reaction_detail}"

        # TODO 继续添加实体属性
        # 添加催化剂属性
        for cata_iupac_name, catalyst_dict in catalyst_info_dict.items():
            catalyst_id = catalyst_dict.get("id", "")
            # cid没用?
            cid = URIRef("cata:" + "{}".format(catalyst_id))

            # 添加反应与催化剂的关系
            reaction_cata = self.graph_dict.reaction_cata.format(
                reaction_id=reaction_id,
                has_catalyst=self.graph_dict.properties["has_catalyst"],
                cid=catalyst_id
            )
            result = f"{result} \n {reaction_cata}"

            cata_detail = self.graph_dict.cata_detail.format(**{
                    **self.graph_dict.properties,
                    **dict(
                        id=catalyst_id,
                        cid_value=catalyst_id,
                        reaction_id=reaction_id,
                        name_value=catalyst_dict.get("name", ""),
                        ratio_value=catalyst_dict.get("ratio", ""),
                        iupac_name_value=catalyst_dict.get("iupac_name", ""),
                        ssa_value=catalyst_dict.get("ssa", ""),
                        tof_value=catalyst_dict.get("tof", ""),
                        shape_value=catalyst_dict.get("shape", ""),
                        stability_value=catalyst_dict.get("stability", "").replace('"', r'\"'),
                        size_value=catalyst_dict.get("size", ""),
                        BET_value=catalyst_dict.get("BET", ""),
                        support_value=catalyst_dict.get("support", ""),
                        synthetic_method_value=catalyst_dict.get("synthetic_method", ""),
                        complex_method_value=catalyst_dict.get("complex_method", ""),
                        pore_size_value=catalyst_dict.get("pore_size", ""),
                        pore_volume_value=catalyst_dict.get("pore_volume", ""),
                        precursor_value=catalyst_dict.get("precursor", ""),
                        loading_value=catalyst_dict.get("loading", "")
                    )
                }
            )
            result = f"{result} \n {cata_detail}"

        # 添加反应物
        for reactant_iupac_name, value_dict in reactant_info_dict.items():
            reactant_name = value_dict.get("name", "") or value_dict.get("mf", "")
            # id_str: 05f338756c3795e0fe583df923cd6a65 react_class: [spi:05f338756c3795e0fe583df923cd6a65]
            id_str, react_class = get_spieces_class_operations(reactant_name, self.mapping_dict, reactant_iupac_name)
            # 添加物质的名称，化学式
            reactant_substance = self.graph_dict.add_spi_propertity(reactant_name, reactant_iupac_name, react_class)
            result = f"{result} \n {reactant_substance}"

            reaction_reactant = self.graph_dict.reaction_reactant.format(
                has_reactant=self.graph_dict.properties["has_reactant"],
                is_reactant_of=self.graph_dict.properties["is_reactant_of"],
                reaction_id=reaction_id,
                reactant_value=str(react_class)
            )
            result = f"{result} \n {reaction_reactant}"

            reactant_detail = self.graph_dict.reactant_detail.format(**{
                    **self.graph_dict.properties,
                    **dict(
                        amount=self.graph_dict.properties["reactant_amount"],
                        ratio=self.graph_dict.properties["reactant_ratio"],

                        reaction_id=reaction_id,
                        reactant_value=str(react_class),
                        reactant_id=id_str,
                        amount_value=value_dict.get("amount", ""),
                        ratio_value=value_dict.get("ratio", "-%"),
                        gas_ratio_value=value_dict.get("gas_ratio", "-%"),
                        p_initial_value=value_dict.get("conversion_rate", "-%"),
                        p_final_value=value_dict.get("p_final", "-%"),
                        conversion_value=value_dict.get("conversion_rate", "-%")
                    )
                }
            )
            result = f"{result} \n {reactant_detail}"

        # 中间体与产物对应
        iproducts = []
        # for inter_pair in intermediate_list:
        #     inter_name = inter_pair[0]
        #     iproduct = inter_pair[1]
        #     iproducts.append(iproduct)
        #     inter_normal_name = product_info_dict.get(inter_name, {}).get("name", "")
        #     iproduct_normal_name = product_info_dict.get(iproduct, {}).get("name", "")
        #     tid_str, inter_class = get_spieces_class_operations(inter_normal_name, self.mapping_dict, inter_name)
        #     pid_str, iproduct_class = get_spieces_class_operations(iproduct_normal_name, self.mapping_dict, iproduct)
        #     inter_uri = "inter:" + tid_str
        #     intermediate_substance = self.graph_dict.add_spi_propertity(inter_normal_name, inter_name, inter_uri)

        #     format_insert2 = self.graph_dict.add_spi_propertity(iproduct_normal_name, iproduct, iproduct_class)

        #     # 添加反应，中间体，产物的关系
        #     reaction_intermediate = """INSERT DATA {{  \n
        #         <react:{reaction_id}> <{has_intermediate}> <{inter_uri}> . \n
        #     }}""".format(
        #         reaction_id=reaction_id,
        #         # has_intermediate=self.graph_dict.properties["has_intermediate"],
        #         inter_uri=inter_uri
        #     )

        #     product_insert = """INSERT DATA {{  \n
        #                     <{product}> a <{product}> . \n
        #                 }}""".format(
        #         product=str(iproduct_class)
        #     )

        #     inter_query = """INSERT DATA {{  \n
        #         <{inter_uri}> a <{inter_class}> ;  \n
        #                     <{is_intermediate_of}> <react:{reaction_id}> ;  \n
        #                     <{has_product}> <{product}> .  \n
        #     }}""".format(
        #         inter_uri=inter_uri,
        #         inter_class=str(inter_class),
        #         # is_intermediate_of=self.graph_dict.properties["is_intermediate_of"],
        #         reaction_id=reaction_id,
        #         # has_product=self.graph_dict.properties["has_product"],
        #         product=str(iproduct_class)
        #     )

        #     # 添加中间体，产物，选择性的关系
        #     proper_dict = product_info_dict[iproduct]
        #     if isinstance(proper_dict, dict) and proper_dict:
        #         selectivity_str = proper_dict.get("selectivity", "")
        #         selectivity_other = proper_dict.get("selectivity_other", "")
        #         yield_str = proper_dict.get("yield", "")
        #         yield_other = proper_dict.get("yield_other", "")
        #         if selectivity_str or selectivity_other or yield_str or yield_other:
        #             # 添加具象化selectivity、selectivity_other、yield、yield_other边属性
        #             statement_ = "state:" + reaction_id + "_product_" + pid_str
        #             intermediate_detail = """INSERT DATA {{ \n
        #                 <{state}> a rdf:Statement ; \n
        #                 rdf:object <{product}> ; \n
        #                 rdf:predicate <{has_product}> ; \n
        #                 rdf:subject <react:{reaction_id}> ; \n
        #                 <{selectivity}> "{selectivity_value}" ; \n
        #                 <{selectivity_other}> "{selectivity_other_value}" ; \n
        #                 <{yield_}> "{yield_value}" ; \n
        #                 <{yield_other}> "{yield_other_value}" . \n
        #             }}""".format(**{
        #                     **self.graph_dict.properties,
        #                     **dict(
        #                         yield_=self.graph_dict.properties["yield"],

        #                         state=statement_,
        #                         product=iproduct_class,
        #                         reaction_id=reaction_id,
        #                         selectivity_value=proper_dict.get("selectivity", "-%"),
        #                         selectivity_other_value=proper_dict.get("selectivity_other", "-"),
        #                         yield_value=proper_dict.get("yield", "-%"),
        #                         yield_other_value=proper_dict.get("yield_other", "-")
        #                     )
        #                 }
        #             )

        # 添加产物
        for product, value_dict in product_info_dict.items():
            if product not in iproducts:
                product_name = value_dict.get("name", "") or value_dict.get("mf", "")
                pid_str, prod_class = get_spieces_class_operations(product_name, self.mapping_dict, product)
                product_substance = self.graph_dict.add_spi_propertity(product_name, product, prod_class)
                result = f"{result} \n {product_substance}"

                reaction_product = self.graph_dict.reaction_product.format(
                    has_product=self.graph_dict.properties["has_product"],
                    is_product_of=self.graph_dict.properties["is_product_of"],
                    reaction_id=reaction_id,
                    product_class=str(prod_class)
                )
                result = f"{result} \n {reaction_product}"

                if isinstance(value_dict, dict) and value_dict:
                    statement_ = "state:" + reaction_id + "_product_" + pid_str
                    product_detail = self.graph_dict.product_detail.format(**{
                            **self.graph_dict.properties,
                            **dict(
                                yield_=self.graph_dict.properties["yield"],

                                state=statement_,
                                product=str(prod_class),
                                reaction_id=reaction_id,
                                selectivity_value=value_dict.get("selectivity", "-%").replace('"', r'\"'),
                                selectivity_other_value=value_dict.get("selectivity_other", "-"),
                                yield_value=value_dict.get("yield", "-%").replace('"', r'\"'),
                                yield_other_value=value_dict.get("yield_other", "-")
                            )
                        }
                    )
                    result = f"{result} \n {product_detail}"

        # 溶剂与反应的关系
        for solvent, value_dict in solvent_info_dict.items():
            solvent_name = value_dict.get("name", "") or value_dict.get("mf", "")
            sid_str, solvent_class = get_spieces_class_operations(solvent_name, self.mapping_dict, solvent)
            solvent_substance = self.graph_dict.add_spi_propertity(solvent_name, solvent, solvent_class)
            result = f"{result} \n {solvent_substance}"

            reaction_solvent = self.graph_dict.reaction_solvent.format(
                has_solvent=self.graph_dict.properties["has_solvent"],
                is_solvent_of=self.graph_dict.properties["is_solvent_of"],
                reaction_id=reaction_id,
                solvent_class=str(solvent_class)
            )
            result = f"{result} \n {reaction_solvent}"


            if isinstance(value_dict, dict) and value_dict:
                statement_ = "state:" + reaction_id + "_solvent_" + sid_str
                solvent_detail = self.graph_dict.solvent_detail.format(
                    has_solvent=self.graph_dict.properties["has_solvent"],
                    concentration=self.graph_dict.properties["concentration"],
                    state=statement_,
                    solvent=str(solvent_class),
                    reaction_id=reaction_id,
                    concentration_value=value_dict.get("concentration", "-%")
                )
                result = f"{result} \n {solvent_detail}"

        return True, "Whole reaction-{} updated successful".format(reaction_id), result


    def insert_one_reaction(self, data):
        """Run a single reaction record through the two conversion steps.

        The record is handed to ``define_reaction`` and whatever that returns
        is passed unchanged to ``insert_reaction``.

        Args:
            data: One reaction record, forwarded as-is to ``define_reaction``.

        Returns:
            The three values produced by ``insert_reaction``, repacked as the
            tuple ``(check_tag, msg, result)``.
        """
        # TODO: complete the compound information (IUPAC_name, CAS number, CHEBI_ID)
        prepared = self.define_reaction(data)
        # Unpack explicitly so a result that is not exactly three items fails here.
        status, message, payload = self.insert_reaction(prepared)
        return (status, message, payload)

### 4. 读取上传好的reaction json文件，并依次转化写入ttl文件

In [None]:
# Convert every reaction of the uploaded JSON file and append the text returned
# by insert_one_reaction to ./tripple.ttl. A reaction whose conversion raises is
# recorded in ./error.txt and skipped, so one bad record does not abort the run.
reaction_data = []
graphOperations = GraphOperations()
PATH = './all_reactions_202231103.json'  # this file is uploaded by the user

# Start the output file from scratch with the update_query template rendered
# with an empty expression. The encoding is explicit because Turtle is UTF-8.
with open('./tripple.ttl', 'w', encoding='utf-8') as ttl:
  ttl.write(graphOperations.update_query.format(expression=""))

with open(PATH, 'r', encoding='utf-8') as f:
  # A missing or null "data" entry is treated as "no reactions" instead of
  # crashing on len(None) below.
  reaction_data = json.load(f).get("data") or []
length = len(reaction_data)
logger.info(f"Total: {length} reactions")

# Keep the output file open for the whole run instead of reopening it once per
# reaction; the `with` block still flushes and closes it if the loop is aborted.
with open('./tripple.ttl', 'a', encoding='utf-8') as ttl:
  for ind, reaction in enumerate(reaction_data):
    reaction_id = reaction.get("id", "")
    try:
      (check_tag, msg, result) = graphOperations.insert_one_reaction(reaction)
    except Exception as exc:
      # Best effort: note the failure (one line per reaction, with the cause)
      # and move on. KeyboardInterrupt/SystemExit are deliberately not caught.
      with open('./error.txt', 'a', encoding='utf-8') as err:
        err.write(f"ERROR: can not get data from {ind} -- {reaction_id} reaction ... ({exc!r})\n")
      continue
    if result:
      ttl.write(result)

In [None]:
# Upload the generated ./tripple.ttl as a multipart form POST to the URL below.
# NOTE(review): `-k` turns off TLS certificate verification and the upload token is
# hard-coded in the notebook; consider dropping `-k` and loading the token from a secret.
!curl -k -F "file=@./tripple.ttl" -F "token=nv23xeecwkfpdyjykof2" -F "model=2"  -X POST "https://tmp-cli.vx-cdn.com/app/upload_cli"