# parsing dei file in json

In [1]:
# XML Data Extraction
import xml.etree.ElementTree as ET
import json
import os
import uuid

In [2]:
def parse_xml_to_str(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    result = {}
    
    for child in root:
        if child.tag == 'features':
            result['features'] = process_features(xml_file)
        elif child.tag == 'footer':
            result['footer'] = process_footer(xml_file)
        elif child.tag == 'general':
            result['general'] = process_general(xml_file)
        elif child.tag == 'model':
            result['model'] = process_model(xml_file)
        elif child.tag == 'orders':
            result['orders'] = process_orders(xml_file)
        elif child.tag == 'gateway':
            result['gateway'] = process_gateway(xml_file)
        elif child.tag == 'techs':
            result['technical_data'] = process_techs(xml_file)
        # elif child.tag == 'sizes':
        #     result['sizes'] = process_sizes(xml_file)
        elif child.tag == 'user_interface':
            result['user_interface'] = process_user_interface(xml_file)
        elif child.tag == 'serial_interface':
            result['serial_interface'] = process_serial_interface(xml_file)
        elif child.tag == 'safety':
            result['safety'] = process_safety(xml_file)
        # elif child.tag == 'images':
        #     result['images'] = process_images(child)
        # Add other sections if needed

    return {k: v for k, v in result.items()}

def process_features(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the features element
        features = root.find('.//features')
        if features is None:
            return "No features found in the XML file."
        
        # Extract the title and features
        title = features.find('title').text if features.find('title') is not None else ""
        sections = features.findall('.//section/feature')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, feature in enumerate(sections, start=1):
            structured_text += f"{i}. {feature.text}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_footer(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the footer element
        footer = root.find('.//footer')
        if footer is None:
            return "No footer found in the XML file."
        
        # Extract the footer elements
        fsm = footer.find('fsm').text if footer.find('fsm') is not None else ""
        disclaimer = footer.find('disclaimer').text if footer.find('disclaimer') is not None else ""
        dts_code = footer.find('dtsCode').text if footer.find('dtsCode') is not None else ""
        ism_code = footer.find('ismCode').text if footer.find('ismCode') is not None else ""
        fm_code = footer.find('fmCode').text if footer.find('fmCode') is not None else ""
        company = footer.find('company').text if footer.find('company') is not None else ""
        
        # Format the output text
        structured_text = f"Functional Safety Management Certification: {fsm}\n"
        structured_text += f"Disclaimer: {disclaimer}\n"
        structured_text += f"DTS Code: {dts_code}\n"
        structured_text += f"ISM Code: {ism_code}\n"
        structured_text += f"FM Code: {fm_code}\n"
        structured_text += f"Company: {company}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_general(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the general element
        general = root.find('.//general')
        if general is None:
            return "No general information found in the XML file."
        
        # Extract the general elements
        language = general.find('language').text if general.find('language') is not None else ""
        website = general.find('website').text if general.find('website') is not None else ""
        info = general.find('info').text if general.find('info') is not None else ""
        
        # Format the output text
        structured_text = f"Language: {language}\n"
        structured_text += f"Website: {website}\n"
        structured_text += f"Info: {info}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."
    
def process_model(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the model element
        model = root.find('.//model')
        if model is None:
            return "No model information found in the XML file."
        
        # Extract the model elements
        family = model.find('family').text if model.find('family') is not None else ""
        name = model.find('name').text if model.find('name') is not None else ""
        # brief = model.find('brief').text if model.find('brief') is not None else ""
        long_description = model.find('long').text if model.find('long') is not None else ""
        use = model.find('use').text if model.find('use') is not None else ""
        field_area = model.find('fieldArea').text if model.find('fieldArea') is not None else ""
        install_area = model.find('installArea').text if model.find('installArea') is not None else ""
        models = model.find('models').text if model.find('models') is not None else ""
        
        # Format the output text
        structured_text = f"Family: {family}\n"
        structured_text += f"Name: {name}\n"
        # structured_text += f"Brief: {brief}\n"
        structured_text += f"Long Description: {long_description}\n"
        structured_text += f"Use: {use}\n"
        structured_text += f"Field Area: {field_area}\n"
        structured_text += f"Install Area: {install_area}\n"
        structured_text += f"Models: {models}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_orders(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the orders element
        orders = root.find('.//orders')
        if orders is None:
            return "No orders information found in the XML file."
        
        # Extract the orders elements
        title = orders.find('title').text if orders.find('title') is not None else ""
        sections = orders.findall('.//section')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, section in enumerate(sections, start=1):
            name = section.find('name').text if section.find('name') is not None else ""
            value = section.find('value').text if section.find('value') is not None else ""
            structured_text += f"{i}. {name}: {value}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_gateway(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the gateway element
        gateway = root.find('.//gateway')
        if gateway is None:
            return "No gateway information found in the XML file."
        
        # Extract the gateway elements
        title = gateway.find('title').text if gateway.find('title') is not None else ""
        text = gateway.find('text').text if gateway.find('text') is not None else ""
        
        # Format the output text
        structured_text = f"Title: {title}\n"
        structured_text += f"Text: {text}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_techs(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the techs element
        techs = root.find('.//techs')
        if techs is None:
            return "No technical data found in the XML file."
        
        # Extract the techs elements
        title = techs.find('title').text if techs.find('title') is not None else ""
        sections = techs.findall('.//section')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, section in enumerate(sections, start=1):
            name = section.find('name').text if section.find('name') is not None else ""
            value = section.find('value').text if section.find('value') is not None else ""
            structured_text += f"{i}. {name}: {value}\n"
            subsections = section.findall('.//subsection')
            for j, subsection in enumerate(subsections, start=1):
                sub_name = subsection.find('name').text if subsection.find('name') is not None else ""
                sub_value = subsection.find('value').text if subsection.find('value') is not None else ""
                structured_text += f"    {sub_name}: {sub_value}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_sizes(node):
    size_element = node.find('size')
    title_element = node.find('title')
    size = size_element.text.strip() if size_element is not None else ''
    title = title_element.text.strip() if title_element is not None else ''
    return {size: title}

def process_user_interface(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the user_interface element
        user_interface = root.find('.//user_interface')
        if user_interface is None:
            return "No user interface information found in the XML file."
        
        # Extract the user interface elements
        title = user_interface.find('title').text if user_interface.find('title') is not None else ""
        sections = user_interface.findall('.//section')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, section in enumerate(sections, start=1):
            name = section.find('name').text if section.find('name') is not None else ""
            value = section.find('value').text if section.find('value') is not None else ""
            structured_text += f"{i}. {name}: {value}\n"
            subsections = section.findall('.//subsection')
            for j, subsection in enumerate(subsections, start=1):
                sub_name = subsection.find('name').text if subsection.find('name') is not None else ""
                sub_value = subsection.find('value').text if subsection.find('value') is not None else ""
                structured_text += f"    {sub_name}: {sub_value}\n"
            subimages = section.findall('.//subimage')
            for k, subimage in enumerate(subimages, start=1):
                href = subimage.get('href') if subimage is not None else ""
                structured_text += f"    Image{k}: {href}\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_serial_interface(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the serial_interface element
        serial_interface = root.find('.//serial_interface')
        if serial_interface is None:
            return "No serial interface information found in the XML file."
        
        # Extract the serial interface elements
        title = serial_interface.find('title').text if serial_interface.find('title') is not None else ""
        sections = serial_interface.findall('.//section')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, section in enumerate(sections, start=1):
            name = section.find('name').text if section.find('name') is not None else ""
            value = section.find('value').text if section.find('value') is not None else ""
            structured_text += f"{i}. {name}: {value}\n"
            subsections = section.findall('.//subsection')
            for j, subsection in enumerate(subsections, start=1):
                sub_name = subsection.find('name').text if subsection.find('name') is not None else ""
                sub_value = subsection.find('value').text if subsection.find('value') is not None else ""
                structured_text += f"    {sub_name}: {sub_value}\n"
        
        # Process modbusTable into JSON
        modbus_table = serial_interface.find('.//modbusTable')
        if modbus_table is not None:
            entries = []
            for entry in modbus_table.findall('.//entry'):
                address = entry.find('address').text if entry.find('address') is not None else ""
                parameter = entry.find('parameter').text if entry.find('parameter') is not None else ""
                _type = entry.find('type').text if entry.find('type') is not None else ""
                factory = entry.find('factory').text if entry.find('factory') is not None else ""
                access = entry.find('access').text if entry.find('access') is not None else ""
                entries.append({
                    "address": address,
                    "parameter": parameter,
                    "type": _type,
                    "factory": factory,
                    "access": access
                })
            structured_text += "Modbus Table:\n"
            structured_text += json.dumps(entries, indent=4) + "\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."

def process_safety(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the safety element
        safety = root.find('.//safety')
        if safety is None:
            return "No safety information found in the XML file."
        
        # Extract the safety elements
        title = safety.find('title').text if safety.find('title') is not None else ""
        sections = safety.findall('.//section')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, section in enumerate(sections, start=1):
            name = section.find('name').text if section.find('name') is not None else ""
            text = section.find('text').text if section.find('text') is not None else ""
            structured_text += f"{i}. {name}: {text}\n" if name is not None else f"{i}. {text}\n"
            param_tables = section.findall('.//paramTable')
            for j, param_table in enumerate(param_tables, start=1):
                entries = []
                for entry in param_table.findall('.//entry'):
                    group = entry.find('group').text if entry.find('group') is not None else ""
                    co = entry.find('co').text if entry.find('co') is not None else ""
                    lo = entry.find('lo').text if entry.find('lo') is not None else ""
                    lo_ro = entry.find('lo_ro').text if entry.find('lo_ro') is not None else ""
                    entries.append({
                        "group": group,
                        "co": co,
                        "lo": lo,
                        "lo_ro": lo_ro
                    })
                structured_text += f"    {i}.{j}. Param Table:\n"
                structured_text += json.dumps(entries, indent=4) + "\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."


def process_fm_control_drawing(file_path):
    try:
        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()
        
        # Find the fm_control_drawing element
        fm_control_drawing = root.find('.//fm_control_drawing')
        if fm_control_drawing is None:
            return "No FM control drawing found in the XML file."
        
        # Extract the fm_control_drawing elements
        title = fm_control_drawing.find('title').text if fm_control_drawing.find('title') is not None else ""
        sections = fm_control_drawing.findall('.//section')
        
        # Format the output text
        structured_text = f"{title}\n"
        for i, section in enumerate(sections, start=1):
            name = section.find('name').text if section.find('name') is not None else ""
            text = section.find('text').text if section.find('text') is not None else ""
            structured_text += f"{i}. {name}: {text}\n"
            subsections = section.findall('.//subsection')
            for j, subsection in enumerate(subsections, start=1):
                sub_name = subsection.find('name').text if subsection.find('name') is not None else ""
                sub_value = subsection.find('value').text if subsection.find('value') is not None else ""
                structured_text += f"    {i}.{j}. {sub_name}: {sub_value}\n"
            subimages = section.findall('.//subimage')
            for k, subimage in enumerate(subimages, start=1):
                href = subimage.get('href') if subimage is not None else ""
                structured_text += f"    {i}.Image{k}: {href}\n"
            param_tables = section.findall('.//paramTable')
            for l, param_table in enumerate(param_tables, start=1):
                entries = []
                for entry in param_table.findall('.//entry'):
                    group = entry.find('group').text if entry.find('group') is not None else ""
                    co = entry.find('co').text if entry.find('co') is not None else ""
                    lo = entry.find('lo').text if entry.find('lo') is not None else ""
                    lo_ro = entry.find('lo_ro').text if entry.find('lo_ro') is not None else ""
                    entries.append({
                        "group": group,
                        "co": co,
                        "lo": lo,
                        "lo_ro": lo_ro
                    })
                structured_text += f"    {i}.{l}. Param Table:\n"
                structured_text += json.dumps(entries, indent=4) + "\n"
        
        return structured_text
    except ET.ParseError:
        return "Error: Failed to parse the XML file."
    except FileNotFoundError:
        return "Error: File not found."


In [3]:
# Example usage
file_path = 'X1-IS-AO-02-S_ISM0617_R00.xml'  # Path to your XML file
parsed_X1_IS_AI_01 = parse_xml_to_str(file_path)
print(parsed_X1_IS_AI_01)



In [4]:
print(parsed_X1_IS_AI_01)



In [106]:
import json

# Funzione per salvare il JSON in un file
def save_json_to_file(json_data, filename):
    with open(filename, 'w') as file:
        json.dump(json_data, file, indent=4)

# Specifica i file XML e i nomi dei file JSON di output
#xml_file_1 = 'X1-IS-AO-02-S_ISM0617_R00.xml'  
#json_output_1 = parse_xml_to_str(xml_file_1)
#save_json_to_file(json_output_1, 'json_output_1.json')

xml_file_2 = "X1-IS-DI-02-S_ISM0595_R00.xml"
json_output_2 = parse_xml_to_str(xml_file_2)
save_json_to_file(json_output_2, 'json_output_2.json')

xml_file_3 = "X1-IS-DI-04-S_ISM0597_R00.xml"
json_output_3 = parse_xml_to_str(xml_file_3)
save_json_to_file(json_output_3, 'json_output_3.json')

# impostazioni client per fornire i dati in formato json


In [None]:
from dotenv import load_dotenv
from together import Together
import os
from groq import Groq
from typing import List, Optional, Union
from pydantic import BaseModel, Field, model_validator
from enum import Enum
import time
client_togetherai = Together(api_key=os.getenv("TOGETHERAI_API_KEY"))
client_groq = Groq(api_key=os.getenv("GROQ_API_KEY"))


In [6]:
class BaseNode(BaseModel, frozen=True):
    id: int = Field(..., description="Unique identifier for the node, long enough to avoid collisions with millions of nodes")
    # label: Optional[str] = Field(..., description="Label of the node")
    # properties: Optional[dict] = Field(default_factory=dict, description="Properties of the node")
    
class BaseEdge(BaseModel, frozen=True):
    id: int = Field(..., description="Unique identifier for the edge, long enough to avoid collisions with millions of edges")
    source: int = Field(..., description="Source node ID")
    target: int = Field(..., description="Target node ID")
    label: Optional[str] = Field(..., description="Label of the edge")
    properties: Optional[dict] = Field(default_factory=dict, description="Properties of the edge")
    
class ProductNode(BaseNode, frozen=True):
    label: str = Field("ProductNode", description="This node is used to store product information, like name, family, description, use, field area, installation area, models, and source text.")
    name: str = Field(..., description="Name of the product")
    family: str = Field(..., description="Family of the product")
    description: str = Field(..., description="Description of the product")
    use: str = Field(..., description="Use of the product")
    field_area: Optional[str] = Field(..., description="Field area of the product")
    install_area: Optional[str] = Field(..., description="Installation area of the product")
    models: List[str] = Field(default_factory=list, description="List of product models")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
    
class FeatureNode(BaseNode, frozen=True):
    label: str = Field("FeatureNode", description="This node is used to store product technical and functional features data.")
    name: str = Field(..., description="Name of the feature.")
    value: Union[float, List[float]] = Field(..., description="Value of the feature. It could be a single value or a list two values representing a range")
    unit: str = Field(..., description="Unit of the feature value")
    condition: Optional[str] = Field(..., description="Condition of the feature")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")

class InterfaceNode(BaseNode, frozen=True):
    label: str = Field("InterfaceNode", description="This node is used to store product interfaces data")
    name: str = Field(..., description="Name of the interface")
    parameter_name: str = Field(..., description="Name of the parameter")
    parameter_value: Optional[Union[int, float, str, List[float]]] = Field(..., description="Value of the parameter")
    table: Optional[List[dict]] = Field(..., description="Table of the interface")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
    
class InstallationNode(BaseNode, frozen=True):
    label: str = Field("InstallationNode", description="This node is used to store product installation data")
    installation_details: str = Field(..., description="Details of the installation")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")

class CertificationNode(BaseNode, frozen=True):
    label: str = Field("CertificationNode", description="This node is used to store product certification data")
    name: str = Field(..., description="Name of the certification")
    certification_number: str = Field(..., description="Certification number")
    standards: List[str] = Field(default_factory=list, description="List of standards associated with the certification")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")

class SafetyNode(BaseNode, frozen=True):
    label: str = Field("SafetyNode", description="This node is used to store product safety data")
    title: str = Field(..., description="Title of the safety parameter or section")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
    
class SafetyParametersNode(BaseNode, frozen=True):
    label: str = Field("SafetyParametersNode", description="This node is used to store product safety parameters data")
    parameters: dict = Field(..., description="Table of safety parameters")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")

class OrderNode(BaseNode, frozen=True):
    label: str = Field("OrderNode", description="This node is used to store product order data")
    code: str = Field(..., description="Order code")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
    
class StartUpNode(BaseNode, frozen=True):
    label: str = Field("StartUpNode", description="This node is used to store product startup data")
    procedure: str = Field(..., description="Procedure for starting up the product")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")

class FieldConnectionNode(BaseNode, frozen=True):
    label: str = Field("FieldConnectionNode", description="This node is used to store product field connection data")
    name: str = Field(..., description="Name of the field connection")
    type: str = Field(..., description="Type of the field connection")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
  
class OperationModeNode(BaseNode, frozen=True):
    label: str = Field("OperationModeNode", description="This node is used to store product operation mode data")
    name: str = Field(..., description="Type of the operation mode")
    description: str = Field(..., description="Instructions for the operation mode")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
 
class ConfigurationModeNode(BaseNode, frozen=True):
    label: str = Field("ConfigurationModeNode", description="This node is used to store product configuration data")
    name: str = Field(..., description="Name of the configuration")
    instructions: str = Field(..., description="Instructions for the configuration mode")
    source_text: str = Field(..., description="text content from which node information is extracted")
    source_embedding: List[float] = Field(..., description="embedding vector of the source_text content")
    
class HasFeatureEdge(BaseEdge):
    label: str = Field("hasFeature", description="Label of the edge")
    
class HasTechnicalDataEdge(BaseEdge):
    label: str = Field("hasTechnicalData", description="Label of the edge")

class HasUserInterfaceEdge(BaseEdge):
    label: str = Field("hasUserInterface", description="Label of the edge")

class HasSerialInterfaceEdge(BaseEdge):
    label: str = Field("hasSerialInterface", description="Label of the edge")
    
class HasInstallationInstructionEdge(BaseEdge):
    label: str = Field("hasInstallation", description="Label of the edge")

class HasCertificationEdge(BaseEdge):
    label: str = Field("hasCertification", description="Label of the edge")
    
class HasSafetyRequirementEdge(BaseEdge):
    label: str = Field("hasSafetyRequirement", description="Label of the edge")

class HasSafetyParameterEdge(BaseEdge):
    label: str = Field("hasSafetyParameter", description="Label of the edge")

class HasOrderCodeEdge(BaseEdge):
    label: str = Field("hasOrderCode", description="Label of the edge")
             
class HasStartUpProcedure(BaseEdge):
    label: str = Field("hasStartUpProcedure", description="Label of the edge")
    
class HasFieldConnectionEdge(BaseEdge):
    label: str = Field("hasFieldConnection", description="Label of the edge")

class HasOperationModeEdge(BaseEdge):
    label: str = Field("hasOperationMode", description="Label of the edge")

class HasConfigurationModeEdge(BaseEdge):
    label: str = Field("hasConfigurationMode", description="Label of the edge")
    
class KnowledgeGraphNodes(BaseModel):
    nodes: List[Union[ProductNode, FeatureNode, InterfaceNode, InstallationNode, CertificationNode, SafetyNode, SafetyParametersNode, OrderNode, StartUpNode, FieldConnectionNode, OperationModeNode, ConfigurationModeNode]]

class KnowledgeGraphEdges(BaseModel):
    edges: List[Union[HasFeatureEdge, HasTechnicalDataEdge, HasUserInterfaceEdge, HasSerialInterfaceEdge, HasInstallationInstructionEdge, HasCertificationEdge, HasSafetyRequirementEdge, HasSafetyParameterEdge, HasOrderCodeEdge, HasStartUpProcedure, HasFieldConnectionEdge, HasOperationModeEdge, HasConfigurationModeEdge]]
    
   
class NodeClassification(BaseModel):
    node_types: List[str] 
    confidence: float  # Value between 0 and 1
    reason: str        # Explanation for the choice

In [7]:
# TOOLS
def generate_uuid():
    return str(uuid.uuid4())

def generate_embeddings(text: List[str]) -> List[float]:
    response = client_togetherai.embeddings.create(
        model = "togethercomputer/m2-bert-80M-8k-retrieval",
        input = text
    )
    return response.data

In [8]:
node_types = ""
for node_name, content in KnowledgeGraphNodes.model_json_schema()['$defs'].items():
    node_types += f"- {node_name}: {content['properties']['label']['description']}\n"
    
print(node_types)

- CertificationNode: This node is used to store product certification data
- ConfigurationModeNode: This node is used to store product configuration data
- FeatureNode: This node is used to store product technical and functional features data.
- FieldConnectionNode: This node is used to store product field connection data
- InstallationNode: This node is used to store product installation data
- InterfaceNode: This node is used to store product interfaces data
- OperationModeNode: This node is used to store product operation mode data
- OrderNode: This node is used to store product order data
- ProductNode: This node is used to store product information, like name, family, description, use, field area, installation area, models, and source text.
- SafetyNode: This node is used to store product safety data
- SafetyParametersNode: This node is used to store product safety parameters data
- StartUpNode: This node is used to store product startup data



In [9]:
## AGENTS DEFINITION
agent_node_creator = """
You are a knowledge graph node creator.

Your technical context:
- Instrinsically safe automation products

Your taks:
- Create the nodes and their attributes that best fit the data provided
- In 'label' attribute put the node type

Remember:
- Do not exclude any information
- In source_text attribute put the full sentence that contains the information structured in the node
- Always separate numerical values from their units of measure
- Tabular data should always go into 'table' attribute
- Specific working conditions should go in 'condition' attribute
- Do not return incomplete node structures. Always return complete 
answers

Examples
Node type: {node_examples} 


Return:
- A valid json without any further comments, suggestions or explanations
"""

agent_node_classificator = """
You are an expert data classifier with deep knowledge in product documentation and knowledge graph structures. Your task is to analyze the provided text data and determine which of the following node types it.

Available Node Types:
{available_nodes}

Your task:
- Analyze the Text: carefully read and comprehend the provided text data and identify key themes in the text.
- Classify into Node Types: determine which node types fit best the content
- Assign Confidence Score: provide a confidence score between 0 and 1, where 1 means complete certainty and 0 means no confidence.
- Provide Reasoning: offer a brief explanation for your classification.

Example output:
{example_output}

This is the data you are working on:

"""

In [10]:
def call_agent(agent_prompt: str, data_input: str, instructor_client:object, llm: str, response_model: Optional[BaseModel] = None):
    messages = [
            {"role": "system", "content": agent_prompt},
            {"role": "user", "content": f"Data you are working on:\n{data_input}"}
        ]
    
    # Log the length of inputs
    print(f"Length of agent_prompt: {len(agent_prompt)} characters")
    print(f"Length of data_input: {len(data_input)} characters")

    # Start timing the API call
    print("Starting API call...")
    start_time = time.time()
    
    try:
        response = instructor_client.chat.completions.create(
            # model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
            model = llm,
            messages=messages,
            # max_tokens=4000,
            temperature=0.0,
            max_retries=3,
            response_model=response_model
        )
    except Exception as e:
        print(f"An error occurred during the API call: {e}")
        raise
    
    end_time = time.time()
    print(f"API call completed in {end_time - start_time:.2f} seconds")

    return response

# def call_agent_v2(agent_prompt: str, data_input: str, response_model: Optional[BaseModel] = None):
#     messages = [
#             {"role": "system", "content": agent_prompt},
#             {"role": "user", "content": data_input}
#         ]
#     # Log the length of inputs
#     print(f"Length of agent_prompt: {len(agent_prompt)} characters")
#     print(f"Length of data_input: {len(data_input)} characters")

#     # Start timing the API call
#     print("Starting API call...")
#     start_time = time.time()
    
#     try:
#         response = client.chat.completions.create(
#             model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
#             messages=messages,
#             max_tokens=32000,
#             temperature=0.0,
#             max_retries=3,
#             seed=69,
#             response_format={"type": "json_object", "schema": response_model.model_json_schema()},
#         )
#     except Exception as e:
#         print(f"An error occurred during the API call: {e}")
#         raise
    
#     end_time = time.time()
#     print(f"API call completed in {end_time - start_time:.2f} seconds")
#     # completion_content = response.choices[0].message.content
#     # return completion_content
#     return response


In [11]:
example_output = json.dumps({
  "node_types": ["SafetyNode", "SafetyParametersNode"],
  "confidence": 0.95,
  "reason": "The text extensively covers safety information, user responsibilities, and intrinsic safety parameters, which correspond to SafetyNode and SafetyParametersNode."
})

# Con questa funzione scopro ogni sezione a quale nodo può appartenere

In [12]:
import json

# Specifica il percorso del file JSON
file_path = 'json_output_1.json'

# Apri e leggi il file JSON
with open(file_path, 'r') as file:
    dati = json.load(file)
# Classify Nodes
node_class = []
for section, content in dati.items():
    # print(agent_node_classificator.format(available_nodes=node_types, example_output=example_output))
    # if section == "user_interface":
    # print(content)
    classification = call_agent(
        agent_prompt=agent_node_classificator.format(available_nodes=node_types, example_output=example_output),
        data_input=content,
        instructor_client=patch_client_togetherai,
        llm="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        response_model=NodeClassification,
    )
    node_class.append({"section": section, "classification": classification, "content": content})

Length of agent_prompt: 1903 characters
Length of data_input: 422 characters
Starting API call...
API call completed in 4.77 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 1025 characters
Starting API call...
API call completed in 6.82 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 81 characters
Starting API call...
API call completed in 2.13 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 934 characters
Starting API call...
API call completed in 4.78 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 65 characters
Starting API call...
API call completed in 1.64 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 264 characters
Starting API call...
API call completed in 7.17 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 1315 characters
Starting API call...
API call completed in 3.82 seconds
Length of agent_prompt: 1903 characters
Length of data_input: 407 cha

In [13]:
for i in range(len(node_class)):
    print(node_class[i]['classification'].node_types)

['FeatureNode', 'SafetyNode']
['CertificationNode', 'SafetyNode']
['ProductNode']
['ProductNode', 'SafetyNode', 'InterfaceNode']
['OrderNode']
['InterfaceNode', 'ConfigurationModeNode', 'ProductNode']
['FeatureNode', 'ProductNode', 'SafetyNode']
['InterfaceNode', 'FieldConnectionNode']
['InterfaceNode', 'ConfigurationModeNode', 'FeatureNode']
['SafetyNode', 'SafetyParametersNode', 'ProductNode', 'CertificationNode']


In [14]:
type(node_class)

list

In [28]:
def extract_node_examples(filename, node_types):
    # Open and load the JSON file
    with open(filename, 'r') as file:
        data = json.load(file)
    
    # Extract examples for the specified node types, preserving node type
    extracted_examples = []
    for node_type in node_types:
        if node_type in data:
            for example in data[node_type]:
                # Add the node type as a key for each example
                extracted_examples.append({node_type: example})
    
    return extracted_examples

node_types = ["ProductNode", "ConfigurationModeNode"]
examples = extract_node_examples("prova.json", node_types)
print(examples)

[{'ProductNode': {'name': 'Hydraulic Pump A10VO', 'family': 'Hydraulics', 'description': 'A variable displacement pump used in hydraulic systems.', 'use': 'Industrial machinery', 'field_area': 'Hydraulics', 'install_area': 'Zone 2 / Division 2', 'models': ['A10VO-30', 'A10VO-40'], 'source_text': 'A10VO Series 31 is designed for variable displacement applications.', 'source_embedding': [0.123, 0.456, 0.789]}}, {'ProductNode': {'name': 'Servo Motor XYZ', 'family': 'Motors', 'description': 'A high-precision servo motor for robotics.', 'use': 'Automation and robotics', 'field_area': None, 'install_area': None, 'models': ['XYZ-1000'], 'source_text': 'XYZ Series designed for high-speed robotics.', 'source_embedding': [0.234, 0.567, 0.89]}}]


In [30]:
def format_node_examples(extracted_examples):
    # Initialize an empty dictionary to group examples by node type
    grouped_examples = {}
    
    # Group examples by node type
    for item in extracted_examples:
        for node_type, example in item.items():
            if node_type not in grouped_examples:
                grouped_examples[node_type] = []
            grouped_examples[node_type].append(example)
    
    # Create a formatted string
    formatted_output = ""
    for node_type, examples in grouped_examples.items():
        formatted_output += f"{node_type}:\n"
        for i, example in enumerate(examples, start=1):
            # formatted_output += f"  Example {i}:\n"
            formatted_output += json.dumps(example, indent=4)
            formatted_output += "\n\n"
    
    return formatted_output

formatted_output = format_node_examples(examples)
print(formatted_output)

ProductNode:
{
    "name": "Hydraulic Pump A10VO",
    "family": "Hydraulics",
    "description": "A variable displacement pump used in hydraulic systems.",
    "use": "Industrial machinery",
    "field_area": "Hydraulics",
    "install_area": "Zone 2 / Division 2",
    "models": [
        "A10VO-30",
        "A10VO-40"
    ],
    "source_text": "A10VO Series 31 is designed for variable displacement applications.",
    "source_embedding": [
        0.123,
        0.456,
        0.789
    ]
}

{
    "name": "Servo Motor XYZ",
    "family": "Motors",
    "description": "A high-precision servo motor for robotics.",
    "use": "Automation and robotics",
    "field_area": null,
    "install_area": null,
    "models": [
        "XYZ-1000"
    ],
    "source_text": "XYZ Series designed for high-speed robotics.",
    "source_embedding": [
        0.234,
        0.567,
        0.89
    ]
}




In [31]:
node_class[3]['classification'].node_types

['ProductNode', 'SafetyNode', 'InterfaceNode']

In [34]:
nodes_to_be_used = node_class[3]['classification'].node_types
examples = extract_node_examples('prova.json', nodes_to_be_used)
formatted_examples = format_node_examples(examples)
print(agent_node_creator.format(node_examples=formatted_examples))
#questa è una demo ma va aggiornata con tutti i nodi


You are a knowledge graph node creator.

Your technical context:
- Instrinsically safe automation products

Your taks:
- Create the nodes and their attributes that best fit the data provided
- In 'label' attribute put the node type

Remember:
- Do not exclude any information
- In source_text attribute put the full sentence that contains the information structured in the node
- Always separate numerical values from their units of measure
- Tabular data should always go into 'table' attribute
- Specific working conditions should go in 'condition' attribute
- Do not return incomplete node structures. Always return complete 
answers

Examples
Node type: ProductNode:
{
    "name": "Hydraulic Pump A10VO",
    "family": "Hydraulics",
    "description": "A variable displacement pump used in hydraulic systems.",
    "use": "Industrial machinery",
    "field_area": "Hydraulics",
    "install_area": "Zone 2 / Division 2",
    "models": [
        "A10VO-30",
        "A10VO-40"
    ],
    "source_

In [35]:
class Node(BaseModel):
    id: Optional[int] = Field("unique uid")
    label: str = Field("Node type")
    properties: dict = Field("contains nodes attributes")
    
class KnowledgeNodes(BaseModel):
    nodes: List[Node]

In [41]:
# Build Nodes 
#fallisce su safety perché è troppo lungo
build_nodes = []
for extraction in node_class:
    nodes_to_be_used = extraction['classification'].node_types
    examples = extract_node_examples('prova.json', nodes_to_be_used) #qui prova.json va aggornato
    formatted_examples = format_node_examples(examples)
    print(f"Analizing section {extraction['section']}")
    resp = call_agent(agent_prompt=agent_node_creator.format(node_examples=formatted_examples), 
                    data_input=extraction['content'], 
                    instructor_client=patch_client_togetherai,
                    # instructor_client=patch_client_groq,
                    llm="meta-llama/Llama-3.3-70B-Instruct-Turbo",
                    # llm='mixtral-8x7b-32768', # cannot use llama with groq due to max_token limit of 8k
                    response_model=KnowledgeNodes
                    )
    print("Appending response")
    build_nodes.append(resp)

Analizing section features
Length of agent_prompt: 946 characters
Length of data_input: 422 characters
Starting API call...
API call completed in 32.83 seconds
Appending response
Analizing section footer
Length of agent_prompt: 946 characters
Length of data_input: 1025 characters
Starting API call...
API call completed in 8.59 seconds
Appending response
Analizing section general
Length of agent_prompt: 1639 characters
Length of data_input: 81 characters
Starting API call...
API call completed in 10.66 seconds
Appending response
Analizing section model
Length of agent_prompt: 1841 characters
Length of data_input: 934 characters
Starting API call...
API call completed in 6.20 seconds
Appending response
Analizing section orders
Length of agent_prompt: 744 characters
Length of data_input: 65 characters
Starting API call...
API call completed in 2.39 seconds
Appending response
Analizing section gateway
Length of agent_prompt: 1639 characters
Length of data_input: 264 characters
Starting API

InstructorRetryException: The output is incomplete due to a max_tokens length limit.

In [43]:
v2_nodes = build_nodes.copy()

In [44]:
v2_nodes #proprieties va adesso aggiunto dentro la giusta classe del nodo label


[KnowledgeNodes(nodes=[Node(id=1, label='SafetyNode', properties={'title': 'SIL 2 / SC 3 Certification', 'source_text': 'SIL 2 / SC 3 (pending)', 'certification_level': 'SIL 2', 'certification_type': 'SC 3', 'status': 'pending'}), Node(id=2, label='InputNode', properties={'title': 'Input from Zone 0 / Division 1', 'source_text': 'Input from Zone 0 / Division 1 (pending)', 'zone': '0', 'division': '1', 'status': 'pending'}), Node(id=3, label='InstallationNode', properties={'title': 'Installation in Zone 2 / Division 2', 'source_text': 'Installation in Zone 2 / Division 2 (pending)', 'zone': '2', 'division': '2', 'status': 'pending'}), Node(id=4, label='MaintenanceNode', properties={'title': 'Loop Disconnection for Maintenance', 'source_text': 'Loop disconnection to ease maintenance operations', 'feature': 'loop disconnection'}), Node(id=5, label='CompatibilityNode', properties={'title': 'HART Compatibility', 'source_text': 'HART compatible', 'protocol': 'HART'}), Node(id=6, label='Diagn

In [45]:
for lst_nodes in v2_nodes:
    for node in lst_nodes.nodes:
        node.id = generate_uuid()
        # node.properties['source_embedding'] = generate_embeddings(node.properties['source_text'])
        
v2_nodes

[KnowledgeNodes(nodes=[Node(id='51ffb503-95c4-4e88-8e07-13657c75df90', label='SafetyNode', properties={'title': 'SIL 2 / SC 3 Certification', 'source_text': 'SIL 2 / SC 3 (pending)', 'certification_level': 'SIL 2', 'certification_type': 'SC 3', 'status': 'pending'}), Node(id='f840705a-6925-4db9-929c-bb9528333e84', label='InputNode', properties={'title': 'Input from Zone 0 / Division 1', 'source_text': 'Input from Zone 0 / Division 1 (pending)', 'zone': '0', 'division': '1', 'status': 'pending'}), Node(id='37ae1a57-b500-40a0-b97a-1d8be475ff6a', label='InstallationNode', properties={'title': 'Installation in Zone 2 / Division 2', 'source_text': 'Installation in Zone 2 / Division 2 (pending)', 'zone': '2', 'division': '2', 'status': 'pending'}), Node(id='4e0fe973-307e-40bd-9122-e2752bf29d62', label='MaintenanceNode', properties={'title': 'Loop Disconnection for Maintenance', 'source_text': 'Loop disconnection to ease maintenance operations', 'feature': 'loop disconnection'}), Node(id='922

In [46]:
for k, v in KnowledgeGraphNodes.model_json_schema()['$defs'].items():
    print(k)
    print([elem[0] for elem in list(v['properties'].items())])
    # print(list(v['properties'].keys()))

CertificationNode
['id', 'label', 'name', 'certification_number', 'standards', 'source_text', 'source_embedding']
ConfigurationModeNode
['id', 'label', 'name', 'instructions', 'source_text', 'source_embedding']
FeatureNode
['id', 'label', 'name', 'value', 'unit', 'condition', 'source_text', 'source_embedding']
FieldConnectionNode
['id', 'label', 'name', 'type', 'source_text', 'source_embedding']
InstallationNode
['id', 'label', 'installation_details', 'source_text', 'source_embedding']
InterfaceNode
['id', 'label', 'name', 'parameter_name', 'parameter_value', 'table', 'source_text', 'source_embedding']
OperationModeNode
['id', 'label', 'name', 'description', 'source_text', 'source_embedding']
OrderNode
['id', 'label', 'code', 'source_text', 'source_embedding']
ProductNode
['id', 'label', 'name', 'family', 'description', 'use', 'field_area', 'install_area', 'models', 'source_text', 'source_embedding']
SafetyNode
['id', 'label', 'title', 'source_text', 'source_embedding']
SafetyParameter

In [162]:
list(v['properties'].items())

[('id',
  {'description': 'Unique identifier for the node, long enough to avoid collisions with millions of nodes',
   'title': 'Id',
   'type': 'integer'}),
 ('label',
  {'default': 'StartUpNode',
   'description': 'This node is used to store product startup data',
   'title': 'Label',
   'type': 'string'}),
 ('procedure',
  {'description': 'Procedure for starting up the product',
   'title': 'Procedure',
   'type': 'string'}),
 ('source_text',
  {'description': 'text content from which node information is extracted',
   'title': 'Source Text',
   'type': 'string'}),
 ('source_embedding',
  {'description': 'embedding vector of the source_text content',
   'items': {'type': 'number'},
   'title': 'Source Embedding',
   'type': 'array'})]

In [56]:
build_nodes[2].nodes[0].properties

{'name': 'GM International Products',
 'family': 'Intrinsically Safe Automation',
 'description': 'Preliminary information on intrinsically safe automation products',
 'use': 'Industrial automation',
 'field_area': 'Hazardous areas',
 'install_area': 'Zone 1 / Division 1',
 'models': None,
 'source_text': 'PRELIMINARY INFORMATION on intrinsically safe automation products',
 'source_embedding': [0.123, 0.456, 0.789],
 'website': 'www.gminternational.com',
 'language': 'English'}

# per adesso abbiamo creato solo i nodi

In [57]:
prompt = """
Output the given data into KnowledgeGraphNodes pydantic model. 
This are the possible nodes within it:
{nodes}
"""
call_agent(
    agent_prompt=prompt,
    data_input=build_nodes[2].nodes[0].properties,
    instructor_client=patch_client_togetherai,
    llm="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    response_model=KnowledgeGraphNodes
)

Length of agent_prompt: 112 characters
Length of data_input: 11 characters
Starting API call...
An error occurred during the API call: Error code: 400 - {"message": "Template error: (unknown path) [Line 77, Column 31]\n  Error: Unable to call `raise_exception`, which is undefined or falsey", "type_": "invalid_request_error", "param": "messages"}


InstructorRetryException: Error code: 400 - {"message": "Template error: (unknown path) [Line 77, Column 31]\n  Error: Unable to call `raise_exception`, which is undefined or falsey", "type_": "invalid_request_error", "param": "messages"}

# Definizione del knowledge graph


In [58]:
from graphviz import Digraph
from IPython.display import display
class Node(BaseModel):
    id: int
    label: str
    color: str
    properties: dict = Field(default_factory={}, description="Attributes of the node")
    
    def __hash__(self) -> int:
        return hash((id, self.label))
    
class Edge(BaseModel):
    id: int
    source: int = Field(..., description="Source node ID")
    target: int = Field(..., description="Target node ID")
    label: str = Field(..., description="Label of the edge")
    color: str = "black"
    properties: Optional[dict] = Field(default_factory=dict, description="Properties of the edge")
    
    def __hash__(self) -> int:
        return hash((self.source, self.target, self.label))
    
class KnowledgeGraph(BaseModel):
    # Optional list of nodes and edges in the knowledge graph
    nodes: Optional[list[Node]] = Field(..., default_factory=list)
    edges: Optional[list[Edge]] = Field(..., default_factory=list)

    def update(self, other: "KnowledgeGraph") -> "KnowledgeGraph":
        # This method updates the current graph with the other graph, deduplicating nodes and edges.
        return KnowledgeGraph(
            nodes=list(set(self.nodes + other.nodes)),  # Combine and deduplicate nodes
            edges=list(set(self.edges + other.edges)),  # Combine and deduplicate edges
        )
        
    def visualize_knowledge_graph(self):
        dot = Digraph(comment="Knowledge Graph")

        for node in self.nodes:
            dot.node(str(node.id), node.label, color=node.color)
        for edge in self.edges:
            dot.edge(str(edge.source), str(edge.target), label=edge.label, color=edge.color)
        
        return display(dot)
 

In [59]:
agent_node_artist = """
You are a knowledge graph creator.
Your task is to create a knowledge graph based on the given text.
You are given the current state of the graph, and you must append the nodes and edges to it

Knowledge context:
- Instrinsically safe barriers and isolators
- Highly technical and engineering information

Instructions:
- Create nodes and edges
- All node attributes should go inside 'propierties' attribute
- All nodes should contains 'id', 'source_text' (the text piece where the node attributes are coming from) and 'source_embedding' (vector embedding of source_text)
- Attributes representing technical features must be split in 'name', 'value' and 'unit' where 'value' could be a single number or a list of 2 numbers if it represents a range
- 'condition' is an attribute to add a specific condition for the technical feature
- Tabular content should be placed in 'table_data' attribute
- Edges should be camel case and descriptive

Example Node:
{node_example}

Example Edge:
{edge_example}

Remember:
- Do not provide any duplcates
- Try to reuse nodes as much as possible

"""


In [60]:
chunks = [elem['content'] for elem in node_class]
chunks

['FEATURES\n1. SIL 2 / SC 3 (pending)\n2. Input from Zone 0 / Division 1 (pending)\n3. Installation in Zone 2 / Division 2 (pending)\n4. Loop disconnection to ease maintenance operations\n5. HART® compatible\n6. Line & Load short/open circuit programmable diagnostics\n7. Out-of-range fault with programmable thresholds\n8. Field fault mirroring to the DCS/PLC IO Card\n9. High Accuracy\n10. Three port isolation, Input/Output/Supply\n',
 'Functional Safety Management Certification: Functional Safety Management Certification:\nGM International is certified to conform to IEC61508:2010 part 1 clauses 5-6 for safety related systems up to and included SIL3. In addition, GM International products have been granted I.S. certificates from the most credited Notified Bodies in the world.\nDisclaimer: Data specified in this document are merely descriptive of the products and should be integrated with relevant technical specifications. Our products are in constant development and the information pres

In [63]:
import json

def load_json(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)

# Carica i dati da node.json e edge.json
node_example = load_json("node.json")
edge_example = load_json("edge.json")

def generate_graph(chunks: list[str]) -> KnowledgeGraph:
    # Initialize an empty KnowledgeGraph
    cur_state = KnowledgeGraph()
    # Iterate over the input list
    for chunk in chunks:
        print("New chunk to parse")
        new_updates = patch_client_togetherai.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
            messages=[
                {"role": "system",
                 "content": agent_node_artist.format(
                     node_example=json.dumps(node_example),
                     edge_example=json.dumps(edge_example))
                 },
                {"role": "user",
                 "content": f"""Extract any new nodes and edges from the following:\n{chunk}\n""",
                 },
                {"role": "user",
                 "content": f"""Here is the current state of the graph: {cur_state.model_dump_json(indent=2)}""",
                 },
            ],
            response_model=KnowledgeGraph,
        )  # type: ignore
        # Update the current state with the new updates
        print("Updating graph")
        cur_state = cur_state.update(new_updates)
    
    # Return the final state of the KnowledgeGraph
    return cur_state

state = generate_graph(chunks)


New chunk to parse
Updating graph
New chunk to parse


InstructorRetryException: 1 validation error for KnowledgeGraph
  Invalid JSON: trailing characters at line 11 column 1 [type=json_invalid, input_value='{\n  "id": 3,\n  "label"...     ...\n    }\n  ]\n}', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/json_invalid