# Import Reference Data from CSV Files


In [None]:
import os
import logging

# Verbosity of the "OpenTLDR" logger. Swap the constant passed to setLevel()
# for one of the alternatives to change how much is logged:
#   logging.ERROR  -> Less output
#   logging.WARN   -> Default
#   logging.INFO   -> More output
#   logging.DEBUG  -> So much output
opentldr_logger = logging.getLogger("OpenTLDR")
opentldr_logger.setLevel(logging.DEBUG)


In [None]:
# Paths of the CSV files that hold the reference data.
# The import cells skip a file when its path is set to None.
example_entity_csv_file = "./entities.csv"
example_relationship_csv_file = "./relationships.csv"

In [None]:
from opentldr import KnowledgeGraph
# Create the KnowledgeGraph object that the following cells use as `kg`
kg=KnowledgeGraph()

## Example Loading of Reference Data from a CSV file

- Entities are loaded as ReferenceNodes where the type should match those discovered by the NER process
- Relationships are loaded as ReferenceEdges where the To/From attributes match entity text to identify which ReferenceNodes to connect

### Import Entities for Reference Data from a CSV file

CSV Format for Entities:

| Type | Text | JSON MetaData |
| :---: | :--- | :--- |
| PERSON | Chris | { "FullName":"Chris Argenta" } |
| ORG | NCSU | { "FullName":"North Carolina State University" } |

In [None]:
from opentldr.Domain import ReferenceNode

import csv  # imported here so this cell can be run on its own

# Maps each entity's text to the ReferenceNode created for it, so that the
# relationship import can look up the endpoints of an edge by name.
entities: dict = {}

if example_entity_csv_file is not None:
    # newline="" is what the csv module expects: it handles line endings
    # (including newlines inside quoted fields) itself.
    with open(example_entity_csv_file, newline="", encoding="utf-8") as file:
        # csv.reader honours quoted fields, so JSON metadata that contains
        # commas stays in one column instead of being cut at the first comma.
        # skipinitialspace lets `"A", "B"` style files parse as quoted fields.
        reader = csv.reader(file, skipinitialspace=True)
        next(reader, None)  # Note: skips the header line in the CSV file

        for row in reader:
            if not any(field.strip() for field in row):
                continue  # tolerate blank lines, e.g. at the end of the file
            if len(row) < 3:
                print("Skipped line {line}: expected Type, Text, JSON MetaData but got {row}".format(
                    line=reader.line_num, row=row))
                continue

            # Strip surrounding whitespace from every field so the dictionary
            # key matches the (stripped) names used by the relationship CSV
            # and the metadata does not carry a trailing newline.
            entity_type, entity_text, entity_meta = (field.strip() for field in row[:3])

            # You can call the KnowledgeGraph API for most actions
            ref_node = kg.add_reference_node(entity_text, entity_type)
            print("Added ({uid}):\t{text} of type {type}".format(
                uid=ref_node.uid, text=ref_node.text, type=ref_node.type))

            # You can also use the Neomodel api directly from Domain classes
            ref_node.metadata = entity_meta
            ref_node.save()

            entities[ref_node.text] = ref_node

### Import Relationships between Entities for Reference Data from a CSV file

CSV Format for Relationships:

| Entity From | Entity To | Relationship Type | Relationship Text |
| :---: | :---: | :---: | :--- |
| Chris | NCSU | GRADUATE_OF | Chris Argenta completed his PhD at NCSU in 2017. |


In [None]:
from opentldr.Domain import ReferenceEdge

import csv  # imported here so this cell can be run on its own

if example_relationship_csv_file is not None:
    # newline="" is what the csv module expects: it handles line endings
    # (including newlines inside quoted fields) itself.
    with open(example_relationship_csv_file, newline="", encoding="utf-8") as file:
        # csv.reader honours quoted fields, so relationship text containing
        # commas is kept whole instead of being cut at the first comma.
        reader = csv.reader(file, skipinitialspace=True)
        next(reader, None)  # Note: skips the header line in the CSV file

        for row in reader:
            if not any(field.strip() for field in row):
                continue  # tolerate blank lines, e.g. at the end of the file
            if len(row) < 4:
                print("Skipped line {line}: expected Entity From, Entity To, "
                      "Relationship Type, Relationship Text but got {row}".format(
                          line=reader.line_num, row=row))
                continue

            from_text, to_text, rel_type, rel_text = (field.strip() for field in row[:4])

            # Both endpoints must have been loaded by the entity import;
            # report the offending line instead of failing with a bare KeyError.
            unknown = [name for name in (from_text, to_text) if name not in entities]
            if unknown:
                print("Skipped line {line}: unknown entity {names}".format(
                    line=reader.line_num, names=", ".join(unknown)))
                continue
            entity_from = entities[from_text]
            entity_to = entities[to_text]

            edge = kg.add_reference_edge(entity_from, entity_to, type=rel_type, text=rel_text)
            print("Added ({uid}):\t{entity_from} -[{edge}]-> {entity_to}".format(
                uid=edge.uid,
                entity_from=entity_from.text,
                entity_to=entity_to.text,
                edge=edge.type))

### Let's verify that the Reference Knowledge got added
This shows the nodes that were loaded with the Reference Data, including their unique ids.

In [None]:
# Ask the KnowledgeGraph API for its reference nodes.
# Alternative left for reference, issuing the cypher query directly:
# all_reference_nodes= kg.cypher_query_to_list("MATCH (x:ReferenceNode) RETURN x","x")
all_reference_nodes = kg.get_all_reference_nodes()

node_count = len(all_reference_nodes)
print(f"Found {node_count} reference nodes in the knowledge graph:")

# One output line per node: its type, unique id and text
for node in all_reference_nodes:
    print(f" - {node.type}({node.uid}):\t{node.text}")

# Close down the remote connections to the database

In [None]:
# Done with the knowledge graph: close its connections to the database
kg.close()