In [57]:
import json
import os
import urllib.parse
from dataclasses import dataclass

from langchain_community.graphs.age_graph import AGEGraph


# PostgreSQL connection settings, passed to AGEGraph as its `conf` argument.
# Each value is read from the standard libpq environment variable when it is set,
# so real credentials never have to be written into the notebook; the fallbacks
# are the local development values this notebook has always used.
# The port stays a string, as it was before.
database = {
    "database": os.environ.get("PGDATABASE", "postgres"),
    "user": os.environ.get("PGUSER", "postgres"),
    "password": os.environ.get("PGPASSWORD", "password"),
    "host": os.environ.get("PGHOST", "localhost"),
    "port": os.environ.get("PGPORT", "5432"),
}

In [58]:
# Open the "gnd" graph using the connection settings in `database`.
# NOTE(review): create=True is expected to make AGEGraph create the graph when it
# does not exist yet - confirm against the LangChain AGEGraph reference.
graph = AGEGraph(conf=database, graph_name="gnd", create=True)

# Re-read the schema from the database and print it as a quick sanity check.
graph.refresh_schema()
print(graph.schema)


        Node properties are the following:
        []
        Relationship properties are the following:
        []
        The relationships are the following:
        []
        


In [59]:
@dataclass
class Subject:
    """One GND subject record, reduced to the fields this notebook loads into the graph.

    The loading cell fills every string (including list items) with the
    ``urllib.parse.quote_plus``-encoded form of the JSON value.
    """

    # From the record's "Code" key.
    code: str
    # From the record's "Name" key.
    name: str
    # From the record's "Classification Name" key.
    classification_name: str
    # From the record's "Alternate Name" list.
    alternative_names: list[str]
    # From the record's "Related Subjects" list.
    related: list[str]

In [None]:
# Load the raw GND subject records. The encoding is pinned to UTF-8 so that
# non-ASCII characters in the data decode the same way on every platform,
# instead of depending on the locale's default encoding.
with open(
    "llms4subjects/shared-task-datasets/GND/dataset/GND-Subjects-tib-core.json",
    "r",
    encoding="utf-8",
) as gnd_subjects_file:
    gnd_subjects = json.load(gnd_subjects_file)

# Convert each raw JSON record into a Subject. Every string goes through
# urllib.parse.quote_plus first; the quoted form is what the later cells
# interpolate into their Cypher query text.
subjects = [
    Subject(
        code=urllib.parse.quote_plus(record["Code"]),
        name=urllib.parse.quote_plus(record["Name"]),
        classification_name=urllib.parse.quote_plus(record["Classification Name"]),
        alternative_names=list(map(urllib.parse.quote_plus, record["Alternate Name"])),
        related=list(map(urllib.parse.quote_plus, record["Related Subjects"])),
    )
    for record in gnd_subjects
]

# Create one Subject vertex per record.
# The `name` property must carry subject.name (it previously received subject.code):
# the cell that builds RELATED edges looks Subject vertices up by name.
# The interpolated values were URL-quoted when `subjects` was built, so they contain
# no quote characters that could terminate the string literals early.
for subject in subjects:
    graph.query(
        f"""
        CREATE (s:Subject {{code: '{subject.code}', name: '{subject.name}', classification_name: '{subject.classification_name}'}})
        """,
    )

AGEQueryException: {'message': 'Error executing graph query: CREATE UNIQUE INDEX ON :Subject(code)', 'detail': 'syntax error at or near "INDEX"\nLINE 2:             CREATE UNIQUE INDEX ON :Subject(code)\n                                  ^\n'}

In [74]:
import psycopg2

# Work directly on the psycopg2 connection held by the graph wrapper, because the
# index statements are plain SQL rather than Cypher.
conn: psycopg2.extensions.connection = graph.connection
# Presumably closes any transaction left open by earlier graph queries before the DDL.
conn.commit()
with conn.cursor() as cursor:
    # One unique expression index per Subject property that is used as a lookup key.
    for index_name, property_key in (("subject_code", "code"), ("subject_name", "name")):
        cursor.execute(
            f"CREATE UNIQUE INDEX IF NOT EXISTS {index_name} ON gnd.\"Subject\""
            f"(ag_catalog.agtype_access_operator(properties, '\"{property_key}\"'::agtype))"
        )
conn.commit()

In [76]:
# Attach every alternative name to its subject.
# The AlternativeName vertex is MERGEd rather than CREATEd: an unconditional CREATE
# produced a second vertex whenever two subjects shared a name or the cell was run
# again, and the MATCH below would then bind every duplicate and add an edge to each,
# defeating the MERGE on the edge.
for subject in subjects:
    for alternative_name in subject.alternative_names:
        # Ensure exactly one vertex exists for this alternative name.
        graph.query(
            f"""
            MERGE (a:AlternativeName {{name: "{alternative_name}"}})
            """
        )
        # Link the subject (looked up by code) to that vertex; MERGE keeps the edge unique.
        graph.query(
            f"""
            MATCH (s:Subject {{code: "{subject.code}"}}), (a:AlternativeName {{name: "{alternative_name}"}})
                MERGE (s)-[:ALTERNATIVE_NAME]->(a)
            """
        )

In [78]:
import psycopg2

# Plain (non-unique) expression index on the `name` property of AlternativeName vertices.
alternative_name_index_sql = (
    'CREATE INDEX IF NOT EXISTS alternativename_name ON gnd."AlternativeName"'
    "(ag_catalog.agtype_access_operator(properties, '\"name\"'::agtype))"
)

# The statement is SQL rather than Cypher, so it is sent over the graph's psycopg2 connection.
conn: psycopg2.extensions.connection = graph.connection
# Presumably closes any transaction left open by earlier graph queries before the DDL.
conn.commit()
with conn.cursor() as cursor:
    cursor.execute(alternative_name_index_sql)
conn.commit()

In [79]:
# Connect each subject to the subjects it lists as related.
# Both ends are looked up by their `name` property; when either MATCH finds nothing
# the query adds no edge, and MERGE keeps an existing edge from being duplicated.
for subject in subjects:
    for related_subject in subject.related:
        related_query = f"""
            MATCH (s1:Subject {{name: "{subject.name}"}}), (s2:Subject {{name: "{related_subject}"}})
                MERGE (s1)-[:RELATED]->(s2)
            """
        graph.query(related_query)