In [4]:
from neo4j import GraphDatabase
import os
from openai import OpenAI

In [2]:
# Configure Neo4j driver
uri = "bolt://localhost:7687"
username = "neo4j"
password = "abr272373574A*"
driver = GraphDatabase.driver(uri, auth=(username, password))

# Set up ChatGPT API
os.environ["OPENAI_API_KEY"] = "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"
client = OpenAI()

In [6]:
def get_nodes_structure(tx):
    query = (
        "MATCH (n) "
        "UNWIND labels(n) AS label "
        "RETURN DISTINCT label, keys(n) AS properties"
    )
    result = tx.run(query)
    structure = {}
    for record in result:
        label = record["label"]
        if record["properties"]:  # Ensure there is at least one property
            property_name = record["properties"][0]  # Take the first property name
            properties_query = (
                f"MATCH (n:{label}) "
                f"WHERE n.`{property_name}` IS NOT NULL "
                f"RETURN n.`{property_name}` AS sample LIMIT 1"
            )
            sample_result = tx.run(properties_query)
            sample_record = sample_result.single()
            if sample_record:
                sample_value = sample_record["sample"]
                structure[label] = {prop: type(sample_value).__name__ for prop in record["properties"]}
    return structure

In [18]:
def get_relationships_structure(tx):
    # Query to get distinct relationship types along with start and end node labels
    query = (
        "MATCH (a)-[r]->(b) "
        "RETURN DISTINCT type(r) AS relationshipType, "
        "labels(a) AS startLabels, labels(b) AS endLabels, keys(r) AS properties"
    )
    result = tx.run(query)
    structure = {}

    for record in result:
        rel_type = record["relationshipType"]
        start_labels = record["startLabels"]
        end_labels = record["endLabels"]
        properties = record["properties"]

        # Initialize relationship type in structure if not already present
        if rel_type not in structure:
            structure[rel_type] = {
                "start_labels": start_labels,
                "end_labels": end_labels,
                "properties": {}
            }

        # If the relationship has properties, fetch a sample value for each
        if properties:
            for prop in properties:
                properties_query = (
                    f"MATCH ()-[r:{rel_type}]->() "
                    f"WHERE r.`{prop}` IS NOT NULL "
                    "RETURN r.`{prop}` AS sample LIMIT 1"
                )
                sample_result = tx.run(properties_query)
                sample_record = sample_result.single()
                if sample_record:
                    sample_value = sample_record["sample"]
                    # Store the type of the sample value for the property
                    structure[rel_type]["properties"][prop] = type(sample_value).__name__
        else:
            # For relationships without properties, indicate they have no properties
            structure[rel_type]["properties"]['no_properties'] = 'None'

    return structure

In [77]:
def get_graph_structure():
    graph_structure = ""
    with driver.session() as session:
        # Get node labels and properties
        node_structure = session.execute_read(get_nodes_structure)
        graph_structure += "Nodes Structure:\n"
        for label, properties in node_structure.items():
            graph_structure += f"Node: {label}\n"
            for prop, prop_type in properties.items():
                graph_structure += f"  - {prop}: {prop_type}\n"
        graph_structure += "\n"
        
        # Get relationship types and properties
        relationship_structure = session.execute_read(get_relationships_structure)
        graph_structure += "Relationships Structure:\n"
        for rel_type, properties in relationship_structure.items():
            graph_structure += f"Relationship: {rel_type}\n"
            for prop, prop_type in properties.items():
                if type(prop_type) != dict:
                    prop = prop.replace('labels', 'node')  # start_node, end_node
                    graph_structure += f"  - {prop}: {prop_type[0]}\n"
                else:
                    for key, value in prop_type.items():
                        if key != 'no_properties':
                            graph_structure += f"  - {key}: {value}\n"
    return graph_structure

In [78]:
print(get_graph_structure())

Nodes Structure:
Node: Clinician
  - name: str
  - password: str
  - email: str
Node: SimilarityReport
  - status: str
  - similarityStrategy: str
  - genotypeScore: str
  - phenotypeScore: str
  - totalScore: str
Node: PhenotypeTerm
  - definition: str
  - comment: str
  - id: str
  - synonyms: str
  - xrefs: str
  - name: str
Node: Disease
  - diseaseName: str
Node: Gene
  - geneSymbol: str
Node: MedicalCenter
  - email: str
  - phone: str
  - name: str
  - address: str
  - subscription: str
  - remainingAnalyses: str
  - password: str
Node: Patient
  - sex: str
  - name: str
  - age: str
  - phenotypeVector: str
Node: Admin
  - password: str
  - email: str

Relationships Structure:
Relationship: IS_A
  - start_node: PhenotypeTerm
  - end_node: PhenotypeTerm
Relationship: ASSOCIATED_WITH_PHENOTYPE
  - start_node: Disease
  - end_node: PhenotypeTerm
  - frequency: NoneType
  - databaseId: NoneType
Relationship: SECONDARY_PATIENT
  - start_node: SimilarityReport
  - end_node: Patient
R

In [73]:
# Function to generate query using ChatGPT
def generate_query(question, graph_structure):
    prompt = f"Graph Structure: {graph_structure}\nQuestion: {question}\nQuery:"
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Please write a Cypher query to answer the question based on the given graph structure. Keep in mind the order (start-end) of the nodes in the relationships."},
            {"role": "user", "content": f"{prompt}"},
        ],
        temperature=0.1
    )
    return response.choices[0].message.content

In [74]:
# Function to execute query on Neo4j
def execute_query(query):
    records = []
    with driver.session() as session:
        result = session.run(query)
        records = list(result)
    
    return records 

In [79]:
# Main function to interact with ChatGPT and execute query
def main():
    # Example question
    question = "Which medical centers have a patient named Ali Veli?"
    
    # Retrieve graph structure from Neo4j
    graph_structure = get_graph_structure()
    print("Graph Structure:", graph_structure)
    
    # Generate query
    generated_query = generate_query(question, graph_structure)
    print("Generated Query:", generated_query)
    
    # Execute query
    result = execute_query(generated_query)
    for record in result:
        print(record)

In [80]:
main()

Graph Structure: Nodes Structure:
Node: Clinician
  - name: str
  - password: str
  - email: str
Node: SimilarityReport
  - status: str
  - similarityStrategy: str
  - genotypeScore: str
  - phenotypeScore: str
  - totalScore: str
Node: PhenotypeTerm
  - definition: str
  - comment: str
  - id: str
  - synonyms: str
  - xrefs: str
  - name: str
Node: Disease
  - diseaseName: str
Node: Gene
  - geneSymbol: str
Node: MedicalCenter
  - email: str
  - phone: str
  - name: str
  - address: str
  - subscription: str
  - remainingAnalyses: str
  - password: str
Node: Patient
  - sex: str
  - name: str
  - age: str
  - phenotypeVector: str
Node: Admin
  - password: str
  - email: str

Relationships Structure:
Relationship: IS_A
  - start_node: PhenotypeTerm
  - end_node: PhenotypeTerm
Relationship: ASSOCIATED_WITH_PHENOTYPE
  - start_node: Disease
  - end_node: PhenotypeTerm
  - frequency: NoneType
  - databaseId: NoneType
Relationship: SECONDARY_PATIENT
  - start_node: SimilarityReport
  - en

In [5]:
uri = "bolt://localhost:7687"
username = "neo4j"
password = "abr272373574A*"
os.environ["OPENAI_API_KEY"] = "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"

In [7]:
from langchain.chains import GraphCypherQAChain
from langchain.chat_models import ChatOpenAI
from langchain.graphs import Neo4jGraph

graph = Neo4jGraph(
    url=uri, username=username, password=password
)
print(graph.schema)

Node properties are the following:
Disease {diseaseName: STRING},Patient {phenotypeVector: LIST, name: STRING, age: INTEGER, sex: STRING},MedicalCenter {password: STRING, email: STRING, address: STRING, name: STRING, subscription: STRING, phone: STRING, remainingAnalyses: INTEGER},PhenotypeTerm {name: STRING, comment: STRING, id: INTEGER, synonyms: LIST, xrefs: LIST, definition: STRING},Clinician {email: STRING, name: STRING, password: STRING},Admin {password: STRING, email: STRING},Gene {geneSymbol: STRING},SimilarityReport {status: STRING, similarityStrategy: STRING, genotypeScore: STRING, phenotypeScore: STRING, totalScore: STRING}
Relationship properties are the following:
ASSOCIATED_WITH_PHENOTYPE {frequency: STRING, databaseId: STRING}
The relationships are the following:
(:Disease)-[:ASSOCIATED_WITH_PHENOTYPE]->(:PhenotypeTerm),(:Patient)-[:HAS_PHENOTYPE_TERM]->(:PhenotypeTerm),(:Patient)-[:HAS_DISEASE]->(:Disease),(:Patient)-[:BELONGS_TO_MEDICAL_CENTER]->(:MedicalCenter),(:Pati

In [12]:
chain = GraphCypherQAChain.from_llm(
    graph=graph,
    cypher_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125"),
    qa_llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125"),
    verbose=True,
    validate_cypher=True,
)

In [16]:
result = chain(
    " How many patients are under age 35? "
)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Patient)
WHERE p.age < 35
RETURN COUNT(p) as numberOfPatientsUnder35[0m
Full Context:
[32;1m[1;3m[{'numberOfPatientsUnder35': 4}][0m

[1m> Finished chain.[0m
