In [1]:
from neo4j import GraphDatabase
import os
from openai import OpenAI

In [2]:
# Configure Neo4j driver
uri = "bolt://localhost:7687"
username = "neo4j"
password = "abr272373574A*"
driver = GraphDatabase.driver(uri, auth=(username, password))

# Set up ChatGPT API
os.environ["OPENAI_API_KEY"] = "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"
client = OpenAI()

In [6]:
def get_nodes_structure(tx):
    query = (
        "MATCH (n) "
        "UNWIND labels(n) AS label "
        "RETURN DISTINCT label, keys(n) AS properties"
    )
    result = tx.run(query)
    structure = {}
    for record in result:
        label = record["label"]
        if record["properties"]:  # Ensure there is at least one property
            property_name = record["properties"][0]  # Take the first property name
            properties_query = (
                f"MATCH (n:{label}) "
                f"WHERE n.`{property_name}` IS NOT NULL "
                f"RETURN n.`{property_name}` AS sample LIMIT 1"
            )
            sample_result = tx.run(properties_query)
            sample_record = sample_result.single()
            if sample_record:
                sample_value = sample_record["sample"]
                structure[label] = {prop: type(sample_value).__name__ for prop in record["properties"]}
    return structure

In [18]:
def get_relationships_structure(tx):
    # Query to get distinct relationship types along with start and end node labels
    query = (
        "MATCH (a)-[r]->(b) "
        "RETURN DISTINCT type(r) AS relationshipType, "
        "labels(a) AS startLabels, labels(b) AS endLabels, keys(r) AS properties"
    )
    result = tx.run(query)
    structure = {}

    for record in result:
        rel_type = record["relationshipType"]
        start_labels = record["startLabels"]
        end_labels = record["endLabels"]
        properties = record["properties"]

        # Initialize relationship type in structure if not already present
        if rel_type not in structure:
            structure[rel_type] = {
                "start_labels": start_labels,
                "end_labels": end_labels,
                "properties": {}
            }

        # If the relationship has properties, fetch a sample value for each
        if properties:
            for prop in properties:
                properties_query = (
                    f"MATCH ()-[r:{rel_type}]->() "
                    f"WHERE r.`{prop}` IS NOT NULL "
                    "RETURN r.`{prop}` AS sample LIMIT 1"
                )
                sample_result = tx.run(properties_query)
                sample_record = sample_result.single()
                if sample_record:
                    sample_value = sample_record["sample"]
                    # Store the type of the sample value for the property
                    structure[rel_type]["properties"][prop] = type(sample_value).__name__
        else:
            # For relationships without properties, indicate they have no properties
            structure[rel_type]["properties"]['no_properties'] = 'None'

    return structure

In [37]:
with driver.session() as session:
    # Get node labels and properties
    node_structure = session.read_transaction(get_nodes_structure)
    print("Nodes Structure:")
    for label, properties in node_structure.items():
        print(f"Node: {label}")
        for prop, prop_type in properties.items():
            print(f"  - {prop}: {prop_type}")
    print("\n")
    
    # Get relationship types and properties
    relationship_structure = session.read_transaction(get_relationships_structure)
    print("Relationships Structure:")
    for rel_type, properties in relationship_structure.items():
        print(f"Relationship: {rel_type}")
        for prop, prop_type in properties.items():
            if type(prop_type) != dict:
                prop = prop.replace('labels', 'node')  # start_node, end_node
                print(f"  - {prop}: {prop_type[0]}")
            else:
                for key, value in prop_type.items():
                    if key != 'no_properties':
                        print(f"  - {key}: {value}")

Nodes Structure:
Node: Clinician
  - name: str
  - password: str
  - email: str
Node: SimilarityReport
  - status: str
  - similarityStrategy: str
  - genotypeScore: str
  - phenotypeScore: str
  - totalScore: str
Node: PhenotypeTerm
  - definition: str
  - comment: str
  - id: str
  - synonyms: str
  - xrefs: str
  - name: str
Node: Disease
  - diseaseName: str
Node: Gene
  - geneSymbol: str
Node: MedicalCenter
  - email: str
  - phone: str
  - name: str
  - address: str
  - subscription: str
  - remainingAnalyses: str
  - password: str
Node: Patient
  - sex: str
  - name: str
  - age: str
  - phenotypeVector: str
Node: Admin
  - password: str
  - email: str


Relationships Structure:
Relationship: IS_A
  - start_node: PhenotypeTerm
  - end_node: PhenotypeTerm
Relationship: ASSOCIATED_WITH_PHENOTYPE
  - start_node: Disease
  - end_node: PhenotypeTerm
  - frequency: NoneType
  - databaseId: NoneType
Relationship: SECONDARY_PATIENT
  - start_node: SimilarityReport
  - end_node: Patient


  node_structure = session.read_transaction(get_nodes_structure)
  relationship_structure = session.read_transaction(get_relationships_structure)


In [5]:
# Function to generate query using ChatGPT
def generate_query(question, graph_structure):
    prompt = f"Graph Structure: {graph_structure}\nQuestion: {question}\nQuery:"
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f"{prompt}"},
        ],
        temperature=0.1
    )
    return response.choices[0].message.content

In [6]:
# Function to execute query on Neo4j
def execute_query(query):
    with driver.session() as session:
        result = session.run(query)
        return result

In [7]:
# Main function to interact with ChatGPT and execute query
def main():
    # Example question
    question = "Which patients have a phenotype HP:0000007?"
    
    # Retrieve graph structure from Neo4j
    graph_structure = get_graph_structure()
    print("Graph Structure:", graph_structure)
    
    # Generate query
    generated_query = generate_query(question, graph_structure)
    print("Generated Query:", generated_query)
    
    # Execute query
    result = execute_query(generated_query)
    for record in result:
        print(record)

In [8]:
main()

Graph Structure: (['Nodes:', '- MedicalCenter: {name: MedicalCenter, indexes: [], constraints: []}', '- Disease: {name: Disease, indexes: [], constraints: []}', '- Gene: {name: Gene, indexes: [], constraints: []}', '- PhenotypeTerm: {name: PhenotypeTerm, indexes: [], constraints: ["Constraint( id=4, name=\'constraint_phenotypeterm_id\', type=\'UNIQUENESS\', schema=(:PhenotypeTerm {id}), ownedIndex=3 )"]}', '- SimilarityReport: {name: SimilarityReport, indexes: [], constraints: []}', '- Patient: {name: Patient, indexes: [], constraints: []}', '- Admin: {name: Admin, indexes: [], constraints: []}', '- Clinician: {name: Clinician, indexes: [], constraints: []}'], ['Relationships:', '- From SimilarityReport to Patient: PRIMARY_PATIENT\n  Properties: {name: PRIMARY_PATIENT}', '- From Patient to MedicalCenter: BELONGS_TO_MEDICAL_CENTER\n  Properties: {name: BELONGS_TO_MEDICAL_CENTER}', '- From Gene to PhenotypeTerm: ASSOCIATED_WITH_PHENOTYPE\n  Properties: {name: ASSOCIATED_WITH_PHENOTYPE}',

ResultConsumedError: The result has been consumed. Fetch all needed records before calling Result.consume().