In [1]:
pip install openai==0.28

Note: you may need to restart the kernel to use updated packages.


In [6]:
import json
import os

import openai
from neo4j import GraphDatabase

# Take the OpenAI key from the OPENAI_API_KEY environment variable so the
# secret does not have to be written into this file. When the variable is
# unset the key stays the empty placeholder, exactly as before.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Function to extract entities and relationships from a single abstract using ChatGPT
def extract_entities_relationships(abstract):
    """Ask the chat model for the entities and relationships in *abstract*.

    The abstract is embedded in an instruction that requests a JSON object
    with an "entities" array and a "relationships" array. A single request is
    sent to ``gpt-3.5-turbo`` with ``temperature=0.5`` and ``max_tokens=1500``.

    Args:
        abstract: Text to analyse; it is interpolated verbatim into the prompt.

    Returns:
        The message content of the first returned choice, as an unparsed
        string. Nothing here checks that it is valid JSON.
    """
    # Instruction text, including the JSON template the reply should follow.
    instruction = f"""
    Extract the entities and relationships from the following abstract:

    {abstract}

    Provide the output as a JSON in this format:
    {{
      "entities": [
        {{"id": "Entity1", "type": "Type1"}},
        {{"id": "Entity2", "type": "Type2"}}
      ],
      "relationships": [
        {{"source": "Entity1", "target": "Entity2", "relation": "RELATION_TYPE"}}
      ]
    }}
    Ensure the output uses double quotes for property names and values.
    """

    # Single-turn conversation: the whole instruction is one user message.
    request_messages = [{"role": "user", "content": instruction}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=request_messages,
        temperature=0.5,
        max_tokens=1500,
    )

    # Only the first choice is used.
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

# Function to escape special characters for Cypher queries
def escape_special_chars(value):
    """Escape *value* for use inside a single-quoted Cypher string literal.

    Cypher string literals use backslash escapes, so a single quote becomes
    ``\\'`` and a backslash becomes ``\\\\``. Doubling the quote (the SQL
    convention) does not escape it in Cypher.

    Args:
        value: The raw string to embed.

    Returns:
        The string with backslashes and single quotes backslash-escaped.
    """
    # Backslashes go first so the ones added for quotes are not doubled again.
    return value.replace("\\", "\\\\").replace("'", "\\'")

# Neo4j connection setup using driver.session()
# Each setting can be overridden through an environment variable so that
# credentials need not live in this file; the defaults are the values that
# were previously hard-coded. Adjust them according to your Neo4j setup.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7999")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")
driver = GraphDatabase.driver(uri, auth=(username, password))

# Function to create nodes and relationships in Neo4j
def create_knowledge_graph(session, entities, relationships):
    """Write the extracted entities and relationships to Neo4j.

    Every entity is merged as an ``Entity`` node with ``id`` and ``type``
    properties. Every relationship is merged between the ``Entity`` nodes
    whose ``id`` equals its ``source`` and ``target``; if either node does
    not exist the MATCH finds nothing and no relationship is created.

    All values are passed as query parameters rather than spliced into the
    Cypher text, so quotes or other special characters in the extracted text
    cannot break or alter the query.

    Args:
        session: Object exposing ``run(query, parameters)``, such as the
            session returned by ``driver.session()``.
        entities: Iterable of dicts with ``"id"`` and ``"type"`` keys.
        relationships: Iterable of dicts with ``"source"``, ``"target"`` and
            ``"relation"`` keys.

    Raises:
        KeyError: If an entity or relationship lacks a required key.
        ValueError: If a relationship's ``"relation"`` is empty or blank.
    """
    # Create nodes (entities)
    node_query = "MERGE (e:Entity {id: $id, type: $type})"
    for entity in entities:
        session.run(node_query, {"id": entity['id'], "type": entity['type']})

    # Create relationships
    for relationship in relationships:
        raw_relation = relationship['relation']
        # A relationship type cannot be supplied as a query parameter, so it
        # is reduced to letters, digits and underscores (spaces become "_",
        # as before) and then backtick-quoted in the query text.
        rel_type = "".join(
            ch if ch.isalnum() or ch == "_" else "_"
            for ch in raw_relation.strip().upper()
        )
        if not rel_type:
            raise ValueError(
                f"Relationship type must not be empty: {raw_relation!r}"
            )
        rel_query = (
            "MATCH (source:Entity {id: $source}), "
            "(target:Entity {id: $target}) "
            f"MERGE (source)-[:`{rel_type}`]->(target)"
        )
        session.run(
            rel_query,
            {
                "source": relationship['source'],
                "target": relationship['target'],
            },
        )

# The single abstract to process (replace this literal with your actual data).
# The text is kept verbatim, including the newline right after the opening
# quotes and the one before the closing quotes.
abstract = """
Valsartan was recalled by the US Food and Drug Administration in July 2018 for carcinogenic impurities, resulting in a drug shortage and management challenges for valsartan users. The influence of the valsartan recall on clinical outcomes is unknown. We compared the risk of adverse events between hypertensive patients using valsartan and a propensity score-matched group using nonrecalled angiotensin receptor blockers and angiotensin-converting enzyme inhibitors.
We used Optums deidentified Clinformatics Datamart July 2017 January 2019. Hypertensive patients who received valsartan or nonrecalled angiotensin receptor blockers angiotensin converting enzyme inhibitors for 1 year before and on the recall date were compared. Primary outcomes were measured in the 6 months following the recall and included 1 a composite measure of all-cause hospitalization all-cause emergency department visit and all-cause urgent care visit and 2 a composite cardiac event measure of hospitalizations for acute myocardial infarction and hospitalizations emergency department visits urgent care visits for stroke transient ischemic attack heart failure or hypertension. We compared the risk of outcomes between treatment groups using Cox proportional hazard models. Of the hypertensive patients 76 934 received valsartan and 509 472 received a nonrecalled angiotensin receptor blocker angiotensin converting enzyme inhibitor. Valsartan use at the time of recall was associated with a higher risk of all-cause hospitalization emergency department use or urgent care use hazard ratio HR 1.02 95 percent CI 1.00 1.04 and the composite of cardiac events HR 1.22 95 percent CI 1.15 1.29 within 6 months after the recall.
The valsartan recall and shortage affected hypertensive patients. Local and national level systems need to be enhanced to protect patients from drug shortages by providing safe and reliable medication alternatives.
"""

# Run the pipeline: extract, parse, insert. The driver is closed in the
# `finally` clause so the connection is released even if a step raises.
try:
    # Call the function to extract entities and relationships for the abstract
    results = extract_entities_relationships(abstract)

    # Parse the string result into Python objects using json.loads().
    # parsed_result stays None on failure so the insertion step below is
    # skipped instead of failing with a NameError.
    parsed_result = None
    try:
        parsed_result = json.loads(results)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        print(f"Problematic result: {results}")

    if isinstance(parsed_result, dict):
        # Insert the extracted entities and relationships into Neo4j.
        # A missing key is treated as an empty list.
        with driver.session() as session:
            entities = parsed_result.get('entities', [])
            relationships = parsed_result.get('relationships', [])
            create_knowledge_graph(session, entities, relationships)
    elif parsed_result is not None:
        # Valid JSON, but not the object shape the prompt asked for.
        print(f"Unexpected JSON structure: {results}")
finally:
    # Close the Neo4j driver connection
    driver.close()

