In [1]:
from flask import Flask, render_template, request, jsonify
from neo4j import GraphDatabase
import spacy
import pandas as pd
import re


app = Flask(__name__)

# Load the spaCy English model
nlp = spacy.load("en_core_web_sm")

# Set up the Neo4j driver
driver = GraphDatabase.driver(uri="bolt://localhost:7687", auth=("neo4j", "projects"))


In [2]:
def determine_question_type(nlp_question):
    from spacy.lang.en import English

    nlp = English()
    doc = nlp_question
    print(doc)
    question_type = "unknown"
    
   
    # Identify Type 1 questions
    if any((token.text in {"diseases"}) for token in doc):
        if "related" in [token.text for token in doc]:
            question_type = "Type 1"
        elif "cause" in [token.text for token in doc]:
            question_type = "Type 1"
     # Identify Type 1 questions
    elif any((token.text in {"antibiotics", "drug"}) for token in doc):
        question_type = "Type 2"
                
    elif "causes" in [token.text for token in doc] and "risk" in [token.text for token in doc]:
        question_type = "Type 3"
            
    return question_type

In [3]:
def generate_cypher_query(nlp_question,q_type):
        doc = nlp(nlp_question)
        cypher_query = ""
        # Read the Excel file
        df = pd.read_csv("final_diseases_dataset_v2.csv")

        # Create an empty list to store the unique entries
        First_Item_Name = []
        Second_Item_Name = []

        # Iterate over each column in the DataFrame
        for col in df.columns:
            # Iterate over each value in the column
            for value in df['First_Item_Name']:
                # Check if the value is not already in the unique list
                if value not in First_Item_Name:
                    # If the value is unique, add it to the list
                    First_Item_Name.append(value)
            # Iterate over each value in the column
            for value in df['Second_Item_Name']:
                # Check if the value is not already in the unique list
                if value not in Second_Item_Name:
                    # If the value is unique, add it to the list
                    Second_Item_Name.append(value)


        # Define regular expressions for the target words
        regexes = [re.compile(fr'{word}', re.IGNORECASE) for word in Second_Item_Name]
        print(Second_Item_Name)
      
       # Find matching words in Second_Item_Name
        matches = []
        for regex in regexes:
            match = regex.search(nlp_question)
            if match:
                matches.append(regex.pattern)


        print(matches)
        first_item_name_ = ''
        second_item_name_ = ''
        # Get the first and second item names
        for i in range(len(matches)):
            if i == 0:
                first_item_name_ = matches[0]
            elif i == 1:
                second_item_name_ = matches[1]

                

        print("first_item_name_ = " + first_item_name_)
        print("second_item_name_ = " + second_item_name_)
        
        
        if q_type == "Type 1":
            cypher_query = f"MATCH (e:First_Item)-[r:Relation]->(c:Second_Item) WHERE c.Name =~ '{first_item_name_}' OPTIONAL MATCH (e)<-[r_inv:InverseRelation]-(c) RETURN e, r, c, r_inv"
            print("Type 1",cypher_query)
        elif q_type == "Type 2":
            cypher_query = f"MATCH (e:First_Item)-[r:Relation]->(c:Second_Item) WHERE c.Name =~ '{first_item_name_}' MATCH (e)-[dr:Relation]->(f) WHERE f.Name =~ '{second_item_name_}' RETURN e, r, c, dr, f"
            print("Type 2",cypher_query)
        elif q_type == "Type 3":
            cypher_query = f"""MATCH (n:First_Item)-[r:Relation{{type: "may cause"}}]->(c:Second_Item) WHERE c.Name =~ "{first_item_name_}" MATCH (f)-[dr:Relation{{type: "is a risk factor for"}}]->(c) RETURN n, f, c, r, dr"""

            print("Type 3",cypher_query)
        
        return cypher_query
            
                




In [None]:

# Define the route for the home page
@app.route('/')
def home():
    return render_template('index.html')



# Define the route for the Cypher query - Student
@app.route('/convert_nlp_to_cypher', methods=['POST'])
def stu_query():
    # Get the natural language query from the HTML form
    query_text = request.form.get('nlp_query')
    print("query_text",query_text)
    # Parse the natural language query using spaCy
    doc = nlp(query_text)
    doc_lower = nlp(query_text.lower())
    print("Parse the natural language query using spaCy",doc)
    #determine Question type
    quesType = determine_question_type(doc_lower)
    print("quesType",quesType)
    

    
    
    # Generate Cypher query
    cypher_query = generate_cypher_query(query_text,quesType)
    print(cypher_query)
    # Return the result as a JSON object
    return cypher_query


if __name__ == '__main__':
    app.run(port=8080)


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8080/ (Press CTRL+C to quit)
127.0.0.1 - - [09/May/2023 09:44:30] "GET / HTTP/1.1" 200 -


query_text What are the causes and risk factors of Acne vulgaris
Parse the natural language query using spaCy What are the causes and risk factors of Acne vulgaris


127.0.0.1 - - [09/May/2023 09:44:40] "POST /convert_nlp_to_cypher HTTP/1.1" 200 -


what are the causes and risk factors of acne vulgaris
quesType Type 3
['Cardiac failure, high output', 'Hypertension, systemic', 'Hematological abnormality', 'Breathlessness', 'Chest pain', 'Copper levels raised (serum or plasma)', 'Headache', 'Hypopigmentation', 'Menorrhagia', 'Flow murmur', 'Roth spots', 'Skin discoloration', 'Anemia', 'Uric acid levels raised (plasma or serum)', 'Hepatic failure', 'Red cell production reduced', 'Rifampicin', 'Sideroblasts', 'Urticaria', 'Arthropathy', 'Pyrexia', 'Hepatocellular jaundice', 'Hepatomegaly', 'Rash', 'Drugs, hormones and biological mediators', 'Antibiotics', 'Pyrazinamide', 'Acneiform lesions', 'Cutaneous abscess', 'Bacteria and bacterial diseases', 'Acne vulgaris']
['Acne vulgaris']
first_item_name_ = Acne vulgaris
second_item_name_ = 
Type 3 MATCH (n:First_Item)-[r:Relation{type: "may cause"}]->(c:Second_Item) WHERE c.Name =~ "Acne vulgaris" MATCH (f)-[dr:Relation{type: "is a risk factor for"}]->(c) RETURN n, f, c, r, dr
MATCH (n:First

127.0.0.1 - - [09/May/2023 09:45:56] "GET / HTTP/1.1" 200 -


query_text What diseases are related to Anemia
Parse the natural language query using spaCy What diseases are related to Anemia


127.0.0.1 - - [09/May/2023 09:46:00] "POST /convert_nlp_to_cypher HTTP/1.1" 200 -


what diseases are related to anemia
quesType Type 1
['Cardiac failure, high output', 'Hypertension, systemic', 'Hematological abnormality', 'Breathlessness', 'Chest pain', 'Copper levels raised (serum or plasma)', 'Headache', 'Hypopigmentation', 'Menorrhagia', 'Flow murmur', 'Roth spots', 'Skin discoloration', 'Anemia', 'Uric acid levels raised (plasma or serum)', 'Hepatic failure', 'Red cell production reduced', 'Rifampicin', 'Sideroblasts', 'Urticaria', 'Arthropathy', 'Pyrexia', 'Hepatocellular jaundice', 'Hepatomegaly', 'Rash', 'Drugs, hormones and biological mediators', 'Antibiotics', 'Pyrazinamide', 'Acneiform lesions', 'Cutaneous abscess', 'Bacteria and bacterial diseases', 'Acne vulgaris']
['Anemia']
first_item_name_ = Anemia
second_item_name_ = 
Type 1 MATCH (e:First_Item)-[r:Relation]->(c:Second_Item) WHERE c.Name =~ 'Anemia' OPTIONAL MATCH (e)<-[r_inv:InverseRelation]-(c) RETURN e, r, c, r_inv
MATCH (e:First_Item)-[r:Relation]->(c:Second_Item) WHERE c.Name =~ 'Anemia' OPTIONA

127.0.0.1 - - [09/May/2023 09:46:25] "POST /convert_nlp_to_cypher HTTP/1.1" 200 -


what antibiotics drugs may cause hepatic failure
quesType Type 2
['Cardiac failure, high output', 'Hypertension, systemic', 'Hematological abnormality', 'Breathlessness', 'Chest pain', 'Copper levels raised (serum or plasma)', 'Headache', 'Hypopigmentation', 'Menorrhagia', 'Flow murmur', 'Roth spots', 'Skin discoloration', 'Anemia', 'Uric acid levels raised (plasma or serum)', 'Hepatic failure', 'Red cell production reduced', 'Rifampicin', 'Sideroblasts', 'Urticaria', 'Arthropathy', 'Pyrexia', 'Hepatocellular jaundice', 'Hepatomegaly', 'Rash', 'Drugs, hormones and biological mediators', 'Antibiotics', 'Pyrazinamide', 'Acneiform lesions', 'Cutaneous abscess', 'Bacteria and bacterial diseases', 'Acne vulgaris']
['Hepatic failure', 'Antibiotics']
first_item_name_ = Hepatic failure
second_item_name_ = Antibiotics
Type 2 MATCH (e:First_Item)-[r:Relation]->(c:Second_Item) WHERE c.Name =~ 'Hepatic failure' MATCH (e)-[dr:Relation]->(f) WHERE f.Name =~ 'Antibiotics' RETURN e, r, c, dr, f
MATCH 