In [1]:
import os

from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI

# Models for handling QA and Cypher query generation
QA_MODEL = os.getenv("QA_MODEL")
CYPHER_MODEL = os.getenv("CYPHER_MODEL")

# Connect to Neo4j database
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)


In [19]:
# Refresh schema to get the latest structure
graph.refresh_schema()

In [15]:
cypher_generation_template = """
Task:
Generate a Cypher query for a Neo4j graph database based on the provided schema and user question.

Instructions:
- Use only the relationship types and properties defined in the schema below.
- Do not introduce any new relationship types or properties.
- Do not include explanations, apologies, or any text outside the Cypher query.
- Ensure the direction of relationships is correct.
- Use proper aliasing for entities and relationships.
- Do not perform any operations that modify the database (e.g., CREATE, DELETE).
- Alias all intermediate statements using `WITH` clauses as necessary.

Schema:
{schema}

Nodes:
- Education(id, institution, location, degree, gpa, startDate, endDate, awards, thesisTitle)
- Course(CourseID, courseName, courseGrade, EducationID)


Relationships:
- (Education)-[:INCLUDED_IN]->(Course)

Example Questions and Cypher Queries:

# What courses are included in the Ph.D. program?
MATCH (e:Education {{degree: 'Ph.D. in Mechanical Engineering and Mechanics'}})-[:INCLUDED_IN]->(c:Course)
RETURN c.courseName AS course_name

# Which courses did Khayrul take during his M.S. degree?
MATCH (e:Education {{degree: 'M.S. in Mechanical Engineering and Mechanics'}})-[:INCLUDED_IN]->(c:Course)
RETURN c.CourseName AS course_name

# How many courses were completed for the B.Sc. degree?
MATCH (e:Education {{degree: 'B.Sc. in Industrial and Production Engineering'}})-[:INCLUDED_IN]->(c:Course)
RETURN COUNT(c) AS course_count

# What is Khayrul's GPA for his Ph.D.?
MATCH (e:Education {{degree: 'Ph.D. in Mechanical Engineering and Mechanics'}})
RETURN e.gpa AS gpa
LIMIT 1

String category values:
- Institutions: 'Lehigh University', 'IBM', 'Bangladesh University of Engineering and Technology'
- Locations: 'Pennsylvania, USA', 'Online', 'Dhaka, Bangladesh'
- Degrees: 'Ph.D. in Mechanical Engineering and Mechanics', 'M.S. in Mechanical Engineering and Mechanics',
            'Professional Certificate in Data Science', 'B.Sc. in Industrial and Production Engineering'
- Awards: 'P.C. Rossin College of Engineering fellowship', 'SCEA- PTAK prize global case study competition scholarship'

The question is:
{question}
"""

In [None]:
cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=cypher_generation_template
)
print(cypher_generation_prompt.input_variables)

In [9]:

# Cypher generation template tailored for Papers and Skills
cypher_generation_template = """
Task:
Generate a Cypher query for a Neo4j graph database based on the provided schema and user question.

Instructions:
- Use only the relationship types and properties defined in the schema below.
- Do not introduce any new relationship types or properties.
- Do not include explanations, apologies, or any text outside the Cypher query.
- Ensure the direction of relationships is correct.
- Use proper aliasing for entities and relationships.
- Do not perform any operations that modify the database (e.g., CREATE, DELETE).
- Alias all intermediate statements using `WITH` clauses as necessary.

Schema:
{schema}

Nodes:
- Paper(id, title, abstract_novelty, abstract_challenge, abstract_result, keywords, issue, author, date, doi, journaltitle, pages, volume, contribution, first_author, publisher, url)
- Skill(id, skill, skill_type)

Relationships:
- (Paper)-[:UTILIZES]->(Skill)

Example Questions and Cypher Queries:
# What are the papers published by Khayrul as the first author?
MATCH (p:Paper)
WHERE p.first_author = 'True'
RETURN p.title AS paper_title

# What are the papers Khayrul published?
MATCH (p:Paper)
RETURN p.title AS paper_title

# List all skills associated with my papers.
MATCH (p:Paper)-[:UTILIZES]->(s:Skill)
RETURN DISTINCT s.skill AS skill_name

# How many paper Khayrul published during phd/ms?
MATCH (p:Paper)
WHERE p.date >= '2021-01-01'
RETURN COUNT(p) AS paper_count

# How many paper Khayrul published during bsc?
MATCH (p:Paper)
WHERE p.date <= '2021-01-01'
RETURN COUNT(p) AS paper_count

# What software was used in the paper titled "Tailoring polyamide nanocomposites: The synergistic effects of SWCNT chirality and maleic anhydride grafting"?
MATCH (p:Paper {{title: 'Tailoring polyamide nanocomposites: The synergistic effects of SWCNT chirality and maleic anhydride grafting'}})-[:UTILIZES]->(s:Skill)
WHERE s.skill_type = 'Design and Simulation Software'
RETURN s.skill AS software_used

# List all papers published in the journal "ACS Applied Engineering Materials".
MATCH (p:Paper)
WHERE p.journaltitle = 'ACS Applied Engineering Materials'
RETURN p.title AS paper_title, p.date AS publication_date

# What is the novelty of the paper titled "MIML: Multiplex Image Machine Learning for High Precision Cell Classification via Mechanical Traits within Microfluidic Systems"?
MATCH (p:Paper {{title: 'MIML: Multiplex Image Machine Learning for High Precision Cell Classification via Mechanical Traits within Microfluidic Systems'}})
RETURN p.abstract_novelty AS novelty

# Which programming languages do you know?
MATCH (s:Skill {{skill_type: 'Programming Languages'}})
RETURN s.skill AS skill_name



The question is:
{question}

"""

In [None]:
cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=cypher_generation_template
)

print(cypher_generation_prompt.input_variables)

In [2]:
import os

from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI

# Models for handling QA and Cypher query generation
QA_MODEL = os.getenv("QA_MODEL")
CYPHER_MODEL = os.getenv("CYPHER_MODEL")

# Connect to Neo4j database
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)

# Refresh schema to get the latest structure
graph.refresh_schema()

# Cypher generation template tailored for Papers and Skills

# Cypher generation template tailored for Papers and Skills
cypher_generation_template = """
Task:
Generate a Cypher query for a Neo4j graph database based on the provided schema and user question.

Instructions:
- Use only the relationship types and properties defined in the schema below.
- Do not introduce any new relationship types or properties.
- Do not include explanations, apologies, or any text outside the Cypher query.
- Ensure the direction of relationships is correct.
- Use proper aliasing for entities and relationships.
- Do not perform any operations that modify the database (e.g., CREATE, DELETE).
- Alias all intermediate statements using `WITH` clauses as necessary.

Schema:
{schema}

Nodes:
- Paper(id, title, abstract_novelty, abstract_challenge, abstract_result, keywords, issue, author, date, doi, journaltitle, pages, volume, contribution, first_author, publisher, url)
- Skill(id, skill, skill_type)

Relationships:
- (Paper)-[:UTILIZES]->(Skill)

Example Questions and Cypher Queries:
# What are the papers published by Khayrul as the first author?
MATCH (p:Paper)
WHERE p.first_author = 'True'
RETURN p.title AS paper_title

# What are the papers Khayrul published?
MATCH (p:Paper)
RETURN p.title AS paper_title

# List all skills associated with my papers.
MATCH (p:Paper)-[:UTILIZES]->(s:Skill)
RETURN DISTINCT s.skill AS skill_name

# How many paper Khayrul published during phd/ms?
MATCH (p:Paper)
WHERE p.date >= '2021-01-01'
RETURN COUNT(p) AS paper_count

# How many paper Khayrul published during bsc?
MATCH (p:Paper)
WHERE p.date <= '2021-01-01'
RETURN COUNT(p) AS paper_count

# What software was used in the paper titled "Tailoring polyamide nanocomposites: The synergistic effects of SWCNT chirality and maleic anhydride grafting"?
MATCH (p:Paper {{title: 'Tailoring polyamide nanocomposites: The synergistic effects of SWCNT chirality and maleic anhydride grafting'}})-[:UTILIZES]->(s:Skill)
WHERE s.skill_type = 'Design and Simulation Software'
RETURN s.skill AS software_used

# List all papers published in the journal "ACS Applied Engineering Materials".
MATCH (p:Paper)
WHERE p.journaltitle = 'ACS Applied Engineering Materials'
RETURN p.title AS paper_title, p.date AS publication_date

# What is the novelty of the paper titled "MIML: Multiplex Image Machine Learning for High Precision Cell Classification via Mechanical Traits within Microfluidic Systems"?
MATCH (p:Paper {{title: 'MIML: Multiplex Image Machine Learning for High Precision Cell Classification via Mechanical Traits within Microfluidic Systems'}})
RETURN p.abstract_novelty AS novelty

# Which programming languages do you know?
MATCH (s:Skill {{skill_type: 'Programming Languages'}})
RETURN s.skill AS skill_name



The question is:
{question}

"""

cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=cypher_generation_template
)

# QA generation template for interpreting Cypher results related to Papers and Skills
qa_generation_template = """
You are an assistant that takes the results
from a Neo4j Cypher query and forms a human-readable response. The
query results section contains the results of a Cypher query that was
generated based on a user's natural language question. The provided
information is authoritative; you must never doubt it or try to use
your internal knowledge to correct it. Make the answer sound like a
response to the question.

Query Results:
{context}

Question:
{question}

Guidelines:
- If the provided information is empty (e.g., []), respond with: "I don't have the information to answer that question."
- If the information is not empty, provide a clear and concise answer using the results.
- All information pertains to your published papers and the skills utilized or gained.
- Never state that you lack information if query results are present.
- Include all relevant query results in your response if applicable.
- Md Khayrul Islam is the person whose published paper and skills is being queried. Any of the following can refer to him: 'Md Khayrul Islam', 'Md', 'Khayrul', 'Islam', 'Khayrul Islam', 'Md Khayrul', or 'Islam'. Additionally, 'Mr./Dr. Islam', 'Mr./Dr. Khayrul', or 'Mr./Dr. Khayrul Islam' can also be used.
- skills types are Programming Languages,  Tools and Libraries, Design and Simulation Software, Data Analysis and Machine Learning, Soft Skills.
- All tha papers and skills are related to Md Khayrul Islam. So you don't need paper.author or skill.author = Khayrul in your query.

Helpful Answer:
"""


qa_generation_prompt = PromptTemplate(
    input_variables=["context", "question"], template=qa_generation_template
)

# Initialize the GraphCypherQAChain with the OpenAI model and Neo4j graph
papers_chain = GraphCypherQAChain.from_llm(
    cypher_llm=ChatOpenAI(model=CYPHER_MODEL, temperature=0),
    qa_llm=ChatOpenAI(model=QA_MODEL, temperature=0),
    graph=graph,
    verbose=True,
    qa_prompt=qa_generation_prompt,
    cypher_prompt=cypher_generation_prompt,
    validate_cypher=True,
    top_k=100,
)

In [5]:
import os

from langchain.chains import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI

# Models for handling QA and Cypher query generation
QA_MODEL = os.getenv("QA_MODEL")
CYPHER_MODEL = os.getenv("CYPHER_MODEL")

# Connect to Neo4j database
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI"),
    username=os.getenv("NEO4J_USERNAME"),
    password=os.getenv("NEO4J_PASSWORD"),
)

# Refresh schema to get the latest structure
graph.refresh_schema()


cypher_generation_template = """
Task:
Generate a Cypher query for a Neo4j graph database based on the provided schema and user question.

Instructions:
- Use only the relationship types and properties defined in the schema below.
- Do not introduce any new relationship types or properties.
- Do not include explanations, apologies, or any text outside the Cypher query.
- Ensure the direction of relationships is correct.
- Use proper aliasing for entities and relationships.
- Do not perform any operations that modify the database (e.g., CREATE, DELETE).
- Alias all intermediate statements using `WITH` clauses as necessary.

Schema:
{schema}

Nodes:
- Education(id, institution, location, degree, gpa, startDate, endDate, awards, thesisTitle)
- Course(CourseID, courseName, courseGrade, EducationID)


Relationships:
- (Education)-[:INCLUDED_IN]->(Course)

Example Questions and Cypher Queries:

# What courses are included in the Ph.D. program?
MATCH (e:Education {{degree: 'Ph.D. in Mechanical Engineering and Mechanics'}})-[:INCLUDED_IN]->(c:Course)
RETURN c.courseName AS course_name

# Which courses did Khayrul take during his M.S. degree?
MATCH (e:Education {{degree: 'M.S. in Mechanical Engineering and Mechanics'}})-[:INCLUDED_IN]->(c:Course)
RETURN c.CourseName AS course_name

# How many courses were completed for the B.Sc. degree?
MATCH (e:Education {{degree: 'B.Sc. in Industrial and Production Engineering'}})-[:INCLUDED_IN]->(c:Course)
RETURN COUNT(c) AS course_count

# What is Khayrul's GPA for his Ph.D.?
MATCH (e:Education {{degree: 'Ph.D. in Mechanical Engineering and Mechanics'}})
RETURN e.gpa AS gpa
LIMIT 1

String category values:
- Institutions: 'Lehigh University', 'IBM', 'Bangladesh University of Engineering and Technology'
- Locations: 'Pennsylvania, USA', 'Online', 'Dhaka, Bangladesh'
- Degrees: 'Ph.D. in Mechanical Engineering and Mechanics', 'M.S. in Mechanical Engineering and Mechanics',
            'Professional Certificate in Data Science', 'B.Sc. in Industrial and Production Engineering'
- Awards: 'P.C. Rossin College of Engineering fellowship', 'SCEA- PTAK prize global case study competition scholarship'

The question is:
{question}
"""


cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=cypher_generation_template
)

# QA generation template for interpreting Cypher results
qa_generation_template = """
You are an assistant that takes the results
from a Neo4j Cypher query and forms a human-readable response. The
query results section contains the results of a Cypher query that was
generated based on a users natural language question. The provided
information is authoritative, you must never doubt it or try to use
your internal knowledge to correct it. Make the answer sound like a
response to the question.

Query Results:
{context}

Question:
{question}

If the provided information is empty, say you don't know the answer.
Empty information looks like this: []

If the information is not empty, you must provide an answer using the
results. 

All the information you get thourgh query is about Khayrul's educational background.

Never say you don't have the right information if there is data in
the query results. Make sure to show all the relevant query results
if you're asked.

Helpful Answer:
"""

qa_generation_template = """
You are an assistant that takes the results
from a Neo4j Cypher query and forms a human-readable response. The
query results section contains the results of a Cypher query that was
generated based on a user's natural language question. The provided
information is authoritative; you must never doubt it or try to use
your internal knowledge to correct it. Make the answer sound like a
response to the question.

Query Results:
{context}

Question:
{question}

Guidelines:
- If the provided information is empty (e.g., []), respond with: "I don't have the information to answer that question."
- If the information is not empty, provide a clear and concise answer using the results.
- All information pertains to Khayrul's educational background.
- Never state that you lack information if query results are present.
- Include all relevant query results in your response if applicable.
- Md Khayrul Islam is the person whose educational background is being queried. Any of the following can refer to him: 'Md Khayrul Islam', 'Md', 'Khayrul', 'Islam', 'Khayrul Islam', 'Md Khayrul', or 'Islam'. Additionally, 'Mr./Dr. Islam', 'Mr./Dr. Khayrul', or 'Mr./Dr. Khayrul Islam' can also be used.
- B.Sc. or bsc stands for Bachelor of Science.
- M.S. or ms stands for Master of Science.
- Ph.D. or phd stands for Doctor of Philosophy.


Helpful Answer:
"""


qa_generation_prompt = PromptTemplate(
    input_variables=["context", "question"], template=qa_generation_template
)

# Initialize the GraphCypherQAChain with the OpenAI model and Neo4j graph
education_chain = GraphCypherQAChain.from_llm(
    cypher_llm=ChatOpenAI(model=CYPHER_MODEL, temperature=0),
    qa_llm=ChatOpenAI(model=QA_MODEL, temperature=0),
    graph=graph,
    verbose=True,
    qa_prompt=qa_generation_prompt,
    cypher_prompt=cypher_generation_prompt,
    validate_cypher=True,
    top_k=100,
)


In [6]:
print(education_chain.graph_schema)

Node properties are the following:
Education {id: STRING, institution: STRING, endDate: STRING, awards: STRING, thesisTitle: STRING, degree: STRING, gpa: STRING, location: STRING, startDate: STRING},Course {id: STRING, courseName: STRING, courseGrade: STRING, EducationID: STRING, courseID: STRING},Skill {id: INTEGER, skill: STRING, skill_type: STRING},Paper {id: STRING, date: STRING, abstract_novelty: STRING, keywords: STRING, issue: STRING, author: STRING, title: STRING, url: STRING, skills: STRING, volume: STRING, pages: STRING, contribution: STRING, first_author: STRING, abstract_result: STRING, publisher: STRING, journaltitle: STRING, abstract_challenge: STRING, doi: STRING}
Relationship properties are the following:

The relationships are the following:
(:Education)-[:INCLUDED_IN]->(:Course),(:Paper)-[:UTILIZES]->(:Skill)


In [7]:
# Example query you want to test
test_query ="What courses are included in the Ph.D. program?"
# Use the correct key 'query' instead of 'question'
response = education_chain.invoke({"query": test_query})

# Print the response from the chain, which includes the Cypher result
print(f"Response:\n{response}")




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (e:Education {degree: 'Ph.D. in Mechanical Engineering and Mechanics'})-[:INCLUDED_IN]->(c:Course)
RETURN c.courseName AS course_name[0m
Full Context:
[32;1m[1;3m[{'course_name': 'Computational Fluid Dynamics'}, {'course_name': 'Advance Manufacturing Science'}, {'course_name': 'Math Methods In Engr II'}, {'course_name': 'Advanced Fluid Mechanics'}, {'course_name': 'Math Methods In Engr I'}, {'course_name': 'Heat & Mass Transfer'}, {'course_name': 'Advanced Finite Elements'}, {'course_name': 'Intro to Mach Learning'}, {'course_name': 'General Examination'}][0m

[1m> Finished chain.[0m
Response:
{'query': 'What courses are included in the Ph.D. program?', 'result': 'The courses included in the Ph.D. program for Md Khayrul Islam are:\n- Computational Fluid Dynamics\n- Advance Manufacturing Science\n- Math Methods In Engr II\n- Advanced Fluid Mechanics\n- Math Methods In Engr I\n- Heat & Mass Tr

In [8]:
import os
from langchain import hub
from langchain.agents import AgentExecutor, Tool, create_openai_functions_agent
from langchain_openai import ChatOpenAI


HOSPITAL_AGENT_MODEL = os.getenv("HOSPITAL_AGENT_MODEL")

hospital_agent_prompt = hub.pull("hwchase17/openai-functions-agent")

tools = [
    Tool(
        name="EducationGraph",
        func=education_chain.invoke,  # Update the function name according to your chain
        description="""Useful for answering questions about Khayrul's education background, 
        including institutions, degrees, GPA, thesis titles, and courses taken. Use the entire 
        prompt as input to the tool. For instance, if the prompt is "What is Khayrul's GPA 
        for his Ph.D.?", the input should be "What is Khayrul's GPA for his Ph.D.?"
        """,
    ),
]

chat_model = ChatOpenAI(
    model=HOSPITAL_AGENT_MODEL,
    temperature=0,
)

hospital_rag_agent = create_openai_functions_agent(
    llm=chat_model,
    prompt=hospital_agent_prompt,
    tools=tools,
)

hospital_rag_agent_executor = AgentExecutor(
    agent=hospital_rag_agent,
    tools=tools,
    return_intermediate_steps=True,
    verbose=True,
)


  prompt = loads(json.dumps(prompt_object.manifest))


In [9]:
test_query ="What courses are included in the Ph.D. program?"
response = hospital_rag_agent_executor.invoke({"input": test_query})
print(response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `EducationGraph` with `{'config': {'tags': ['Ph.D.', 'courses']}}`


[0m

[1m> Entering new GraphCypherQAChain chain...[0m


ValueError: Missing some input keys: {'query'}