# RAG-Chatbot

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


The following notebook creates a chatbot on the Insurance Documents in the Neo4j Database. This makes use of a RAG-architecture. The following configurations are set:

- **LLM**: To generate the response to clients an OpenAI model is used: [gpt-3.5-turbo-0125](https://platform.openai.com/docs/models/gpt-3-5-turbo).
- **Embedding Model**: For embeddings we use the same embeddings model (from OpenAI) that were used to create the vector index: [text-embedding-3-small](https://platform.openai.com/docs/models/embeddings).
- **The Retrieval**: This is done with Cypher queries in Neo4j.
- **Chatbot**: The chatbot is created with [Gradio](https://www.gradio.app/).

In [3]:
%pip install pypdf langchain_community langchain langchain_openai gradio IPython neo4j

Collecting pypdf
  Downloading pypdf-5.0.0-py3-none-any.whl.metadata (7.4 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.0-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain
  Downloading langchain-0.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain_openai
  Downloading langchain_openai-0.2.0-py3-none-any.whl.metadata (2.6 kB)
Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting neo4j
  Downloading neo4j-5.24.0-py3-none-any.whl.metadata (5.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting langchain-core<0.4.0,>=0.3.0 (from langchain_community)
  Downloading langchain_core-0.3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting langsmith<0.2.0,>=0.1.112 (from langchain_community)
  Downloading langsmith-0.1.128-py3-none-any.whl.metadata (13 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_communi

In [4]:
import pandas as pd
import os
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from neo4j import GraphDatabase
from dotenv import load_dotenv
import gradio as gr
import time
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

## Get Credentials

In [9]:
#if os.path.exists('credentials.env'):
#    load_dotenv('credentials.env', override=True)
#
#    # Neo4j
#    uri = os.getenv('NEO4J_URI')
#    username = os.getenv('NEO4J_USERNAME')
#    password = os.getenv('NEO4J_PASSWORD')
#    database = os.getenv('NEO4J_DATABASE')

#    # AI
#    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
#    os.environ['OPENAI_API_KEY']=OPENAI_API_KEY

#    print("Credentials correctly read and stored.")
#else:
#    print("File 'credentials.env' not found.")

File 'credentials.env' not found.


In [19]:
#uri = userdata.get('NEO4J_URL_SANDBOX_MERCHANT')
#username = userdata.get('NEO4J_USR')
#password = userdata.get('NEO4J_PWD_SANDBOX_MERCHANT')
#database = 'neo4j'
#OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

from pprint import pprint
from google.colab import userdata
uri = 'neo4j+s://2490d4fa.databases.neo4j.io:7687'
username = 'neo4j'
password = 'smLww279BnwxwoAoFixJWANCoaQsdLwJ6bq5TE5U6hg'
database = 'neo4j'
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
os.environ['OPENAI_API_KEY']=OPENAI_API_KEY

## Setup Connection to Database

Setup connection to the database with the Python Driver

In [13]:
class App:
    def __init__(self, uri, user, password, database=None):
        self.driver = GraphDatabase.driver(uri, auth=(user, password), database=database)
        self.database = database

    def close(self):
        self.driver.close()

    def query(self, query):
        return self.driver.execute_query(query)

    def query_params(self, query, parameters):
        return self.driver.execute_query(query, parameters_=parameters)

    def count_nodes_in_db(self):
        query = "MATCH (n) RETURN COUNT(n)"
        result = self.query(query)
        (key, value) = result.records[0].items()[0]
        return value

    def remove_nodes_relationships(self):
        query ="""
            CALL apoc.periodic.iterate(
                "MATCH (c) RETURN c",
                "WITH c DETACH DELETE c",
                {batchSize: 1000}
            )
        """
        result = self.query(query)

    def remove_all_constraints(self):
        query ="""
            CALL apoc.schema.assert({}, {})
        """
        result = self.query(query)

In [14]:
app = App(uri, username, password, database)

In [15]:
app.count_nodes_in_db()

7772

## Create RAG-application

For the the chatbot we both need an Embedding-model and LLM. Create both below:

In [16]:
LLM = 'gpt-4.0'
embedding_model = 'text-embedding-3-small'

In [17]:
embedding_model = OpenAIEmbeddings(
    model=embedding_model,
    openai_api_key=OPENAI_API_KEY
)

In [20]:
llm = ChatOpenAI(temperature=0)
llm.model_name

'gpt-3.5-turbo'

### Retrieval Queries

To illustrate the difference between a "Regular" Vector Search and GraphRAG we create different retrieval queries.

In [21]:
def get_context_vector_search(search_prompt, client_name):
    query_vector = embedding_model.embed_query(search_prompt)

    similarity_query = """
        CALL db.index.vector.queryNodes("chunk-embeddings", 30, $query_vector) YIELD node, score
        WITH node as chunk, score ORDER BY score DESC
        MATCH (c:Client {name: $client_name})<-[:INSURES]-(i:Insurance)-[:HAS_POLICY]->(p:Policy)<-[:PART_OF]-(chunk)
        RETURN score, i.name AS insurance_name, p.file_name as file_name, chunk.page as page, chunk.chunk AS chunk
       """
    results = app.query_params(similarity_query, {'query_vector': query_vector, 'client_name': client_name})
    context = "Related documents: \n\n" + "\n\n".join(["file_name: " + record['file_name'] + "\n" + "page: " + str(record['page'] + 1) + "\n" + "text: " + record['chunk'] + "\n" for record in results.records])
    return context

In [22]:
def get_context_graphrag(search_prompt, client_name):
    query_vector = embedding_model.embed_query(search_prompt)

    similarity_query = """
        CALL db.index.vector.queryNodes("chunk-embeddings", 30, $query_vector) YIELD node, score
        WITH node as chunk, score ORDER BY score DESC
        MATCH (c:Client {name: $client_name})<-[:INSURES]-(i:Insurance)-[:HAS_POLICY]->(p:Policy)<-[:PART_OF]-(chunk)
        OPTIONAL MATCH (chunk)<-[:NEXT]-(previous {page: chunk.page})
        OPTIONAL MATCH (chunk)-[:NEXT]->(next {page: chunk.page})
        RETURN
            score,
            i.name AS insurance_name,
            p.file_name as file_name,
            chunk.page as page,
            previous.chunk AS previous_chunk,
            chunk.chunk AS chunk,
            next.chunk AS next_chunk,
            previous.id as previous_id,
            chunk.id as chunk_id,
            next.id as next_id
       """
    results = app.query_params(similarity_query, {'query_vector': query_vector, 'client_name': client_name})
    context = "Related documents: \n\n" + "\n\n".join(["file_name: " + record['file_name'] + "\n" + "page: " + str(record['page'] + 1) + "\n" + "text: " + " ".join([x for x in [str(y) for y in [record['previous_chunk'], record['chunk'], record['next_chunk']] if y is not None]]) + "\n" for record in results.records])

    chunk_ids = [[record['previous_id'], record['chunk_id'], record['next_id']] for record in results.records]
    chunk_ids = list(set([x for xs in chunk_ids for x in xs if x is not None]))
    definition_query = """
        MATCH (c:Chunk)-[:MENTIONS]-(d:Definition)
        WHERE c.id in $chunk_ids
        WITH DISTINCT d as d
        RETURN d.definition as definition, d.description as description
    """
    results = app.query_params(definition_query, {'chunk_ids': chunk_ids})
    definitions = "Related definitions: \n\n" + "\n\n".join([record['definition'] + ":" + str(record['description'])for record in results.records])
    return context, definitions

Function to retrieve the client name from a client id

In [None]:
# Query to find the retrieval based on a question
# debug_query = """
#     WITH "<QUESTION>" as q
#     WITH genai.vector.encode(q, "OpenAI", {token: $openaikey, model: $model}) as embedding
#     CALL db.index.vector.queryNodes("chunk-embeddings", 30, embedding) YIELD node, score
#     WITH node as chunk, score ORDER BY score DESC
#     MATCH (c:Client {id: 0})<-[:INSURES]-(i:Insurance)-[:HAS_POLICY]->(p:Policy)<-[:PART_OF]-(chunk)
#     OPTIONAL MATCH (chunk)<-[:NEXT]-(previous {page: chunk.page})
#     OPTIONAL MATCH (chunk)-[:NEXT]->(next {page: chunk.page})
#     RETURN score, i.name AS insurance_name, p.file_name as file_name, chunk.page as page, previous.chunk AS previous_chunk, chunk.chunk AS chunk, next.chunk AS next_chunk
# """

### Prompts

Prompt for vector search (without definitions)

In [23]:
def generate_prompt_vector_search(search_prompt, context, user_name):
    prompt_template = """

    You are a chatbot on insurance policies. Your goal is to help people with questions on insurance policies.
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    The user is {user_name} is given and you start your response with 'Hi {user_name}'.
    The policy text is given in the context. Base your answers only and only on these policy documents. You can create an answer in normal language but don't come up with any policy yourself which is not described in one of the documents.
    If questions can not be answered based on the provided context you say 'I'm sorry but I cannot answer this question while looking into your policy: policy_name'. Where policy name comes from the context.
    If the context is empty you say: "I'm sorry but I cannot any relevance in into your policy: policy_name". Where policy name comes from the context. Don't list any relevant pages here.
    You always start your response with an answer on the question asked. Unless the question was empty or out of scope you end your answer with two new line and list all document with the referenced pages. You state this as: References: -file_name, page: page_number. List each page only once and in increasing order.
    If a question is not relevant to insurance policies or out of scope then always send the following answer: "I'm an insurance chatbot and can only provide answers relevant to insurances". Never let you be used for anything else. But only do this if a question is out of scope.

    The question is the following:
    {search_prompt}
    Always respond in the language in which the question was asked. So, do not respond in a different language.

    The context is the following:
    {context}


    """
    prompt = PromptTemplate.from_template(prompt_template)

    theprompt = prompt.format_prompt(search_prompt=search_prompt, context=context, user_name=user_name)
    return theprompt

Prompt for GraphRAG

In [24]:
def generate_prompt_graphrag(search_prompt, context, definitions, user_name):
    prompt_template = """

    You are a chatbot on insurance policies. Your goal is to help people with questions on insurance policies.
    A user will come to you with questions on their policy. Their questions must be answered based on the relevant documents of the policy.
    The user is {user_name} is given and you start your response with 'Hi {user_name}'.
    The policy text is given in the context. In addition definitions are given. Base your answers only and only on these policy documents and definitions. You can create an answer in normal language but don't come up with any policy yourself which is not described in one of the documents.
    If questions can not be answered based on the provided context you say 'I'm sorry but I cannot answer this question while looking into your policy: policy_name'. Where policy name comes from the context.
    If the context is empty you say: "I'm sorry but I cannot any relevance in into your policy: policy_name". Where policy name comes from the context. Don't list any relevant pages here.
    You always start your response with an answer on the question asked. Unless the question was empty or out of scope you end your answer with two new line and list all document with the referenced pages. You state this as: References: -file_name, page: page_number. List each page only once and in increasing order.
    If a question is not relevant to insurance policies or out of scope then always send the following answer: "I'm an insurance chatbot and can only provide answers relevant to insurances". Never let you be used for anything else. But only do this if a question is out of scope.

    The question is the following:
    {search_prompt}
    Always respond in the language in which the question was asked. So, do not respond in a different language.

    The context is the following:
    {context}

    The definitions are the following:
    {definitions}

    """
    prompt = PromptTemplate.from_template(prompt_template)

    theprompt = prompt.format_prompt(search_prompt=search_prompt, context=context, definitions=definitions, user_name=user_name)
    return theprompt

## Some examples to test the models

For every example there can be chosen between GraphRAG and vector search.

In [25]:
search_prompt = 'When am I entitled to be transported by an ambulance?'
client_name = "Hylke"

context = get_context_vector_search(search_prompt, client_name)
theprompt = generate_prompt_vector_search(search_prompt, context, client_name)
llm(theprompt.to_messages()).pretty_print()


Hi Hylke,

You are entitled to be transported by an ambulance if the ambulance transport is approved by the emergency control center and ordered by a general practitioner, medical specialist, geriatric specialist, doctor specialized in treating people with an intellectual disability, or a pediatrician. There must be a medical necessity for the transport, and you must not have to travel more than 200 kilometers to your care provider.

References: 
- zilveren_kruis_basis_2024.pdf, page: 42

- zilveren_kruis_basis_2024.pdf, page: 43


In [26]:
search_prompt = 'When am I entitled to be transported by an ambulance?'
client_name = "Hylke"

context, definitions = get_context_graphrag(search_prompt, client_name)
theprompt = generate_prompt_graphrag(search_prompt, context, definitions, client_name)
llm(theprompt.to_messages()).pretty_print()


Hi Hylke, 

You are entitled to be transported by an ambulance in the following situations:
- To and from a care provider or institution if the care provided is partially or entirely reimbursed by the basic insurance
- To an institution if the costs of your stay are covered by the Dutch Long-term Care Act (Wlz)
- From a Wlz institution to a care provider or institution for examination or treatment reimbursed under the Wlz
- From a Wlz institution to a care provider or institution for prosthesis fitting reimbursed under the Wlz
- From care providers or institutions to your home if you cannot receive care at home
- To a care provider for mental healthcare for an insured person under 18 reimbursed under the Dutch Youth Act

Ambulance transport must be approved by the emergency control center and requested by specific medical professionals. You must have a medical necessity for transport and not have to travel more than 200 kilometers to your care provider.

References: 
- zilveren_kruis_

In [None]:
# search_prompt = 'On which conditions is my dentist visit fully paid back?'
# client_name = "Marco"

# context, definitions = get_context_graphrag(search_prompt, client_name)
# theprompt = generate_prompt_graphrag(search_prompt, context, definitions, client_name)
# llm(theprompt.to_messages()).pretty_print()

In [None]:
# search_prompt = 'What is my reimbursement for incontinence products'
# client_name = "Marco"

# context, definitions = get_context_graphrag(search_prompt, client_name)
# theprompt = generate_prompt_graphrag(search_prompt, context, definitions, client_name)
# llm(theprompt.to_messages()).pretty_print()

In [None]:
# search_prompt = 'What is the definition of abroad?'
# client_name = "Marco"

# context, definitions = get_context_graphrag(search_prompt, client_name)
# theprompt = generate_prompt_graphrag(search_prompt, context, definitions, client_name)
# llm(theprompt.to_messages()).pretty_print()

In [None]:
# search_prompt = 'What is meant with Consultation?'
# client_name = "Marco"

# context, definitions = get_context_graphrag(search_prompt, client_name)
# theprompt = generate_prompt_graphrag(search_prompt, context, definitions, client_name)
# llm(theprompt.to_messages()).pretty_print()

----------------
----------------


## Gradio Chatbot that uses RAG and GraphRAG

Example code is coming from Gradio documentation: [Creating a custom chatbot with blocks](https://www.gradio.app/guides/creating-a-custom-chatbot-with-blocks#add-streaming-to-your-chatbot)

In [27]:
def user(user_message, history):
    return "", history + [[user_message, None]]

def get_answer(search_prompt, client_name, rag_method):
    if rag_method == "Vector-Search":
        context = get_context_vector_search(search_prompt, client_name)
        theprompt = generate_prompt_vector_search(search_prompt, context, client_name)
    else:
    # rag_method == "GraphRAG"
        context, definitions = get_context_graphrag(search_prompt, client_name)
        theprompt = generate_prompt_graphrag(search_prompt, context, definitions, client_name)
    messages = llm(theprompt.to_messages())
    return messages.content

def bot(history, client_name, rag_method):
    bot_message = get_answer(history[-1][0], client_name, rag_method)
    history[-1][1] = ""
    for character in bot_message:
        history[-1][1] += character
        time.sleep(0.01)
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        label="Chatbot with RAG",
        avatar_images=["https://png.pngtree.com/png-vector/20220525/ourmid/pngtree-concept-of-facial-animal-avatar-chatbot-dog-chat-machine-illustration-vector-png-image_46652864.jpg","https://d-cb.jc-cdn.com/sites/crackberry.com/files/styles/larger/public/article_images/2023/08/openai-logo.jpg"]
    )
    msg = gr.Textbox(label="Message")
    client_name = gr.Textbox(label="Client Name")
    rag_method = gr.Radio(["Vector-Search", "GraphRAG"], label="RAG-method:")
    clear = gr.Button("Clear")


    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, client_name, rag_method], chatbot
    )
    client_name.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, client_name, rag_method], chatbot
    )

    clear.click(lambda: None, None, chatbot, queue=False)


demo.queue()
demo.launch(share=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



If you want to have the light-mode for the chatbot paste the following after the URL: /?__theme=light