## Installing the basics required

In [24]:

import os
import pickle
from dotenv import load_dotenv
import spacy
import nltk
import vertexai
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sentence_transformers import SentenceTransformer
from langchain_google_vertexai import VertexAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema.document import Document
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import AIMessage, HumanMessage
from googletrans import Translator

# Download NLTK resources
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

# Load environment variables
load_dotenv()
api_key = os.getenv('API_KEY')
os.environ['GOOGLE_API_KEY'] = api_key
project_id = "mimetic-fulcrum-407320"
vertexai.init(project=project_id, location="us-central1")


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/adarshbhattarai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/adarshbhattarai/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/adarshbhattarai/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Preprocess text functions

In [35]:

# Load environment variables
load_dotenv()
api_key = os.getenv('API_KEY')
os.environ['GOOGLE_API_KEY'] = api_key
project_id = "mimetic-fulcrum-407320"
vertexai.init(project=project_id, location="us-central1")

class Content:
    def __init__(self, url, paragraphs):
        self.url = url
        self.paragraphs = paragraphs

with open('allContent.pkl', 'rb') as f:
    all_content = pickle.load(f)

# Preprocess text functions
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    tokens = word_tokenize(text)
    lemmatized_tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens]
    return " ".join(lemmatized_tokens)

stop_words = set(stopwords.words('english'))

def remove_stopwords(text):
    tokens = word_tokenize(text)
    filtered_tokens = [token for token in tokens if token.lower() not in stop_words]
    return " ".join(filtered_tokens)

# Initialize knowledge base
knowledge_base = {}
for content in all_content:
    knowledge_base[content.url] = content.paragraphs

# Preprocess paragraphs
for url, paragraphs in knowledge_base.items():
    processed_paragraphs = []
    for paragraph in paragraphs:
        preprocessed_paragraph = preprocess_text(paragraph)
        preprocessed_paragraph = remove_stopwords(preprocessed_paragraph)
        processed_paragraphs.append(preprocessed_paragraph)
    knowledge_base[url] = processed_paragraphs

# Initialize embeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Initialize vector store
vectorstore = FAISS.from_texts(
    [paragraph for paragraphs in knowledge_base.values() for paragraph in paragraphs],
    embeddings,
)

# Initialize retriever
retriever = vectorstore.as_retriever()

# Initialize conversational memory
memory = ConversationBufferMemory(memory_key="chat_history")

gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
all_documents = []

for url, paragraphs in knowledge_base.items():
    for paragraph in paragraphs:
        if paragraph.strip():
            document = Document(page_content=paragraph, metadata={'url': url})
            all_documents.append(document)



In [48]:

db = FAISS.from_documents(all_documents, gemini_embeddings)

user_prompt_model = """Given the user query {query} , present your answer in 3 sentences and make it as clear and concise."""

rephrase_prompt_model = """Given the user query {query} , present your answer in 3 sentences and make it as clear and concise. Be context aware as user has sent history of conversation as well."""

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context and latest history if present to answer "
    "the question. Use three sentences maximum and keep the answer concise. If you don't know the answer, just reply 'NoIdea'."
    "\n\n{context}"
)

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "use three sentences maximum and keep the answer concise, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

chain_of_thought_prompt = (
    "You will act as a professional and knowledgeable dentist. The user, acting as the patient, will provide their details. "
    "Your role is to diagnose potential dental issues and suggest the best course of action based on their condition. "
    "Use the available knowledge base to provide accurate, context-aware answers, considering the patient's name, age, lifestyle, "
    "smoker/non-smoker status, medical condition, allergy history, and current context. If the answer is not found in the uploaded content, use Google's LLM. "
    "Follow these steps:\n\n"
    "1. Summarize the patient's details: age, name, symptoms, medical condition, allergy history, smoker/non-smoker status, and constraints if present.\n"
    "2. Ask about their last dental check-up, any previous procedures, or ongoing dental care.\n"
    "3. Identify and list possible causes for the patient's symptoms.\n"
    "4. Explain the reasoning behind each possible diagnosis.\n"
    "5. Use a decision-tree approach to narrow down the most probable cause.\n"
    "6. Suggest conventional treatments and provide home remedies and natural alternatives.\n"
    "7. Explain why each treatment is recommended.\n"
    "8. Specify treatment times and the importance of follow-up appointments.\n"
    "9. Consider the patient's age, lifestyle, medical history, and smoker/non-smoker status.\n"
    "10. Conduct a risk assessment and recommend preventive measures.\n"
    "11. Educate the patient on proper oral care techniques and emphasize the importance of regular checkups.\n\n"
    "The response should be clear, concise, and easy to understand. Use paragraphs for general information and bullet points for oral care education."
)

combined_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", chain_of_thought_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        ("context", "{context}"), 
    ]
)

combined_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", chain_of_thought_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


ValueError: Unexpected message type: context. Use one of 'human', 'user', 'ai', 'assistant', or 'system'.

In [None]:

class LLM:
    def __init__(self):
        self.model = VertexAI(model_name="gemini-1.0-pro-001")

    def combine_docs_chain(self):
        system_prompt = chain_of_thought_prompt  
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])
        return create_stuff_documents_chain(llm=self.model, prompt=prompt)


In [49]:

class DentalChatbot:
    def __init__(self):
        self.llm = LLM()
        self.model = self.llm.model
        self.retriever = db.as_retriever()
        self.chat_history = []
        self.feedback_counter = 0
        self.translator = Translator()

    def get_rag_chain(self):
        chain = self.llm.combine_docs_chain()
        return create_retrieval_chain(self.retriever, chain)

    def answer_question(self, patient_details, query, conversation_history=None, lang='en'):
        input_data = {
            "patient_details": patient_details,
            "query": query
        }

        if conversation_history:
            history_aware_retriever = create_history_aware_retriever(
                self.model, self.retriever, contextualize_q_prompt
            )
            qa_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", system_prompt),
                    MessagesPlaceholder("chat_history"),
                    ("human", "{input}"),
                ]
            )
            question_answer_chain = create_stuff_documents_chain(self.model, qa_prompt)
            rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
            result = rag_chain.invoke({"input": input_data, "chat_history": conversation_history})
            answer = result['answer']
        else:
            chain = self.get_rag_chain()
            result = chain.invoke({"input": input_data})
            answer = result['answer']

        if answer == 'NoIdea':
            ai_prompt = PromptTemplate.from_template(user_prompt_model)
            chain = ai_prompt | self.llm.model
            answer = chain.invoke({"query": query})

        detected_lang = self.translator.detect(query).lang
        if detected_lang != 'en':
            answer = self.translator.translate(answer, src='en', dest=detected_lang).text

        return answer

    def interact(self):
        while True:
            name = input("Enter your name: ")
            age = input("Enter your age: ")
            symptoms = input("Describe your symptoms: ")
            medical_condition = input("Do you have any underlying medical conditions? (yes/no): ")
            if medical_condition.lower() == "yes":
                medical_condition = input("Please specify your medical conditions: ")
            else:
                medical_condition = "None"
            allergy_history = input("Do you have any allergies? (yes/no): ")
            if allergy_history.lower() == "yes":
                allergy_history = input("Please specify your allergies: ")
            else:
                allergy_history = "None"
            smoker_status = input("Are you a smoker? (yes/no): ")
            current_dental_history = input("When was your last dental check-up? Have you had any major procedures? ")

            patient_details = {
                "name": name,
                "age": age,
                "symptoms": symptoms,
                "medical_condition": medical_condition,
                "allergy_history": allergy_history,
                "smoker_status": smoker_status,
                "current_dental_history": current_dental_history
            }

            question = input("Please ask your dental question: ")
            if question.lower() == "quit":
                break
            response = self.answer_question(patient_details=patient_details, query=question, conversation_history=self.chat_history)
            print("Response:", response)
            self.chat_history.extend([
                HumanMessage(content=question),
                AIMessage(content=response),
            ])

            self.feedback_counter += 1
            if self.feedback_counter % 5 == 0:
                user_feedback = input("Was this answer helpful? (yes/no): ")
                if user_feedback.lower() == "yes":
                    continue
                elif user_feedback.lower() == "no":
                    new_query = input("I'm sorry. Can you please provide more details or ask another question: ")
                    if new_query:
                        response = self.answer_question(patient_details=patient_details, query=new_query, conversation_history=self.chat_history)
                        print("Response:", response)
                        self.chat_history.extend([
                            HumanMessage(content=new_query),
                            AIMessage(content=response),
                        ])
                    else:
                        print("Okay, let me know if you have any other questions.")
                        continue


In [50]:

if __name__ == "__main__":
    chatbot = DentalChatbot()
    chatbot.interact()


Enter your name:  aasthu
Enter your age:  20
Describe your symptoms:  pain
Do you have any underlying medical conditions? (yes/no):  no
Do you have any allergies? (yes/no):  no
Are you a smoker? (yes/no):  no
When was your last dental check-up? Have you had any major procedures?  2 years back
Please ask your dental question:  i saw my gums were bleeding while i was brushing my teeth in the morning. i am having sensitivity since then. how do i treat this at home?


ValueError: Prompt must accept context as an input variable. Received prompt with input variables: ['chat_history', 'input']

In [27]:
# Prompts
few_shot_prompt = """
You are a knowledgeable and helpful dental assistant. Your task is to answer questions based on the provided context, 
ensuring that responses are accurate and relevant. Use a generous and formal tone, and always conclude by suggesting
that the user visit a professional dentist or book an appointment for the best results. Assist step by step if necessary.

User history is like follow:

{name} visited dental {dental_visits} and has this medical history: {history}

Suggest your answer based on user history. Always start by greeting with {name}

{context}

Q: {input}
A: 
"""

user_prompt_model = """Given the user query {query} , present your answer in 3 sentences and make it as clear and concise."""

rephrase_prompt_model = """Given the user query {query} , present your answer in 3 sentences and make it as clear and concise. Be context aware as user has sent history of conversation as well."""

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context and latest history if present to answer "
    "the question. Use three sentences maximum and keep the answer concise. If you don't know the answer, just reply 'NoIdea'."
    "\n\n{context}"
)

contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "use three sentences maximum and keep the answer concise, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

chain_of_thought_prompt = (
    "You will act as a professional and knowledgeable dentist. The user, acting as the patient, will provide their details. "
    "Your role is to diagnose potential dental issues and suggest the best course of action based on their condition. "
    "Use the available knowledge base to provide accurate, context-aware answers, considering the patient's name, age, lifestyle, "
    "smoker/non-smoker status, medical condition, allergy history, and current context. If the answer is not found in the uploaded content, use Google's LLM. "
    "Follow these steps:\n\n"
    "1. Summarize the patient's details: age, name, symptoms, medical condition, allergy history, smoker/non-smoker status, and constraints if present.\n"
    "2. Ask about their last dental check-up, any previous procedures, or ongoing dental care.\n"
    "3. Identify and list possible causes for the patient's symptoms.\n"
    "4. Explain the reasoning behind each possible diagnosis.\n"
    "5. Use a decision-tree approach to narrow down the most probable cause.\n"
    "6. Suggest conventional treatments and provide home remedies and natural alternatives.\n"
    "7. Explain why each treatment is recommended.\n"
    "8. Specify treatment times and the importance of follow-up appointments.\n"
    "9. Consider the patient's age, lifestyle, medical history, and smoker/non-smoker status.\n"
    "10. Conduct a risk assessment and recommend preventive measures.\n"
    "11. Educate the patient on proper oral care techniques and emphasize the importance of regular checkups.\n\n"
    "The response should be clear, concise, and easy to understand. Use paragraphs for general information and bullet points for oral care education."
)

combined_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", chain_of_thought_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

class LLM:
    def __init__(self):
        self.model = VertexAI(model_name="gemini-1.0-pro-001")

    def combine_docs_chain(self):
        system_prompt = few_shot_prompt  
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            ("human", "{input}"),
        ])
        return create_stuff_documents_chain(llm=self.model, prompt=prompt)


## Dental chatbot

In [28]:

class DentalChatbot:
    def __init__(self):
        self.llm = LLM()
        self.model = self.llm.model
        self.retriever = db.as_retriever()
        self.chat_history = []
        self.feedback_counter = 0
        self.translator = Translator()

    def get_rag_chain(self):
        chain = self.llm.combine_docs_chain()
        return create_retrieval_chain(self.retriever, chain)

    def answer_question(self, patient_details, query, conversation_history=None, lang='en'):
        input_data = {
            "patient_details": patient_details,
            "query": query
        }

        if conversation_history:
            history_aware_retriever = create_history_aware_retriever(
                self.model, self.retriever, contextualize_q_prompt
            )
            qa_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", system_prompt),
                    MessagesPlaceholder("chat_history"),
                    ("human", "{input}"),
                ]
            )
            question_answer_chain = create_stuff_documents_chain(self.model, qa_prompt)
            rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
            result = rag_chain.invoke({"input": input_data, "chat_history": conversation_history})
            answer = result['answer']
        else:
            chain = self.get_rag_chain()
            result = chain.invoke({"input": input_data})
            answer = result['answer']

        if answer == 'NoIdea':
            ai_prompt = PromptTemplate.from_template(user_prompt_model)
            chain = ai_prompt | self.llm.model
            answer = chain.invoke({"query": query})

        detected_lang = self.translator.detect(query).lang
        if detected_lang != 'en':
            answer = self.translator.translate(answer, src='en', dest=detected_lang).text

        return answer

    def interact(self):
        while True:
            name = input("Enter your name: ")
            age = input("Enter your age: ")
            symptoms = input("Describe your symptoms: ")
            medical_condition = input("Do you have any underlying medical conditions? (yes/no): ")
            if medical_condition.lower() == "yes":
                medical_condition = input("Please specify your medical conditions: ")
            else:
                medical_condition = "None"
            allergy_history = input("Do you have any allergies? (yes/no): ")
            if allergy_history.lower() == "yes":
                allergy_history = input("Please specify your allergies: ")
            else:
                allergy_history = "None"
            smoker_status = input("Are you a smoker? (yes/no): ")
            current_dental_history = input("When was your last dental check-up? Have you had any major procedures? ")

            patient_details = {
                "name": name,
                "age": age,
                "symptoms": symptoms,
                "medical_condition": medical_condition,
                "allergy_history": allergy_history,
                "smoker_status": smoker_status,
                "current_dental_history": current_dental_history
            }

            question = input("Please ask your dental question: ")
            if question.lower() == "quit":
                break
            response = self.answer_question(patient_details=patient_details, query=question, conversation_history=self.chat_history)
            print("Response:", response)
            self.chat_history.extend([
                HumanMessage(content=question),
                AIMessage(content=response),
            ])

            self.feedback_counter += 1
            if self.feedback_counter % 5 == 0:
                user_feedback = input("Was this answer helpful? (yes/no): ")
                if user_feedback.lower() == "yes":
                    continue
                elif user_feedback.lower() == "no":
                    new_query = input("I'm sorry. Can you please provide more details or ask another question: ")
                    if new_query:
                        response = self.answer_question(patient_details=patient_details, query=new_query, conversation_history=self.chat_history)
                        print("Response:", response)
                        self.chat_history.extend([
                            HumanMessage(content=new_query),
                            AIMessage(content=response),
                        ])
                    else:
                        print("Okay, let me know if you have any other questions.")
                        continue

# Example
if __name__ == "__main__":
    chatbot = DentalChatbot()
    chatbot.interact()

Enter your name:  Aastha Poudel
Enter your age:  30
Describe your symptoms:  teethache
Do you have any underlying medical conditions? (yes/no):  yes
Please specify your medical conditions:  thyroid
Do you have any allergies? (yes/no):  yed
Are you a smoker? (yes/no):  no
When was your last dental check-up? Have you had any major procedures?  2years
Please ask your dental question:  i am having teethache . i alsonoticed blood while i was brushing my teeth. what should i do?


TypeError: expected string or bytes-like object

### Step 3: Define function for initializing database with embeddings:

In [64]:

def load_db_with_embeddings(documents):
    print("Applying embeddings Google...")
    gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    print("Store to FAISS applying gemini embeddings...")

    store = LocalFileStore("./cache/")
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        gemini_embeddings, store, namespace=gemini_embeddings.model
    )

    db = FAISS.from_documents(documents, cached_embedder)
    return db


### Step 4: Define function for creating RAG chain with CoT and Google extraction:

In [1]:

def create_rag_chain(db, prompt):
    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )
    system_feedback_prompt = (
        "Please provide feedback on the answer:"
        "\n\n"
        "{context}"
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{input}"),
    ])
    feedback_prompt = ChatPromptTemplate.from_messages([
        ("system", system_feedback_prompt),
        ("human", "{input}"),
    ])

    return create_stuff_documents_chain(
        model=VertexAI(model_name="gemini-1.0-pro-001"),
        prompt=prompt,
        feedback_prompt=feedback_prompt,
        retriever=db.as_retriever(),
        feedback_handler=collect_feedback,
        prompt_context_explainer=None,  # Add a context explainer if needed
        use_context_cache=True,
        force_reretrieval=False,
        retriever_data_override=None,
        extract_from_google=True  # Set to True to enable fetching from Google
    )


### Step 5:Define Prompt Template

In [66]:

def create_prompt_template():
    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    return prompt


### Step 6: Implement RAG Chain

In [67]:

def create_rag_chain(db, prompt):
    model = VertexAI(model_name="gemini-1.0-pro-001")
    retriever = db.as_retriever()
    question_answer_chain = create_stuff_documents_chain(model, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)
    return rag_chain


### Step 7: Setup Multilingual Support

In [68]:

from langchain_google_community import GoogleTranslateTransformer

def translate_query(query, target_language='en'):
    translate_client = GoogleTranslateTransformer(api_key=API_KEY)
    translation = translate_client.translate(query, target_language=target_language)
    return translation['translatedText']

def multilingual_support(query, target_language='en'):
    translated_query = translate_query(query, target_language)
    return translated_query


### Step 8: Implement Feedback Mechanism

In [69]:

class Feedback:
    def __init__(self):
        self.engine = create_engine('sqlite:///feedback.db', echo=True)
        Base = declarative_base()

        class Feedback(Base):
            __tablename__ = 'feedback'
            id = Column(Integer, primary_key=True)
            query = Column(String)
            rating = Column(Integer)

        Base.metadata.create_all(self.engine)
        Session = sessionmaker(bind=self.engine)
        self.session = Session()

    def store_feedback(self, query, rating):
        feedback = Feedback(query=query, rating=rating)
        self.session.add(feedback)
        self.session.commit()


### Step 9: Example