### Step 1: Load the Required Libraries 

In [71]:

import os
import pickle
from dotenv import load_dotenv
import spacy
import nltk
import vertexai
import random

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sentence_transformers import SentenceTransformer
from googletrans import Translator
from langchain_google_vertexai import VertexAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema.document import Document
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.agents import Tool, initialize_agent
from langchain.agents.types import AgentType
from googlesearch import search
from langchain.memory import ConversationBufferMemory
from langchain.chains import create_retrieval_chain
from langchain.chains import ConversationChain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

### Download NLTK and SpaCy Resources and configure vertex ai

load_dotenv()
api_key = os.getenv('API_KEY')
os.environ['GOOGLE_API_KEY'] = api_key
project_id = "mimetic-fulcrum-407320"
vertexai.init(project=project_id, location="us-central1")

# Initialize FAISS retriever
#embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
#faiss_retriever = FAISS(embedding_model)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/adarshbhattarai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/adarshbhattarai/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/adarshbhattarai/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Step 2: Load the Required Data

In [72]:

all_content=[]

class Content:
    def __init__(self, url, paragraphs):
        self.url = url
        self.paragraphs = paragraphs

with open('allContent.pkl', 'rb') as f:
    all_content = pickle.load(f)

all_content


[<__main__.Content at 0x7fd754491040>,
 <__main__.Content at 0x7fd7103b4a00>,
 <__main__.Content at 0x7fd711008670>,
 <__main__.Content at 0x7fd711008df0>,
 <__main__.Content at 0x7fd711008dc0>,
 <__main__.Content at 0x7fd711008070>,
 <__main__.Content at 0x7fd711008be0>,
 <__main__.Content at 0x7fd711008760>,
 <__main__.Content at 0x7fd7110089a0>,
 <__main__.Content at 0x7fd711008a30>,
 <__main__.Content at 0x7fd711008700>,
 <__main__.Content at 0x7fd711008a90>,
 <__main__.Content at 0x7fd711008820>,
 <__main__.Content at 0x7fd711008430>,
 <__main__.Content at 0x7fd711008b80>,
 <__main__.Content at 0x7fd711008880>,
 <__main__.Content at 0x7fd711008370>,
 <__main__.Content at 0x7fd7110082b0>,
 <__main__.Content at 0x7fd7110082e0>,
 <__main__.Content at 0x7fd711008d00>,
 <__main__.Content at 0x7fd711008520>,
 <__main__.Content at 0x7fd7110089d0>,
 <__main__.Content at 0x7fd7110087f0>,
 <__main__.Content at 0x7fd711008fa0>,
 <__main__.Content at 0x7fd723a097c0>,
 <__main__.Content at 0x7

### Step 3: Preprocessing the Data

### Tokenization and Lemmatization, Stopwords Removal

In [73]:

lemmatizer = WordNetLemmatizer()
def preprocess_text(text):
    tokens = word_tokenize(text)
    lemmatized_tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens]
    return " ".join(lemmatized_tokens)

stop_words = set(stopwords.words('english'))
def remove_stopwords(text):
    tokens = word_tokenize(text)
    filtered_tokens = [token for token in tokens if token.lower() not in stop_words]
    return " ".join(filtered_tokens)

knowledge_base = {}
for content in all_content:
    knowledge_base[content.url] = content.paragraphs

for url, paragraphs in knowledge_base.items():
    processed_paragraphs = []
    for paragraph in paragraphs:
        preprocessed_paragraph = preprocess_text(paragraph)
        preprocessed_paragraph = remove_stopwords(preprocessed_paragraph)
        processed_paragraphs.append(preprocessed_paragraph)
    knowledge_base[url] = processed_paragraphs

print(knowledge_base)




###  Step 4: Define Embeddings Using FAISS

In [74]:

gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

allDocuments = []

for url, paragraphs in knowledge_base.items():
    for paragraph in paragraphs:
        if paragraph.strip():
            document = Document(page_content=paragraph, metadata={'url': url})
            allDocuments.append(document)

db = FAISS.from_documents(allDocuments, gemini_embeddings)



### Step 6: Define the LLM (Placeholder), COT, RAG

In [75]:

class LLM:
    def __init__(self):
        self.model = VertexAI(model_name="gemini-1.0-pro-001")

    def combine_docs_chain(self):
        system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question."
            "Use three sentences maximum and keep the "
            "answer concise. If you don't know the answer, just reply 'NoIdea'."
            ""
            "\n\n"
            "{context}"
        )

        prompt = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            ("human", "{input}"),
        ])

        return create_stuff_documents_chain(llm=self.model, prompt=prompt)


### Step 8: Creating the Dental Chatbot Class

In [76]:

user_prompt_model = """Given the user query {query} , present your answer in 3 sentence 
    and make it as clear and concise"""

rephrase_prompt_model = """Given the user query {query} , present your answer in 3 sentence 
    and make it as clear and concise. be context aware as user has sent history of conversation as well"""

system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context and latest history if present to answer "
            "the question."
            "Use three sentences maximum and keep the "
            "answer concise. If you don't know the answer, just reply 'NoIdea'."
            ""
            "\n\n"
            "{context}"
        )

### Contextualize question ###
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is. Keep your answer concise and use 3 sentences to answer"
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
class DentalChatbot:
    def __init__(self):
        self.llm = LLM()
        self.model = self.llm.model
        self.retriever = db.as_retriever()

    def get_rag_chain(self):
        chain = self.llm.combine_docs_chain()
        rag_chain = create_retrieval_chain(self.retriever, chain)
        return rag_chain

    def answer_question(self, query, conversation_history=None, lang='en'):
        if conversation_history:
            history_aware_retriever = create_history_aware_retriever(
                self.model, self.retriever, contextualize_q_prompt
            )
            qa_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", system_prompt),
                    MessagesPlaceholder("chat_history"),
                    ("human", "{input}"),
                ]
            )
            question_answer_chain = create_stuff_documents_chain(self.model, qa_prompt)
            rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
            result = rag_chain.invoke({"input":query, "chat_history": conversation_history})
            #print("printing result", result)
            answer = result['answer']
        else:
            chain = self.get_rag_chain()
            result = chain.invoke({"input": query})
            answer = result['answer']
        if (answer == 'NoIdea'):
            # Use Google's LLM if answer not found in uploaded content
            print("No relevant documents found. using models response")
            ai_prompt = PromptTemplate.from_template(user_prompt_model)
            chain = ai_prompt | self.llm.model
            answer = chain.invoke({"query": query})
        
        return answer


### Step 9: Example Usage

In [77]:

chatbot = DentalChatbot()
question = input("Ask Your question ")
response = chatbot.answer_question(question)
print(f"Response: {response}")
chat_history = []
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=response),
    ]
)
i=0
while True:
    
    question = input("Ask Question")
    if question== "quit":
        break
    i=i+1
    response = chatbot.answer_question(query=question,conversation_history=chat_history)
    print("Answer:\n" , response)
    chat_history.extend(
        [
            HumanMessage(content=question),
            AIMessage(content=response),
        ]
    )
    

Ask Your question  why is my teeth yellow?


No relevant documents found. using models response
Response: Your teeth may appear yellow due to several factors, including the natural aging process, consumption of staining foods and beverages, and poor oral hygiene. The yellowing can also be caused by underlying medical conditions or medications.


Ask Question quit


### Step 10: Implementing Multilingual Support

In [69]:

def answer_question(self, question, conversation_history=None, lang='en'):
        question_translated = translate_text(question, dest_lang='en')
        rag_chain = self.get_rag_chain()
        agent = self.get_agent()
        
        if conversation_history:
            chain = ConversationalRetrievalQAChain.from_llm(self.llm.model, retriever=self.retriever, conversation_history=conversation_history)
            try:
                answer = chain.run(question_translated)
            except:
                print("No relevant documents found. Using Google Search.")
                answer = agent.run(question_translated)
        else:
            try:
                answer = rag_chain.run(question_translated)
            except:
                print("No relevant documents found. Using Google Search.")
                answer = agent.run(question_translated)
        
        answer_translated = translate_text(answer, dest_lang=lang)
        return answer_translated


### Step 11: Implementing User Feedback and Iterative Refinements

In [70]:

class DentalChatbot:
    def __init__(self):
        self.llm = LLM()
        self.model = self.llm.model
        self.retriever = db.as_retriever()
        self.feedback_log = []

    def get_rag_chain(self):
        chain = self.llm.combine_docs_chain()
        rag_chain = create_retrieval_chain(self.retriever, chain)
        return rag_chain

    def get_agent(self):
        tools = [
            Tool(
                name="Google Search",
                func=self.google_search,
                description="Use this tool to search the internet for information."
            ),
        ]
        agent = initialize_agent(
            tools,
            self.llm.model,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )
        return agent

    def google_search(self, query):
        search_results = []
        for result in search(query, num_results=5):
            search_results.append(result)
        return search_results

    def answer_question(self, question, conversation_history=None, lang='en'):
        question_translated = translate_text(question, dest_lang='en')
        rag_chain = self.get_rag_chain()
        agent = self.get_agent()
        
        if conversation_history:
            chain = ConversationalRetrievalQAChain.from_llm(self.llm.model, retriever=self.retriever, conversation_history=conversation_history)
            try:
                answer = chain.run(question_translated)
            except:
                print("No relevant documents found. Using Google Search.")
                answer = agent.run(question_translated)
        else:
            try:
                answer = rag_chain.run(question_translated)
            except:
                print("No relevant documents found. Using Google Search.")
                answer = agent.run(question_translated)
        
        answer_translated = translate_text(answer, dest_lang=lang)
        return answer_translated

    def log_feedback(self, question, answer, feedback):
        self.feedback_log.append({'question': question, 'answer': answer, 'feedback': feedback})
        # Here you could add logic to store feedback in a database or file for further analysis

# Example usage of feedback
if __name__ == "__main__":
    chatbot = DentalChatbot()
    query = "What are the symptoms of gum disease?"
    response = chatbot.answer_question(query, lang='en')
    print("Chatbot response:", response)

    # Simulate user feedback
    feedback = input("Please rate the response (1-5): ")
    chatbot.log_feedback(query, response, feedback)
    print("Feedback logged.")


NameError: name 'translate_text' is not defined

### Example Usage with Feedback

In [None]:


class DentalChatbot:
    def __init__(self):
        self.llm = LLM()
        self.model = self.llm.model
        self.retriever = db.as_retriever()
        self.chat_history = []
        self.feedback_counter = 0
        self.translator = Translator()

        self.eval_llm = VertexAI(model_name="gemini-1.5-pro", temperature=0.5)

        system_prompt = (
            "Act as a professional and knowledgeable dentist assistant for question-answering tasks."
            "Use the following pieces of retrieved context the question."
            "Think step by step."
            "If the question is relevant to the context and history, provide clear and concise answer."
            "As an AI-assisted dentist, you are expected to help all age groups of people using semi-formal language."
            "If the question is relevant to the context and history, provide a clear and concise answer in an empathetic and respectful way."
            "If the user query is irrelevant to the context, just answer 'NoIdea'."
            "\n\n{context}"
        )
        combine_prompt = chain_of_thought_prompt + "\n\n" + system_prompt
        self.combine_prompt = combine_prompt

    def get_rag_chain(self):
        chain = self.llm.combine_docs_chain()  # Assuming this method is defined in LLM
        return create_retrieval_chain(self.retriever, chain)

    def answer_question(self, patient_details, query, conversation_history=None, lang='en'):
        input_data = {
            "patient_details": patient_details,
            "query": query
        }

        if conversation_history:
            history_aware_retriever = create_history_aware_retriever(
                self.model, self.retriever, contextualize_q_prompt
            )
            qa_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", self.combine_prompt),
                    MessagesPlaceholder("chat_history"),
                    ("human", "{input}"),
                ]
            )
            question_answer_chain = create_stuff_documents_chain(self.model, qa_prompt)
            rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
            result = rag_chain.invoke({"input": str(input_data), "chat_history": conversation_history})
            answer = result['answer']
        else:
            chain = self.get_rag_chain()
            result = chain.invoke({"input": str(input_data)})
            answer = result['answer']
    
        if answer == 'NoIdea':
            user_prompt_model = """
            You are the dental assistant. The user is reaching out to you because the knowledge base couldn't find 
            relevant information and is seeking help from the LLM. Given the user query {query}, respond based on the original 
            model's format which is appended below. Ensure your response is context-aware for the current user, considering the conversation history provided by the same user.
    
            User information is provided here for your reference.
    
            Its a JSON format data where patient_details holds patient information and query holds query information
            "patient_details": patient_details,
            "query": query
    
            {detail}
            """
            ai_prompt = PromptTemplate.from_template(user_prompt_model + "\n\n" + self.combine_prompt)
            chain = ai_prompt | self.llm.model
            answer = chain.invoke({"query": query, "context": "Dental Chatbot", "detail": str(input_data)})
    
            detected_lang = self.translator.detect(query).lang
            if detected_lang != 'en':
                answer = self.translator.translate(answer, src='en', dest=detected_lang).text
        
        return answer

    def evaluate_answer(self, answer):
        evaluation_prompt = f"Evaluate the following dental chatbot response for its accuracy, clarity, and empathy:\n\n{answer}"
        evaluation = self.eval_llm(evaluation_prompt)
        return evaluation

    def interact(self):
        print("Welcome to the AI-Assisted Dentist chatbot!")
        print("I'm here to help you with your dental questions and concerns.")
        print("At any time, you can type 'quit' to exit the chat.")

        name = input("First, may I have your name please? ")
        age = input(f"Great, thank you. How old are you, {name}? ")
        medical_condition = input("Do you have any underlying medical conditions? (yes/no) ")
        if medical_condition.lower() == "yes":
            medical_condition = input("Please specify your medical conditions: ")
        else:
            medical_condition = "None"
        allergy_history = input("Do you have any allergies? (yes/no) ")
        if allergy_history.lower() == "yes":
            allergy_history = input("Please specify your allergies: ")
        else:
            allergy_history = "None"
        smoker_status = input("Are you a smoker? (yes/no) ")
        current_dental_history = input("When was your last dental check-up? Have you had any major procedures? ")

        patient_details = {
            "name": name,
            "age": age,
            "medical_condition": medical_condition,
            "allergy_history": allergy_history,
            "smoker_status": smoker_status,
            "current_dental_history": current_dental_history
        }

        while True:
            question = input("Please ask your dental question: ")
            if question.lower() == "quit":
                print("Thank you for using the AI-Assisted Dentist chatbot. Have a great day!")
                break

            response = self.answer_question(patient_details=patient_details, query=question, conversation_history=self.chat_history)
            
            print("\nResponse:\n", response)
            #print("\nEvaluation Result:\n", evaluation_result)

            self.chat_history.extend([
                HumanMessage(content=question),
                AIMessage(content=response),
            ])

            self.feedback_counter += 1
            if self.feedback_counter % 5 == 0:
                user_feedback = input("Was this answer helpful? (yes/no) ")
                if user_feedback.lower() == "no":
                    print("I'm sorry to hear that. Please provide more details or ask another question.")
                elif user_feedback.lower() == "quit":
                    print("Thank you for using the AI-Assisted Dentist chatbot. Have a great day!")
                    break
