In [None]:
## Important libraries
import streamlit as st
from langchain_groq import ChatGroq
from dotenv import load_dotenv
import os
import chromadb
import uuid
from langchain.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
import re
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage, AIMessage
from langgraph.prebuilt import ToolNode
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langgraph.graph import MessagesState, StateGraph, START, END
from langchain_core.tools import tool
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
load_dotenv()

True

In [None]:
# pip install langgraph
# pip install sentence-transformers

In [None]:
# Model for generating Embedding
from langchain_community.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

In [9]:
llm=ChatGroq(
    temperature=0,
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name="deepseek-r1-distill-llama-70b"
)

In [None]:
response=llm.invoke("who are you")
response

In [None]:
## Pdf reader and Embedding generation

loader=PyPDFLoader("B09221-eng.pdf")
docs=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)
# documents=documents[:5]

len(documents)


# Initialize embeddings model
embedding_model = OllamaEmbeddings(model="llama3.2:1b")

# Create Chroma vector database
vectorstore = Chroma.from_documents(documents, embedding_model, persist_directory="chroma_db")

# Save the database
vectorstore.persist()

## Chatbot without memory with llama3.2 embedding

In [None]:
'''
# Load the vector store
vectorstore = Chroma(persist_directory="chroma_db", embedding_function=embedding_model)

query = "what is the symptom of Ebola"
# results = vectorstore.similarity_search(query, k=3) 
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 2}) 
results = retriever.get_relevant_documents(query)

for doc in results:
    print(doc.page_content)


response=llm.invoke("what is symptom of Ebola")
print(response)

context = " ".join([doc.page_content for doc in results])
prompt = f"Based on the following information, answer the question:\n\n{context}\n\nQuestion: {query}"
response = llm.invoke(prompt)
'''

74

## Chatbot with BAAI/bge-small-en-v1.5 model

In [None]:
loader=PyPDFLoader("B09221-eng.pdf")
docs=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)



# Create Chroma vector database
vectorstore = Chroma.from_documents(documents, embedding_model, persist_directory="chroma-BAAI")

# Save the database
vectorstore.persist()


In [None]:
# Load the vector store
vectorstore = Chroma(persist_directory="chroma-BAAI", embedding_function=embedding_model)
query = "what is WHO"
results = vectorstore.similarity_search(query, k=3) 
# print(results)
for doc in results:
    print(doc.page_content)

In [None]:
context = " ".join([doc.page_content for doc in results])
prompt = f"Based on the following information, answer the question:\n\n{context}\n\nQuestion: {query}"
response = llm.invoke(prompt)
print(response.content)

In [36]:
memory = ConversationBufferMemory(
    memory_key="chat_history", return_messages=True
)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
    memory=memory,
)

  memory = ConversationBufferMemory(


In [7]:
chat_history = []
while True:
    query = input("Enter your question (or type 'exit' to quit): ")
    
    if query.lower() == "exit":
        break
    
    # Invoke the QA chain
    response = qa_chain.invoke({"question": query, "chat_history": chat_history})
    
    # Extract and clean response
    cleaned_response = re.sub(r"<think>.*?</think>", "", response["answer"], flags=re.DOTALL)
    

    # Generate follow-up questions
    follow_up_prompt = f"Generate 2-3 follow-up questions related to: {query}"
    follow_up_response = llm.invoke(follow_up_prompt)
    follow_up_questions = follow_up_response.content.split("\n")

    # Update chat history
    chat_history.append(HumanMessage(query))
    chat_history.append(AIMessage(cleaned_response))

    # Display response
    print("\n🤖 Bot:", cleaned_response)
    print("\n💡 Follow-up Questions:")
    for q in follow_up_questions:
        print("-", q.strip())

### Code for memory based Chatbot

In [11]:
tools = []
memory = MemorySaver()
tool_node = ToolNode(tools)
model = llm
bound_model = model.bind_tools(tools)
@tool
def search(query: str):
    """Call to surf the web."""
    # This is a placeholder for the actual implementation
    # Don't let the LLM know this though 😊
    return "It's sunny in San Francisco, but you better look out if you're a Gemini 😈."
def should_continue(state: MessagesState):
    """Return the next node to execute."""
    last_message = state["messages"][-1]
    # If there is no function call, then we finish
    if not last_message.tool_calls:
        return END
    # Otherwise if there is, we continue
    return "action"
# Define the function that calls the model
def call_model(state: MessagesState):
    response = bound_model.invoke(state["messages"])
    # We return a list, because this will get added to the existing list
    return {"messages": response}

# Define a new graph
workflow = StateGraph(MessagesState)

# Define the two nodes we will cycle between
workflow.add_node("agent", call_model)
workflow.add_node("action", tool_node)

# Set the entrypoint as `agent`
# This means that this node is the first one called
workflow.add_edge(START, "agent")

# We now add a conditional edge
workflow.add_conditional_edges(
    # First, we define the start node. We use `agent`.
    # This means these are the edges taken after the `agent` node is called.
    "agent",
    # Next, we pass in the function that will determine which node is called next.
    should_continue,
    # Next, we pass in the path map - all the possible nodes this edge could go to
    ["action", END],
)

# We now add a normal edge from `tools` to `agent`.
# This means that after `tools` is called, `agent` node is called next.
workflow.add_edge("action", "agent")

# Finally, we compile it!
# This compiles it into a LangChain Runnable,
# meaning you can use it as you would any other runnable
app = workflow.compile(checkpointer=memory)

In [12]:
config = {"configurable": {"thread_id": "5"}}

In [15]:
memory = MemorySaver()
query="do you remember my name"

input_message = HumanMessage(content=f"{query}")
for event in app.stream({"messages": [query]}, config, stream_mode="values"):
    cleaned_response = re.sub(r"<think>.*?</think>", "", event["messages"][-1].content, flags=re.DOTALL)
    print(cleaned_response)
    # event["messages"][-1].pretty_print()

do you remember my name


Yes, I remember! You're Dheeraj. How can I assist you further today? 😊
