In [9]:
import os
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama,OllamaEmbeddings
from langchain_community.document_loaders import PyPDFium2Loader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import ChatMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever,create_retrieval_chain, LLMChain
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory, RedisChatMessageHistory
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, ConfigurableFieldSpec, RunnableConfig
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import AIMessage, HumanMessage
from dotenv import load_dotenv
load_dotenv()

True

In [10]:
groq_api_key = os.getenv("GROQ_API_KEY")

In [11]:
#LLMS Models
ollama_model = ChatOllama(model="gemma3:4b")
groq_model = ChatGroq(model="llama-3.3-70b-versatile",groq_api_key=groq_api_key)

In [12]:
from langchain_core.output_parsers import StrOutputParser

In [5]:
prompt_1 = ChatPromptTemplate.from_messages(
    [
        ("system","you are a helpful assistant. Please response and guide the user to the question asked"),
        ("user","Qusetion : {question}" )
    ]
)

In [6]:
chain_1 = prompt_1 | groq_model | StrOutputParser()

In [7]:
response_123 = chain_1.invoke({"question" : "do you know who is shahrukh khan ?"})

In [8]:
response_123

'Yes, I do know who Shah Rukh Khan is. Shah Rukh Khan, often referred to as SRK, is a highly acclaimed Indian film actor, producer, and television personality. He is widely regarded as one of the most successful and influential actors in Indian cinema, particularly in the Bollywood film industry.\n\nBorn on November 2, 1965, in New Delhi, India, Shah Rukh Khan has appeared in over 80 films throughout his career, which spans more than three decades. He has won numerous awards, including 14 Filmfare Awards, and has been honored with the Padma Shri, India\'s fourth-highest civilian award, for his contributions to the arts.\n\nSome of his most notable films include "Dilwale Dulhania Le Jayenge," "Kuch Kuch Hota Hai," "Devdas," "Chak De India," and "Chennai Express." He is known for his charismatic on-screen presence, versatility as an actor, and his ability to connect with audiences worldwide.\n\nShah Rukh Khan is not only a talented actor but also a successful entrepreneur, with business 

In [13]:
#Embedding model
ollama_embedding = OllamaEmbeddings(model="nomic-embed-text:v1.5")

In [14]:
#initialising vectordb
vector_db = Chroma(collection_name="core_and_memory",
    embedding_function=ollama_embedding,
    persist_directory="./chroma_db")

In [15]:
#reading the pdf and keeping contents into the vectorDb
doc_loader = PyPDFium2Loader("2307.06435v10.pdf")
docs = doc_loader.load()



In [16]:
# Text splitter 
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 300)
chunked_docs = splitter.split_documents(docs)

In [17]:
# Adding metadata in order to shrink the search space for future use 
for doc in chunked_docs:
    doc.metadata["type"] = "external_docs"

In [43]:
custom_doc = Document(
        page_content=f"Hi my name is souvik.",
        metadata={"type": "chat_memory","user_id" : "souvik_1"}
    )

vector_db.add_documents([custom_doc])

['d5486553-0c10-413a-9aec-0571d264407e']

In [58]:
content_retriever_ex = vector_db.as_retriever(search_kwargs={"k": 5,"filter": {
            "$and": [
                {"type": "chat_memory"},
                {"user_id": "venom_magi"}
            ]
        }})

In [59]:
docs_exp = content_retriever_ex.invoke("what is my name ?")

In [60]:
docs_exp

[]

In [11]:
#storing the information into vector Db
vector_db.add_documents(chunked_docs)

['b5098bfa-746e-4c40-90ea-760644c8c6b3',
 'f6ea0398-de9e-470a-abf6-d4ebc1fbf78c',
 '6ea4bb36-48b8-4e18-8d1c-2c3df9323849',
 '3a4ace30-5c3d-482b-ae52-98b1b36ce3c9',
 '56778fae-bf78-422f-bbd2-332212c4a310',
 'dcbb76fd-9ca3-4a68-a24d-fe0335b826b0',
 '049d1b1f-92f1-4aa8-9d5a-d8e0f64bf3ee',
 'da4a81b4-88f7-4305-8386-3bc2098c100f',
 '8c8cc0c5-7c00-43fa-ab94-048df9cde178',
 '4ca56b4e-4783-4e5c-bb8a-0d14a2a7b4e4',
 '894b8a54-67df-4798-be0f-59c2af643983',
 '5231a655-8f5a-4a38-8fb8-d1e194fdd05c',
 '41a73e5d-1537-401b-9385-806e8483a7bb',
 '226a4027-4b52-4e26-b451-9075f9868133',
 'ff8af3b7-7d06-427e-8184-7b339d270050',
 '89085f58-9690-456e-a610-b32255779113',
 '817b2a77-6a43-4b01-8e07-d996fc3b8eb5',
 '4389fea0-bc4c-44b1-8717-b2a159b39477',
 '86efbbf1-acb2-4f38-9795-389a77043282',
 '20325e99-c274-435d-91ef-6162077b29b6',
 '4f0a5803-237c-4cae-bb71-50daa86b24a8',
 '50e1edeb-3f63-4478-ab5f-4f508b0586ac',
 'ef59b7d9-4145-44a8-a6d8-944a360c2e2a',
 'fe848f97-2674-4033-8586-17d9eb7f95a9',
 '9832c458-865d-

In [18]:
# Making two different retrievers to fetch context and previous messages seperately
content_retriever = vector_db.as_retriever(search_kwargs={"k": 5, "filter" : {"type": "external_docs"}})

def get_message_retriever(session_id : str):
    return vector_db.as_retriever(search_kwargs={"k": 5,"filter" : {"$and" : [{"type" : "chat_memory"},{"user_id" : session_id}]}})

In [19]:
# Making prompt for search query generation for the context using current message and previous message.
search_query_prompt = ChatPromptTemplate.from_messages([
    ("system", 
     "You are a query‑rewrite assistant.  Your job is to turn the given "
     "two types of conversation (Old Related Conversation Memory and Recent Conversation History) and a follow‑up question into "
     "a single, self‑contained search string optimized for our vector database. "
    ),
    ("human", 
     "=== Old Related Conversation Memory Start ===\n"
     "{old_related_memory}\n"
     "=== Old Related Conversation Memory End ===\n\n"
     "=== Recent Conversation History Start ===\n"
     "{recent_memory}\n"
     "=== Recent Conversation History End ===\n\n"
     "=== User Question ===\n"
     "{input}\n"
     "=== End ===\n\n"
     "Please output **only** the rewritten search query on one line:")
])

In [20]:
from langchain_core.output_parsers import StrOutputParser

In [21]:
#  making the rephrase or search query generator chain using LLM chain 
# search_query_chain = LLMChain(llm = groq_model , prompt = search_query_prompt)
search_query_chain = search_query_prompt | groq_model | StrOutputParser()
search_query_chain_ollama = search_query_prompt | groq_model | StrOutputParser()

In [None]:
# Making the Q_A_Prompt for conversation 


qa_prompt = ChatPromptTemplate.from_template(
    """You're a smart, friendly AI conversational chat assistant. Answer the user's question through natural conversation while following these STRICT rules:
    
    1. For general knowledge type QUESTION (e.g. - general knowledge, public figures, common facts, etc.) answer directly using your own knowledge without including CONTEXT/RELATED PAST CHAT MEMORY/RECENT CHAT HISTORY MEMORY.
    2. For non general knowledge type QUESTION (e.g. - user-specific information, past conversations, private details, document content, etc.) you can take help of the given 
       CONTEXT/RELATED PAST CHAT MEMORY/RECENT CHAT HISTORY MEMORY to answer the user's QUESTION but there are two rules :
       - if user's question can be answered using the CONTEXT/RELATED PAST CHAT MEMORY/RECENT CHAT HISTORY MEMORY  →  just answer it.
       - if user's question cannot be answered using the CONTEXT/RELATED PAST CHAT MEMORY/RECENT CHAT HISTORY MEMORY  →  say the user that you're not aware of the question in your own words.
    3. NEVER mention specific details from context/memory in your answers when not explicitly asked by user.
    4. NEVER mention that you're using context or memory in your answers.
    5. Keep the conversation human like natural.
    
    === CONTEXT === 
    {context}
    
    === RELATED PAST CHAT MEMORY ===
    {old_memory_context}

    === RECENT CHAT HISTORY MEMORY ===
    {recent_memory_context}
    
    === QUESTION ===
    {input}
    """
)


In [44]:
# Creating a stuff_doc documents chain

qa_stuff_chain = create_stuff_documents_chain(
    llm=groq_model,
    prompt=qa_prompt,
    document_variable_name="context"
)

qa_stuff_chain_ollama = create_stuff_documents_chain(
    llm=ollama_model,
    prompt=qa_prompt,
    document_variable_name="context"
)

In [24]:
#creating session_dictionary and function 

session_hist = {}

def get_session_history(session_id : str) -> BaseChatMessageHistory:
    if session_id not in session_hist:
        session_hist[session_id] = ChatMessageHistory()
    return session_hist[session_id]


In [45]:
def hybrid_runnable_fn(inputs: dict, config: RunnableConfig | None = None) -> dict:
    # Get session_id from config instead of inputs
    if config is None or "configurable" not in config or "session_id" not in config["configurable"]:
        raise ValueError("Session ID not found in config")
    user_id = config["configurable"]["session_id"]
    
    user_question = inputs["input"]
    full_history = inputs.get("chat_history",[])
    print(full_history[-1].content)

    # Pull just the last N messages from full_history for recency -
    N = 15
    last_N = full_history[-N:] if len(full_history) >= N else full_history
    recency_text = "\n\n".join(
        f"{'User' if isinstance(m, HumanMessage) else 'Assistant'}: {m.content}"
        for m in last_N
    )

    # Semantic retrieve older memory from Chroma (filtered by user_id) -
    memory_retriever = get_message_retriever(user_id)
    memory_docs = memory_retriever.invoke(user_question)
    older_context = "\n\n".join(doc.page_content for doc in memory_docs)

    # combined_memory = "\n\n".join(s for s in [older_context, recency_text] if s)

    # Rephrase + retrieve from external KB - (to make the search query)
    rephrased_query = search_query_chain.invoke({
        "old_related_memory": older_context,
        "recent_memory" : recency_text,
        "input": user_question
    }).strip()
    kb_docs = content_retriever.invoke(rephrased_query)
    # kb_context = "\n\n".join(doc.page_content for doc in kb_docs)

    # Stuff into final prompt -
    answer_text = qa_stuff_chain.invoke({
        "context": kb_docs,
        "old_memory_context": older_context,
        "recent_memory_context" : recency_text,
        "input": user_question
    })

    # Persist this turn as a new chat_memory doc in Chroma ─────────────
    new_chat_doc = Document(
        page_content=f"User: {user_question}\nAssistant: {answer_text}",
        metadata={"type": "chat_memory", "user_id": user_id}
    )
    vector_db.add_documents([new_chat_doc])

    # Build updated history for RunnableWithMessageHistory ──────────────
    updated_history = full_history + [
        HumanMessage(content=user_question),
        AIMessage(content=answer_text)
    ]

    return {
        "answer": answer_text,
        "chat_history": updated_history
    }


hybrid_runnable = RunnableLambda(func=hybrid_runnable_fn)

In [46]:
runnable_with_history = RunnableWithMessageHistory(
    hybrid_runnable,
    get_session_history,
    input_messages_key="input",        # takes {"input": ...}
    history_messages_key="chat_history",  # separate key for full history
    output_messages_key="answer",         # what key your inner fn returns
    history_factory_config=[
        ConfigurableFieldSpec(
            id="session_id",
            annotation=str,
            name="Session ID",
            description="Unique identifier for this session.",
            default="",
            is_shared=True
        )
    ]
)

In [32]:
config_1 = {"configurable":{"session_id": "rtx_venom_1"}}

In [47]:
resp_1 = runnable_with_history.invoke({"input" : "she is so beautiful"},config=config_1)


That's so sweet, Rohit. It's great to hear that you care about Sikta deeply. What's it about Sikta that you love the most?


In [48]:
resp_1

{'answer': "That's wonderful, it's great that you appreciate Sikta's beauty, both inside and out. What do you think makes her stand out to you, is it just her physical appearance or is there something more that draws you to her?",
 'chat_history': [HumanMessage(content='what is the name of my girlfriend ?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I'm not aware of that information.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Hi I am Rohit', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hello Rohit, it's nice to meet you. How can I assist you today?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='My girlfriend is Sikta and I love her', additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's so sweet, Rohit. It's great to hear that you care about Sikta deeply. What's it about Sikta that you love the most?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='sh

In [19]:
from datasets import load_dataset

# 1. load SQuAD v1.1
dataset = load_dataset("squad", split="validation[:1000]")

# 2. extract (question, gold answer)
examples = []
for ex in dataset:
    q = ex["question"]
    # take the first gold answer
    gold = ex["answers"]["text"][0]
    examples.append({"question": q, "gold_answer": gold})

Generating train split: 100%|██████████| 87599/87599 [00:03<00:00, 22113.06 examples/s]
Generating validation split: 100%|██████████| 10570/10570 [00:00<00:00, 622823.09 examples/s]
