1. Imports & Setup

In [8]:
import os
import uuid

import requests
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOllama
from langchain.chains import create_retrieval_chain
from langchain.chains import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableWithMessageHistory
from langchain_core.chat_history import InMemoryChatMessageHistory

In [9]:
# Download the source PDF once; later runs reuse the local copy at pdf_path.
pdf_url = "https://assets.openstax.org/oscms-prodcms/media/documents/Introduction_to_Python_Programming_-_WEB.pdf"
pdf_path = "sample.pdf"
if not os.path.exists(pdf_path):
    # A timeout keeps "Run All" from hanging forever on a stalled connection.
    response = requests.get(pdf_url, timeout=60)
    # Fail loudly on 4xx/5xx. Without this, an error page would be written to
    # pdf_path and the exists() guard above would treat it as a valid download
    # on every subsequent run.
    response.raise_for_status()
    with open(pdf_path, 'wb') as f:
        f.write(response.content)

2. Long-term Memory (PDF Processing)

In [10]:
# Read the downloaded PDF into LangChain documents.
loader = PyMuPDFLoader(pdf_path)
docs = loader.load()

# Cut the documents into overlapping chunks so that text near a chunk
# boundary also appears in the neighbouring chunk.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = splitter.split_documents(docs)

In [11]:
# Embed every chunk with the Ollama "nomic-embed-text" model and index the
# resulting vectors in a Chroma store.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)

3. Retrieval System

In [12]:
# Chat model used both to rewrite follow-up questions and to write the answers.
llm = ChatOllama(model="gemma3:12b")

# Base retriever: fetch the 3 best-matching chunks from the vector store.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# A follow-up such as "What are its main benefits?" names no topic of its own.
# Searching the vector store with that raw text pulls in unrelated chunks, so
# the answer drifts off-topic even though the chat history is in the answer
# prompt. Rewrite the latest question into a standalone one first, using the
# chat history, and search with the rewritten question instead.
contextualize_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "Given the chat history and the latest user question, which may refer to "
     "earlier turns, rewrite the question so that it can be understood without "
     "the chat history. Do NOT answer the question; return only the rewritten "
     "question, or the original question if no rewrite is needed."),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# When chat_history is empty the input is sent to the retriever unchanged;
# otherwise this costs one extra LLM call per turn for the rewrite.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_prompt
)

# Answer prompt: retrieved context + prior conversation + the new question.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer questions based on the context: {context}. Consider previous conversation."),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}")
])

# Stuff the retrieved documents into {context}, then answer with the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(history_aware_retriever, document_chain)

4. Short-term Memory (Conversation Context)

In [13]:
# In-memory chat histories, keyed by session id (lost when the kernel restarts).
session_store = {}


def get_session_history(session_id: str):
    """Return the chat history for ``session_id``, creating it on first use.

    A new ``InMemoryChatMessageHistory`` is stored in ``session_store`` the
    first time an id is seen; later calls with the same id return that same
    object.
    """
    try:
        return session_store[session_id]
    except KeyError:
        history = InMemoryChatMessageHistory()
        session_store[session_id] = history
        return history

# Wrap the retrieval chain with per-session message history. The key names
# tell the wrapper where to find things in the chain's input/output dicts:
#   "input"        - the new user message
#   "chat_history" - where earlier messages are supplied to the chain
#   "answer"       - the chain output to record as the reply
context_aware_chain = RunnableWithMessageHistory(
    retrieval_chain,
    get_session_history,
    output_messages_key="answer",
    history_messages_key="chat_history",
    input_messages_key="input",
)

In [14]:
# One random session id per run of this cell, so re-running it starts a fresh
# conversation history.
session_id = str(uuid.uuid4())


def chat(message):
    """Send ``message`` through ``context_aware_chain`` and return its answer.

    The module-level ``session_id`` selects which conversation history is used.
    """
    result = context_aware_chain.invoke(
        {"input": message},
        config={"configurable": {"session_id": session_id}},
    )
    return result["answer"]

# Three-turn demo conversation; only the first 200 characters of each answer
# are shown.
print("=== Context-Aware Chatbot Test ===")

# Turn 1: opening question.
q1 = "What is Python programming about?"
a1 = chat(q1)
print(f"Q1: {q1}", f"A1: {a1[:200]}...", "", sep="\n")

# Turn 2: "that" only makes sense with the previous turn (short-term memory).
q2 = "Can you explain that in simpler terms?"
a2 = chat(q2)
print(f"Q2: {q2}", f"A2: {a2[:200]}...", "", sep="\n")

# Turn 3: another follow-up that relies on the earlier turns.
q3 = "What are its main benefits?"
a3 = chat(q3)
print(f"Q3: {q3}", f"A3: {a3[:200]}...", sep="\n")

=== Context-Aware Chatbot Test ===
Q1: What is Python programming about?
A1: According to the text, Python programming is about:

*   **Input and output:** Getting information into and displaying information out of the program.
*   **Assigning variables:** Storing data in name...

Q2: Can you explain that in simpler terms?
A2: Okay, let's break it down simply:

Python programming is basically about teaching a computer how to do things for you. The text highlights that it's used for:

*   **Getting information in and showing...

Q3: What are its main benefits?
A3: According to the text, the main benefits of using functions in Python programming are:

*   **Modularity:** Functions help organize your code by grouping related statements together.
*   **Reusability...
