In [4]:
# =========================================================
# Imports
# =========================================================
import re
from IPython.display import display, Markdown
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# =========================================================
# Load PDF
# =========================================================
# Absolute location of the source PDF used for this demo.
pdf_path = "/Users/swatichandna/SynologyDrive/GitHub/NLP/Module 10/ToyData/test.pdf"

# Read the PDF into a list of documents; the count is reported as pages.
loader = PyPDFLoader(pdf_path)
raw_docs = loader.load()
print(f"Loaded {len(raw_docs)} pages")

# =========================================================
# Chunk documents
# =========================================================
# Splitter settings: target chunk size, overlap between neighbouring chunks,
# and the separator candidates handed to the recursive splitter.
splitter_options = {
    "chunk_size": 800,
    "chunk_overlap": 120,
    "separators": ["\n\n", "\n", ".", " "],
}
splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded pages into chunks for embedding.
chunks = splitter.split_documents(raw_docs)
print(f"Created {len(chunks)} chunks")

# =========================================================
# Create embeddings + FAISS retriever
# =========================================================
# Sentence-embedding model used to vectorise the chunks.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Index every chunk in a FAISS store.
vectorstore = FAISS.from_documents(chunks, embeddings)

# Expose the store as a retriever configured with k=3 results per query.
retriever_search_kwargs = {"k": 3}
retriever = vectorstore.as_retriever(search_kwargs=retriever_search_kwargs)
print("FAISS retriever ready")

# =========================================================
# Helper to extract clean text
# =========================================================
def get_context_text(query):
    """Retrieve context for *query* and return it as one plain-text string.

    The module-level ``retriever`` is invoked with the query. When it returns
    nothing, a fixed placeholder sentence is returned. Otherwise the text of
    every hit is joined with blank lines. Whether ``page_content`` or
    ``str()`` is used for all hits is decided by inspecting the first hit
    only.
    """
    hits = retriever.invoke(query)
    if not hits:
        return "No relevant context found."

    # The first hit decides how the text of every hit is extracted.
    if hasattr(hits[0], "page_content"):
        def extract(hit):
            return hit.page_content
    else:
        extract = str

    return "\n\n".join(map(extract, hits))

# =========================================================
# Hugging Face LLM setup
# =========================================================
model_id = "microsoft/phi-2"

# Load the tokenizer and the causal LM; torch_dtype="auto" leaves the choice
# of dtype to the library/checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")

# Deterministic (greedy) generation. The sampling-only knobs temperature,
# top_p and top_k are intentionally NOT passed: with do_sample=False they are
# ignored by generation and only trigger "flag is only used in sample-based
# generation modes" warnings. repetition_penalty is kept because it also
# applies to greedy decoding.
hf_gen = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,       # upper bound on generated tokens per call
    do_sample=False,          # deterministic, avoids multinomial bug
    repetition_penalty=1.1,
    truncation=True,
    # Reuse EOS as the padding token — presumably because this tokenizer
    # defines no dedicated pad token; confirm against the model card.
    pad_token_id=tokenizer.eos_token_id,
)


# Wrap the transformers pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=hf_gen)
print("Hugging Face model ready")

# =========================================================
# Prompt
# =========================================================
# Template text with two placeholders: {context} (retrieved passages) and
# {question} (the user's query).
RAG_PROMPT_TEMPLATE = """
Use the following context to answer the user's question.

Context:
{context}

Question:
{question}

Answer clearly and concisely.
"""

prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

# =========================================================
# Build RAG chain
# =========================================================
# Input stage: map the incoming question string onto the two prompt
# variables. "context" is filled by retrieval, "question" is passed through.
rag_inputs = {
    "context": lambda q: get_context_text(q),
    "question": RunnablePassthrough(),
}

# Output stage: reduce the model output to a plain string.
to_text = StrOutputParser()

# Full pipeline: inputs -> filled prompt -> LLM -> string.
rag_chain = rag_inputs | prompt | llm | to_text
print("RAG chain ready")

# =========================================================
#  Visualization utilities
# =========================================================
def highlight_chunks(chunks, color="yellow"):
    """Render retrieved chunks as colour-highlighted HTML ``<div>`` blocks.

    Args:
        chunks: Iterable of retrieved items. Items exposing a
            ``page_content`` attribute contribute that text; anything else
            is converted with ``str()``.
        color: CSS colour used as the background of every block.

    Returns:
        One ``<div>`` per chunk, joined with newlines. Each block shows the
        1-based chunk number and at most the first 400 characters of the
        whitespace-normalised text. ``...`` is appended only when the text
        was actually cut. An empty input yields an empty string.
    """
    # Local import keeps this block self-contained; html is standard library.
    import html

    max_chars = 400
    highlights = []
    for i, c in enumerate(chunks, 1):
        text = c.page_content if hasattr(c, "page_content") else str(c)
        # Collapse runs of whitespace (newlines, tabs) into single spaces.
        text = re.sub(r"\s+", " ", text.strip())
        # Truncate first, then escape, so an HTML entity is never cut in half.
        # Escaping keeps literal "<", ">" and "&" from the document from being
        # interpreted as markup inside the div.
        preview = html.escape(text[:max_chars], quote=False)
        suffix = "..." if len(text) > max_chars else ""
        highlights.append(
            f"<div style='background-color:{color}; padding:8px; margin:6px 0;'>"
            f"<b>Chunk {i}</b>: {preview}{suffix}</div>"
        )
    return "\n".join(highlights)

def visualize_retrieval(question):
    """Display the retrieved chunks, the filled prompt, and the model answer.

    Args:
        question: The user question, passed to the retriever, the prompt
            template and the RAG chain.

    Returns:
        None. Everything is rendered through ``display(Markdown(...))``:
        the question, the highlighted retrieved chunks, the first 800
        characters of the filled prompt, and the generated answer as a
        blockquote.
    """
    docs = retriever.invoke(question)
    context_text = get_context_text(question)

    display(Markdown(f"## **Question:** {question}"))
    display(Markdown("### **Top Retrieved Chunks:**"))
    display(Markdown(highlight_chunks(docs, color="#FFF6A4")))

    # Show prompt content (truncated)
    filled_prompt = prompt.invoke({"context": context_text, "question": question})
    display(Markdown("###  **Prompt Sent to Model:**"))
    display(Markdown(f"```text\n{filled_prompt.to_string()[:800]}\n```"))

    # Generate answer
    answer = rag_chain.invoke(question)
    display(Markdown("### **Model Answer:**"))
    # Prefix every line with the blockquote marker. Prefixing only the first
    # line lets an answer that starts with a newline, or that contains blank
    # lines, fall out of the quote when rendered.
    quoted_answer = "> " + answer.replace("\n", "\n> ")
    display(Markdown(quoted_answer))

# =========================================================
# Run the demo
# =========================================================
# Sample question used to exercise the whole pipeline end to end.
demo_question = "What is a Large Language Model?"
visualize_retrieval(demo_question)


Loaded 598 pages
Created 1134 chunks
FAISS retriever ready


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 15.30it/s]
Device set to use mps:0


Hugging Face model ready
RAG chain ready


## **Question:** What is a Large Language Model?

### **Top Retrieved Chunks:**

<div style='background-color:#FFF6A4; padding:8px; margin:6px 0;'><b>Chunk 1</b>: than just LLMs in isolation. That, however, brings us to the question: what are large language models? To begin answering this question in this chapter, let’s first explore the history of Language AI. A Recent History of Language AI The history of Language AI encompasses many developments and models aiming to represent and generate language, as illustrated in Figure 1-1. Figure 1-1. A peek into th...</div>
<div style='background-color:#FFF6A4; padding:8px; margin:6px 0;'><b>Chunk 2</b>: models. What name we give one model or the other does not change how it behaves. Since the definition of the term “large language model” tends to evolve with the release of new models, we want to be explicit in what it means for this book. “Large” is arbitrary and what might be considered a large model today could be small tomorrow. There are currently many names for the same thing and to us, “lar...</div>
<div style='background-color:#FFF6A4; padding:8px; margin:6px 0;'><b>Chunk 3</b>: referred to as large language models. Especially if they are considered to be “large.” In practice, this seems like a rather constrained description! What if we create a model with the same capabilities as GPT-3 but 10 times smaller? Would such a model fall outside the “large” language model categorization? Similarly, what if we released a model as big as GPT-4 that can perform accurate text class...</div>

###  **Prompt Sent to Model:**

```text
Human: 
Use the following context to answer the user's question.

Context:
than just LLMs in isolation. That, however, brings us to the question: what
are large language models? To begin answering this question in this chapter,
let’s first explore the history of Language AI.
A Recent History of Language AI
The history of Language AI encompasses many developments and models
aiming to represent and generate language, as illustrated in Figure 1-1.
Figure 1-1. A peek into the history of Language AI.
Language, however, is a tricky concept for computers. Text is unstructured
in nature and loses its meaning when represented by zeros and ones
(individual characters). As a result, throughout the history of Language AI,

models. What name we give one model or the other does not change how it
behaves
```

KeyboardInterrupt: 