In [31]:
import os
from pathlib import Path

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [32]:
# Load Hugging Face API key from environment variable
HUGGING_FACE_API = os.environ.get("HUGGING_FACE_API", None)
if not HUGGING_FACE_API:
    print("Warning: HUGGING_FACE_API environment variable not set")

In [33]:
PDF_FOLDER = Path("../data/The Godfather Summary.pdf")      # <-- change this
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
GEN_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

In [34]:
def load_pdfs(pdf_path: Path):
    docs = []
    # Accept a single file or a folder
    if pdf_path.is_file():
        paths = [pdf_path]
    else:
        paths = list(pdf_path.glob("*.pdf"))

    for p in paths:
        loader = PyPDFLoader(str(p))          # <-- PyPDFLoader in action
        docs.extend(loader.load())
    return docs


raw_docs = load_pdfs(PDF_FOLDER)
print(f"Loaded {len(raw_docs)} pages from {PDF_FOLDER}")

Loaded 3 pages from ../data/The Godfather Summary.pdf


In [35]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
)
chunks = text_splitter.split_documents(raw_docs)
print(f"Created {len(chunks)} chunks")

Created 15 chunks


In [36]:
embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})  # top-6 chunks

In [37]:
tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    GEN_MODEL,
    device_map="auto",          # uses GPU if available, else CPU
    torch_dtype="float32",
    trust_remote_code=True,
)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    temperature=0.2,
    top_p=0.95,
    do_sample=True,
)
llm = HuggingFacePipeline(pipeline=pipe)

Loading checkpoint shards: 100%|██████████| 3/3 [00:18<00:00,  6.17s/it]
Some parameters are on the meta device because they were offloaded to the disk.
Device set to use mps


In [38]:
template = """You are a helpful assistant. Answer the question using ONLY the provided context.
If the context does not contain the answer, say "I don't know".

Context:
{context}

Question: {question}
Answer:"""

prompt = PromptTemplate.from_template(template)

In [39]:
def format_context(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_context, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
question = "What is the main conclusion of the paper about climate change?"
answer = rag_chain.invoke(question)
print("\n--- ANSWER ---")
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [None]:
question = "What is the name of the Don?"
answer = rag_chain.invoke(question)
print("\n--- ANSWER ---")
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--- ANSWER ---
You are a helpful assistant. Answer the question using ONLY the provided context.
If the context does not contain the answer, say "I don't know".

Context:
To this day it’s jolting to see Brando as Don Corleone — 
the receded hairline, the gray pencil moustache, jowls 
hanging off a twisted mouth, and a voice cracked from 
years of command. Brando makes the character extraor-
dinarily complex largely through his physical expressive-
ness. He walks as if his shoulder blades were pinned be-
hind him (which emphasizes an old man’s paunch in 
front). But the sensibility beneath the authority is aston-
ishingly agile: the Don can suddenly break into mimicry, 
or turn his daughter in a waltz with a slight protective 
bent that catches sentiment in movement. Brando puts 
so much substance into his relatively few scenes, blowing 
hot and cold with equal eclat, that he enables Coppola to 
draw parallels between his sons and himself through nu-
ances at once fleeting and concrete