In [35]:
from pypdf import PdfReader

reader = PdfReader("Celia's Shrine.pdf")

text = ""
for page in reader.pages:
    page_text = page.extract_text()
    if page_text:  # guard against None
        text += page_text

print(text[:500])


Celia's Shrine
by David Gardiner
This story may be reproduced in whole or in part for any non-commercial
purpose on condition
that authorship is acknowledged and credited. The copyright remains the
property of the author.
I'm glad you like the bungalow. I would like it to go to a happy young couple
like you. We were always very happy here. Well, as happy as anybody ever
is... you know what I mean. Why don't you sit down and I'll make the two of
you a cup of tea?
This was out in the country when 


In [36]:
words=text.split()
chunks=[]
chunk_size=1000
overlap=150
step=chunk_size-overlap

for start in range(0,len(words),step):
    end=start+chunk_size
    chunk_words=words[start:end]
    chunk_text="".join(chunk_words)
    chunks.append(chunk_text)
print(len(chunks))

3


In [37]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings=model.encode(chunks)
print(embeddings)

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 241.53it/s, Materializing param=pooler.dense.weight]                             
BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


[[-0.0280197  -0.056583    0.06115209 ... -0.00698476  0.04047382
  -0.01063019]
 [-0.02640468 -0.01661091  0.03435023 ... -0.09219253  0.04199612
   0.03628125]
 [-0.00504866 -0.06904926  0.08129273 ... -0.01680476  0.05890396
  -0.0038166 ]]


In [38]:
from pinecone import pinecone
import os

In [39]:
from pinecone import Pinecone
PINECONE_API_KEY = "pcsk_7N8TDx_T7CDM66rqR9VvwPRVHfCZd1iJejAdNkA1VStAXXRCcnZeRBcWMibb5pHyXCBfSk"
pc = Pinecone(api_key=PINECONE_API_KEY)
print("Connected to Pinecone ✅")

Connected to Pinecone ✅


In [40]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "mini-rag"

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

index = pc.Index(index_name)
print("Index created or already exists ✅")


Index created or already exists ✅


In [41]:
vectors = []   # empty list to store data

for i in range(len(chunks)):
    chunk_id = "chunk-" + str(i)      # unique ID (string)
    embedding = embeddings[i].tolist()  # convert to list
    metadata = {
        "text": chunks[i]             # original chunk text
    }

    vectors.append((chunk_id, embedding, metadata))


In [42]:
index.upsert(vectors)

UpsertResponse(upserted_count=3, _response_info={'raw_headers': {'date': 'Tue, 03 Feb 2026 15:32:11 GMT', 'content-type': 'application/json', 'content-length': '19', 'connection': 'keep-alive', 'x-pinecone-request-lsn': '3', 'x-pinecone-request-logical-size': '14699', 'x-pinecone-request-latency-ms': '540', 'x-pinecone-request-id': '136333665311235913', 'x-envoy-upstream-service-time': '165', 'x-pinecone-response-duration-ms': '541', 'grpc-status': '0', 'server': 'envoy'}})

In [43]:
print(index.describe_index_stats())

{'_response_info': {'raw_headers': {'connection': 'keep-alive',
                                    'content-length': '181',
                                    'content-type': 'application/json',
                                    'date': 'Tue, 03 Feb 2026 15:32:12 GMT',
                                    'grpc-status': '0',
                                    'server': 'envoy',
                                    'x-envoy-upstream-service-time': '4',
                                    'x-pinecone-request-id': '5274182028047683469',
                                    'x-pinecone-request-latency-ms': '6',
                                    'x-pinecone-response-duration-ms': '8'}},
 'dimension': 384,
 'index_fullness': 0.0,
 'memoryFullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'__default__': {'vector_count': 3}},
 'storageFullness': 0.0,
 'total_vector_count': 3,
 'vector_type': 'dense'}


In [44]:
query_text = "What is this document about?"


In [45]:
question_embedding = model.encode(query_text).tolist()


In [46]:
results = index.query(
    vector=question_embedding,
    top_k=5,
    include_metadata=True
)


In [47]:
for i, match in enumerate(results["matches"]):
    print(f"Chunk {i+1}:")
    print(match["metadata"]["text"][:300])
    print("-" * 40)

Chunk 1:
goon.IthoughttheleastwecoulddowasletthemuseCharlie'sroomforafewdayswhiletheysortedthemselvesout.Iwantedtophonethelocalhospitalandputthemonstandby,butthegirlwouldn'thaveit.Saidthatshehadtogivebirthontheearth,facingacertaindirection,whilesomekindofincantationswerereadout....allkindsofstufflikethat.But
----------------------------------------
Chunk 2:
Celia'sShrinebyDavidGardinerThisstorymaybereproducedinwholeorinpartforanynon-commercialpurposeonconditionthatauthorshipisacknowledgedandcredited.Thecopyrightremainsthepropertyoftheauthor.I'mgladyoulikethebungalow.Iwouldlikeittogotoahappyyoungcouplelikeyou.Wewerealwaysveryhappyhere.Well,ashappyasanyb
----------------------------------------
Chunk 3:
pouredhimastiffdrinkandtookitouttohim.Hedowneditinone.Talkingaboutdrinks,Ithinkthekettle'sboiling.I'lljustgoandmakethetea.Thereisn'tmuchmoretotellyouanyway.Theending?Ohthereisn'tonereally.Theirfriendsintheothervanloadedupallthestuffoutofthebroken-downoneandtookitaway.Apparentlytheyweregoi

In [48]:
context_chunks = []

for match in results["matches"]:
    context_chunks.append(match["metadata"]["text"])


In [49]:
context = "\n\n".join(context_chunks)

In [50]:
if not context_chunks:
    answer = "I don't know. The document does not contain this information."
else:
    answer = "Based on the document:\n\n"
    for i, chunk in enumerate(context_chunks):
        answer += f"[{i+1}] {chunk[:200]}...\n\n"


In [51]:
from sentence_transformers import CrossEncoder
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

Loading weights: 100%|██████████| 105/105 [00:00<00:00, 262.05it/s, Materializing param=classifier.weight]                                    
BertForSequenceClassification LOAD REPORT from: cross-encoder/ms-marco-MiniLM-L-6-v2
Key                          | Status     |  | 
-----------------------------+------------+--+-
bert.embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [52]:
pairs = []

for match in results["matches"]:
    chunk_text = match["metadata"]["text"]
    pairs.append((query_text, chunk_text))


In [53]:
scores = reranker.predict(pairs)

In [54]:
reranked = list(zip(scores, results["matches"]))
reranked.sort(key=lambda x: x[0], reverse=True)

In [55]:
for i, (score, match) in enumerate(reranked):
    print(f"Rank {i+1} | Score: {score:.4f}")
    print(match["metadata"]["text"][:300])
    print("-" * 40)

Rank 1 | Score: -8.7177
pouredhimastiffdrinkandtookitouttohim.Hedowneditinone.Talkingaboutdrinks,Ithinkthekettle'sboiling.I'lljustgoandmakethetea.Thereisn'tmuchmoretotellyouanyway.Theending?Ohthereisn'tonereally.Theirfriendsintheothervanloadedupallthestuffoutofthebroken-downoneandtookitaway.Apparentlytheyweregoingtosellita
----------------------------------------
Rank 2 | Score: -8.7716
Celia'sShrinebyDavidGardinerThisstorymaybereproducedinwholeorinpartforanynon-commercialpurposeonconditionthatauthorshipisacknowledgedandcredited.Thecopyrightremainsthepropertyoftheauthor.I'mgladyoulikethebungalow.Iwouldlikeittogotoahappyyoungcouplelikeyou.Wewerealwaysveryhappyhere.Well,ashappyasanyb
----------------------------------------
Rank 3 | Score: -8.8928
goon.IthoughttheleastwecoulddowasletthemuseCharlie'sroomforafewdayswhiletheysortedthemselvesout.Iwantedtophonethelocalhospitalandputthemonstandby,butthegirlwouldn'thaveit.Saidthatshehadtogivebirthontheearth,facingacertaindirection,whilesomekind

In [56]:
from transformers import pipeline

llm = pipeline(
    "text-generation",
    model="distilgpt2"
)


Loading weights: 100%|██████████| 76/76 [00:00<00:00, 285.32it/s, Materializing param=transformer.wte.weight]            
GPT2LMHeadModel LOAD REPORT from: distilgpt2
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
transformer.h.{0, 1, 2, 3, 4, 5}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [57]:
llm("Hello,my name is",max_new_tokens=20)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=20) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': 'Hello,my name is my first name,my password is my first name,my username is my second name,my password'}]

In [58]:
from transformers import pipeline

llm = pipeline(
    "text-generation",
    model="google/flan-t5-base",
    max_new_tokens=256
)


Loading weights: 100%|██████████| 282/282 [00:01<00:00, 236.41it/s, Materializing param=shared.weight]                                                       
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'AfmoeForCausalLM', 'ApertusForCausalLM', 'ArceeForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'BltForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'CwmForCausalLM', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'DogeForCausalLM', 'Dots1ForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM'

In [59]:
top_chunks=reranked[:3]

In [60]:
context = ""

for i, match in enumerate(results["matches"]):
    if match.metadata and "text" in match.metadata:
        context += f"[{i+1}] {match.metadata['text']}\n\n"


In [61]:
if context.strip() == "":
    final_answer = "I don't know. The document does not contain this information."
else:
    final_answer = "Answer based on the document:\n\n"
    final_answer += context
