In [1]:
#!pip install langchain-community pypdf

In [2]:
from dotenv import load_dotenv
load_dotenv()
import os

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI

model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash")

In [4]:
prompt_template="""You are an assistant for question answering tasks. Use the following pieces of retreived context to answer the question.
If you don't know the answer just say you don't know. Keep it brief.
Question: {question}
Context: {context}
"""

In [5]:
with open('MobyDick.txt', encoding='utf-8', errors='replace') as book:
    text = book.read()

text[1000:1100]

'Inn.\n\nCHAPTER 4. The Counterpane.\n\nCHAPTER 5. Breakfast.\n\nCHAPTER 6. The Street.\n\nCHAPTER 7. The Cha'

In [6]:
# Better printing

from IPython.display import Markdown

In [7]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader("war.txt", encoding='utf-8')
docs = loader.load()

In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # chunk size (characters)
    chunk_overlap=100,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

print(f"Split given book into {len(all_splits)} sub-documents.")

Split given book into 1035 sub-documents.


EMBEDDINGS

In [16]:
# Free alternative to Gemini embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load a small, fast embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create a vector store
from langchain_core.vectorstores import InMemoryVectorStore
vector_store = InMemoryVectorStore(embeddings)

# Adding documents to vector store in small batches (to avoid memory spikes)
batched_ids = []
batch_size = 50  # you can adjust this up/down
for i in range(0, len(all_splits), batch_size):
    batch = all_splits[i:i+batch_size]
    ids = vector_store.add_documents(documents=batch)
    batched_ids.extend(ids)

print(f"Added {len(batched_ids)} documents to vector store using HuggingFace embeddings.")


Added 1035 documents to vector store using HuggingFace embeddings.


In [17]:
ids

['485197d1-2c65-4c36-a3bf-3243834545a4',
 '73b80527-7108-40c4-920d-8cae43135ed8',
 'f091d484-457c-4e97-8be9-d6e607b2c6be',
 'b9c5b244-ccd8-4046-b8cf-f41cb46dc591',
 'e3e01211-3f15-4e45-8899-dd66964c9a19',
 'b3adceb9-3bfb-4f05-b49f-b65e6211e88e',
 '21926595-ba9e-455c-9701-e52694129294',
 'b33c8ffa-c158-4359-8e64-fe3093cbc00a',
 '8838226b-fdae-46ac-aeda-30dd8bc0e38d',
 'bfe6f063-1547-4765-9f1d-ed0a38490ed1',
 '27a029e1-1a3b-4f92-a1dd-cab93b165d63',
 'b8fe92dc-ef50-40a5-b4c4-5ee2e19278d1',
 'b8439334-dc8f-42a5-9a13-2a4c453f80f2',
 '704b05b5-f50a-4494-aca8-5d79b64ae919',
 'd187b408-e9a4-40c0-b7fa-b4b986cf859a',
 '2f25f4e6-1cef-4833-8bf2-b7c5b21a626b',
 '35b1833f-39a3-4b0b-bf43-4b5a96a6c7d9',
 '02a2e95d-288d-4c05-8338-ec22dd0e84e4',
 '4cd8e898-2f4e-42c3-9e76-539ecbfb18d6',
 'c8271c13-cac4-45cf-9608-e7f6f9a1f416',
 '09373ac6-3c2d-4832-b9da-c483bfbfa71f',
 'c41c53b5-1f68-402c-9db5-ff19fe096484',
 '74719fd8-b6b7-47a5-a149-b7847a49d268',
 'a1553631-11e0-4403-bb0e-dfdf7675549a',
 '33bda22c-564f-

In [18]:
search_results = vector_store.similarity_search_with_score(
    "What is the story about?",
    k = 10
)

search_results

[(Document(id='3535b1f8-59e8-4f4b-87df-db98ba9e7ebc', metadata={'source': 'war.txt', 'start_index': 172155}, page_content='That was the story they told my brother in fragments when presently\nthey stopped again, nearer to New Barnet. He promised to stay with\nthem, at least until they could determine what to do, or until the\nmissing man arrived, and professed to be an expert shot with the\nrevolver—a weapon strange to him—in order to give them confidence.'),
  0.42839167843779713),
 (Document(id='45edddb4-9af2-4da5-ae35-f1c69c5ec455', metadata={'source': 'war.txt', 'start_index': 21606}, page_content='By eight o’clock a number of boys and unemployed men had already\nstarted for the common to see the “dead men from Mars.” That was the\nform the story took. I heard of it first from my newspaper boy about a\nquarter to nine when I went out to get my _Daily Chronicle_. I was\nnaturally startled, and lost no time in going out and across the\nOttershaw bridge to the sand-pits.\n\n\n\n\nIII.