In [4]:
# Read the whole corpus file into memory as one UTF-8 decoded string.
corpus_path = "/kaggle/input/got-corpus/got_corpus.txt"
with open(corpus_path, mode="r", encoding="utf-8") as corpus_file:
    raw_text = corpus_file.read()


The system uses a Retrieval-Augmented Generation (RAG) approach: text chunks are embedded with all-MiniLM-L6-v2 and indexed in a FAISS vector store for semantic search. The text is split using RecursiveCharacterTextSplitter with a chunk size of 1000 characters and an overlap of 200 characters. Retrieved chunks are passed directly to the zephyr-7b-beta model via a HuggingFace pipeline, without summarization or additional preprocessing. No advanced context optimization techniques were applied.

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the corpus into chunks of at most 1000 units (characters under the
# splitter's default length function); consecutive chunks share up to 200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `chunks` is the list of plain strings that gets embedded and indexed later.
chunks = text_splitter.split_text(raw_text)


In [6]:
!pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain-community)
  Downloading langchain_core-0.3.54-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.23 (from langchain-community)
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain<1.0.0,>=0.3.23->langchain-community)
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading

In [7]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [8]:
# The sentence_transformers import is kept in case other cells rely on it.
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
# Import from langchain_community directly; `langchain.embeddings` is only a
# deprecated re-export of the same class.
from langchain_community.embeddings import HuggingFaceEmbeddings

# A single embedding model is used both to index the chunks and to embed queries.
# The separate `SentenceTransformer("all-MiniLM-L6-v2")` instance that used to be
# created here was never used and only loaded the same weights a second time.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk and build the FAISS index that the retriever searches.
vectorstore = FAISS.from_texts(chunks, embedding=embeddings)


2025-04-20 15:34:49.331542: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745163289.605298      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745163289.687708      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [9]:
!pip install langchain==0.3.0 langchain-core==0.3.0 pydantic==2.10.4


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting langchain==0.3.0
  Downloading langchain-0.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core==0.3.0
  Downloading langchain_core-0.3.0-py3-none-any.whl.metadata (6.2 kB)
Collecting pydantic==2.10.4
  Downloading pydantic-2.10.4-py3-none-any.whl.metadata (29 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain==0.3.0)
  Downloading langsmith-0.1.147-py3-none-any.whl.metadata (14 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain==0.3.0)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting pydantic-core==2.27.2 (from pydantic==2.10.4)
  Downloading pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
INFO: pip is looking at multiple versions of langchain-text-splitters to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain==0.3.0)
  Downloading langchain_text_splitters-0.3.7-py3-non

USING ZEPHYR 

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load Zephyr 7B.
# device_map="auto" and torch_dtype="auto" leave device placement and dtype
# selection to the library instead of hard-coding them here.
model_id = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code is deliberately not passed: the model loads through the
# standard AutoModelForCausalLM class, and enabling the flag would allow Python
# code shipped in the Hub repository to execute in this kernel.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
)

# Create HF generation pipeline (sampling enabled, up to 512 generated tokens).
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
)

# Expose the FAISS index as a retriever with its default search settings.
retriever = vectorstore.as_retriever()

# Question
query = "What is the name of the sword used by Eddard Stark in the execution scene?"

# Get relevant docs (RAG retrieval). `invoke` is the supported entry point;
# `get_relevant_documents` is deprecated in langchain-core.
docs = retriever.invoke(query)

# Build the prompt: retrieved chunks are concatenated verbatim as the context.
context = "\n".join(doc.page_content for doc in docs)
prompt = f"""Answer the following question using the context below. Be as detailed as possible.

Context:
{context}

Question: {query}
Answer:"""

# Generate the answer; return_full_text=False drops the echoed prompt so only
# the newly generated continuation is printed.
result = text_gen(prompt, return_full_text=False)[0]["generated_text"]
print(result)


tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not in

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

In [None]:
# Evaluation set: one (difficulty label, question) pair per category.
questions = [
    ("Basic", "What is the name of the sword used by Eddard Stark in the execution scene?"),
    ("Intermediate", "What is the relationship between Jon Snow and Robb Stark, and how is it reflected in their interaction during the direwolf pup scene?"),
    ("Advanced", "Discuss the role of the Night’s Watch in the prologue and appendix, and how it serves as a microcosm of the broader political and social tensions in Westeros."),
    ("Edge Case", "Who is the leader of House Tully during the events of the Bran chapter, and what is their relationship to Catelyn Stark?"),
    ("Out-of-Context", "Does the Sorting Hat play a role in determining the houses of Westeros, such as House Stark or House Lannister?")
]

# Run retrieval + generation for each question and print what was retrieved
# alongside the model's answer.
for label, query in questions:
    print(f"\n--- {label} Question ---")
    print(f"Q: {query}")

    # Retrieve relevant documents. `invoke` replaces the deprecated
    # `get_relevant_documents` method.
    docs = retriever.invoke(query)

    # Print top 3 retrieved document snippets (newlines flattened, capped at
    # 300 characters each) so the retrieval quality can be eyeballed.
    print("\nTop Retrieved Documents:")
    for i, doc in enumerate(docs[:3], start=1):
        snippet = doc.page_content.strip().replace("\n", " ")
        print(f"{i}. {snippet[:300]}{'...' if len(snippet) > 300 else ''}")

    # Build the prompt from ALL retrieved documents, not only the 3 printed above.
    context = "\n".join(doc.page_content for doc in docs)
    prompt = f"""Answer the following question using the context below. Be as detailed as possible.

Context:
{context}

Question: {query}
Answer:"""

    # Generate and print the answer (prompt text is excluded from the output).
    result = text_gen(prompt, return_full_text=False)[0]["generated_text"]
    print(f"\nA: {result}")


In [19]:
# New question
query = "Discuss the role of the Night’s Watch in the prologue and appendix, and how it serves as a microcosm of the broader political and social tensions in Westeros."

# Get relevant documents (RAG retrieval)
docs = retriever.get_relevant_documents(query)

# Print the retrieved documents to understand the context
print("Retrieved Documents:\n")
for doc in docs:
    print(f"Document:\n{doc.page_content}\n{'-'*50}")

# Build prompt with the retrieved context
context = "\n".join([doc.page_content for doc in docs])
prompt = f"""Answer the following question using the context below. Be as detailed as possible.

Context:
{context}

Question: {query}
Answer:"""

# Generate the answer using Zephyr-7B
result = text_gen(prompt, return_full_text=False)[0]["generated_text"]

# Output the result
print("\nGenerated Answer:")
print(result)

Retrieved Documents:

Document:
did yourself.”
Lord Slynt’s jowls were quivering, but before he could frame a
further protest Maester Aemon said, “Your Grace, by law a man’s past
crimes and transgressions are wiped clean when he says his words and
becomes a Sworn Brother of the Night’s Watch.”
“I am aware of that. If it happens that Lord Janos here is the best
the Night’s Watch can oﬀer, I shall grit my teeth and choke him down.
It is naught to me which man of you is chosen, so long as you  make a
choice . We have a war to ﬁght.”
“Your Grace,” said Ser Denys Mallister, in tones of wary courtesy.
“If you are speaking of the wildlings . . .”
“I am not. And you know that, ser.”
“And you must know that whilst we are thankful for the aid you
rendered us against Mance Rayder, we can oﬀer you no help in your
contest for the throne. The Night’s Watch takes no part in the wars of
the Seven Kingdoms. For eight thousand years—”
“I know your history, Ser Denys,” the king said brusquely. “I give
--

In [25]:
# New question
query = "Discuss the symbolic significance of the direwolf’s death by a stag’s antler in the Bran chapter and its implications for the Stark and Baratheon houses."

# Get relevant documents (RAG retrieval)
docs = retriever.get_relevant_documents(query)

# Print the retrieved documents to understand the context
print("Retrieved Documents:\n")
for doc in docs:
    print(f"Document:\n{doc.page_content}\n{'-'*50}")

# Build prompt with the retrieved context
context = "\n".join([doc.page_content for doc in docs])
prompt = f"""Answer the following question using the context below. Be as detailed as possible.

Context:
{context}

Question: {query}
Answer:"""

# Generate the answer using Zephyr-7B
result = text_gen(prompt, return_full_text=False)[0]["generated_text"]

# Output the result
print("\nGenerated Answer:")
print(result)

Retrieved Documents:

Document:
—Arya’s direwolf, NYMERIA, lost near the Trident,
—his brother, PRINCE BRANDON, called BRAN, heir to the north, a boy of nine, believed
dead,
—Bran’s direwolf, SUMMER,
—Bran companions and protectors:
—MEERA REED, a maid of sixteen, daughter of Lord Howland Reed of Greywater
Watch,
—JOJEN REED, her brother, thirteen,
—HODOR, a simpleminded stableboy, seven feet tall,
—his brother, PRINCE RICKON, a boy of four, believed dead,
—Rickon’s direwolf, SHAGGYDOG,
—Rickon’s companion and protector:
—OSHA, a wildling captive who served as a scullion at Winterfell,
—his half-brother, JON SNOW, a Sworn Brother of the Night’s Watch,
—Jon’s direwolf, GHOST,
—his uncles and aunts, paternal:
—his father’s elder brother, {BRANDON STARK}, slain at the command of King Aerys II
Targaryen,
—his father’s sister, {LYANNA STARK}, died in the Mountains of Dorne during Robert’s
Rebellion,
—his father’s younger brother, BENJEN STARK, a man of the Night’s Watch, lost beyond
the Wal

In [2]:
# Prerequisite: `retriever` and `text_gen` are created in the Zephyr setup cell
# (and `vectorstore` in the embedding cell). They must have been run in the
# current kernel session, otherwise this cell stops with
# "NameError: name 'retriever' is not defined".
questions = [
    ("Basic", "What is the name of the sword used by Eddard Stark in the execution scene?"),
    ("Intermediate", "What is the relationship between Jon Snow and Robb Stark, and how is it reflected in their interaction during the direwolf pup scene?"),
    ("Advanced", "Discuss the role of the Night’s Watch in the prologue and appendix, and how it serves as a microcosm of the broader political and social tensions in Westeros."),
    ("Edge Case", "Who is the leader of House Tully during the events of the Bran chapter, and what is their relationship to Catelyn Stark?"),
    ("Out-of-Context", "Does the Sorting Hat play a role in determining the houses of Westeros, such as House Stark or House Lannister?")
]

# Loop through each question: retrieve, show the evidence, then generate.
for label, query in questions:
    print(f"\n--- {label} Question ---")
    print(f"Q: {query}")

    # Retrieve relevant documents. `invoke` replaces the deprecated
    # `get_relevant_documents` method.
    docs = retriever.invoke(query)

    # Print top 3 retrieved document snippets, flattened to one line and
    # truncated to 300 characters each.
    print("\nTop Retrieved Documents:")
    for i, doc in enumerate(docs[:3], start=1):
        snippet = doc.page_content.strip().replace("\n", " ")
        print(f"{i}. {snippet[:300]}{'...' if len(snippet) > 300 else ''}")

    # Build the prompt from every retrieved document (not just the 3 shown).
    context = "\n".join(doc.page_content for doc in docs)
    prompt = f"""Answer the following question using the context below. Be as detailed as possible.

Context:
{context}

Question: {query}
Answer:"""

    # Generate and print the answer (prompt text is excluded from the output).
    result = text_gen(prompt, return_full_text=False)[0]["generated_text"]
    print(f"\nA: {result}")



--- Basic Question ---
Q: What is the name of the sword used by Eddard Stark in the execution scene?


NameError: name 'retriever' is not defined

USING FLAN

In [17]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Import from langchain_community directly; `langchain.llms` is only a
# deprecated re-export of the same class.
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Load FLAN-T5 (base for Kaggle/Colab, use larger models if you have more GPU)
model_name = "google/flan-t5-base"

# NOTE: these assignments rebind the notebook-level `tokenizer` / `model` names
# that the Zephyr cell also uses.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Wrap in HF pipeline. Decoding is greedy (do_sample=False), so no temperature
# is passed: it is only used in sampling modes and would just trigger a warning.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=1024,
    do_sample=False,
)

# Use it in LangChain
# NOTE(review): the recorded run of this cell stopped with
# "PydanticUserError: `HuggingFacePipeline` is not fully defined" after the
# pipeline was created. That looks like a langchain / langchain-core / pydantic
# version mismatch from the pip cells rather than a problem in this code --
# confirm after aligning the installed versions and restarting the kernel.
llm = HuggingFacePipeline(pipeline=pipe)

# Build the RAG chain with your FAISS vectorstore; source documents are
# returned alongside the answer so the retrieved evidence can be inspected.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)

# Ask a question
query = "Who is Jon Snow's real mother?"
result = qa_chain.invoke({"query": query})

print(result["result"])

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cuda:0


PydanticUserError: `HuggingFacePipeline` is not fully defined; you should define `Union`, then call `HuggingFacePipeline.model_rebuild()`.

For further information visit https://errors.pydantic.dev/2.11/u/class-not-fully-defined

In [5]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2
