# RAG

In [1]:
from langchain_ollama import OllamaLLM
import pandas as pd
from langchain.document_loaders import DataFrameLoader

## Loaders

### 1. Pandas

In [2]:
df = pd.read_csv("youtube.csv")
df.head()

Unnamed: 0,RANK,GRADE,NAME,VIDEOS,SUBSCRIBERS,VIEWES
0,1,A++,T-Series,13629,105783888,76945588449
1,2,A,PewDiePie,3898,97853589,22298927681
2,3,A+,5-Minute Crafts,3341,58629572,14860695079
3,4,A++,Cocomelon - Nursery Rhymes,441,53163816,33519273951
4,5,A++,SET India,31923,51784081,36464793233


In [3]:
df = df.iloc[:,2:]
df.head()

Unnamed: 0,NAME,VIDEOS,SUBSCRIBERS,VIEWES
0,T-Series,13629,105783888,76945588449
1,PewDiePie,3898,97853589,22298927681
2,5-Minute Crafts,3341,58629572,14860695079
3,Cocomelon - Nursery Rhymes,441,53163816,33519273951
4,SET India,31923,51784081,36464793233


In [4]:
llm_data = llm_loader = DataFrameLoader(
    data_frame=df,
    page_content_column="NAME"
).load()
llm_data

[Document(metadata={'VIDEOS': 13629, 'SUBSCRIBERS': 105783888, 'VIEWES': 76945588449}, page_content='T-Series'),
 Document(metadata={'VIDEOS': 3898, 'SUBSCRIBERS': 97853589, 'VIEWES': 22298927681}, page_content='PewDiePie'),
 Document(metadata={'VIDEOS': 3341, 'SUBSCRIBERS': 58629572, 'VIEWES': 14860695079}, page_content='5-Minute Crafts'),
 Document(metadata={'VIDEOS': 441, 'SUBSCRIBERS': 53163816, 'VIEWES': 33519273951}, page_content='Cocomelon - Nursery Rhymes'),
 Document(metadata={'VIDEOS': 31923, 'SUBSCRIBERS': 51784081, 'VIEWES': 36464793233}, page_content='SET India'),
 Document(metadata={'VIDEOS': 1100, 'SUBSCRIBERS': 50560964, 'VIEWES': 25446405744}, page_content='Canal KondZilla'),
 Document(metadata={'VIDEOS': 42404, 'SUBSCRIBERS': 46098586, 'VIEWES': 34085586984}, page_content='WWE'),
 Document(metadata={'VIDEOS': 134, 'SUBSCRIBERS': 45873439, 'VIEWES': 625649566}, page_content='Justin Bieber'),
 Document(metadata={'VIDEOS': 209, 'SUBSCRIBERS': 43796634, 'VIEWES': 83543218

In [5]:
llm_data[0].model_dump()

{'id': None,
 'metadata': {'VIDEOS': 13629,
  'SUBSCRIBERS': 105783888,
  'VIEWES': 76945588449},
 'page_content': 'T-Series',
 'type': 'Document'}

### 2. URLs

In [6]:
from langchain.document_loaders import UnstructuredURLLoader
urls = [
    "https://www.apple.com/in/support/products/faqs.html",
    "https://www.apple.com/legal/sales-support/",
]

llm_loader = UnstructuredURLLoader(urls=urls)
llm_data = llm_loader.load()

In [7]:
print(llm_data[0])

page_content='Frequently Asked Questions

Where can I purchase the AppleCare Protection Plan?

The AppleCare Protection Plan is available at the Apple Online Store and many Apple-authorised resellers and wireless service providers.

How do I initiate repair service under the AppleCare Protection Plan?

Carry-in service. Carry your product into an Apple Authorised Service Provider.

Onsite service. Contact us and we’ll help you arrange an Apple-authorised repair for your desktop Mac at your location, at no additional charge.

Do-It-Yourself service. Contact us and we may be able to send you what you need to service your own product, such as accessories.

The AppleCare Protection Plan for Mac, for iPod and for Apple TV provide global repair coverage. Service will be limited to the options available in the country where service is requested. Service options, parts availability and response times vary by country.

All repairs will be completed using genuine Apple parts for repair service. 

### 3. WikiPedia

In [8]:
from langchain.document_loaders import WikipediaLoader

query = "Elon Musk"
llm_data = WikipediaLoader(query=query,load_max_docs=3,doc_content_chars_max=20_000).load()

In [9]:
print(llm_data[0])

page_content='Elon Reeve Musk  ( EE-lon; born June 28, 1971) is a businessman, known for his leadership of Tesla, SpaceX, X (formerly Twitter), and the Department of Government Efficiency (DOGE). Musk has been the wealthiest person in the world since 2021; as of May 2025, Forbes estimates his net worth to be US$424.7 billion.
Born to a wealthy family in Pretoria, South Africa, Musk emigrated in 1989 to Canada. He received bachelor's degrees from the University of Pennsylvania in 1997 before moving to California, United States, to pursue business ventures. In 1995, Musk co-founded the software company Zip2. Following its sale in 1999, he co-founded X.com, an online payment company that later merged to form PayPal, which was acquired by eBay in 2002. That year, Musk also became an American citizen.
In 2002, Musk founded the space technology company SpaceX, becoming its CEO and chief engineer; the company has since led innovations in reusable rockets and commercial spaceflight. Musk joine

### 4.PDF

In [10]:
from langchain.document_loaders import PyPDFLoader
pages = PyPDFLoader("CV.pdf").load_and_split()

In [11]:
print(pages[0])

page_content='MOAAZ ANWAR SOLIMAN
AI ENGINEER
moaazanwarsoliman@gmail.com
 
+201116823857 +201014397578
 
Egypt, Menofia
 
github.com/Animo-GD
 
linkedin.com/in/moaaz solomon/
 
OBJECTIVE
I am motivated to engage in a new experience in the field of Al and Machine Learning, expand my horizons of 
knowledge, and gain experience from experts on the ground. I am looking for a training opportunity as a fresh 
graduated to gain experience from experts in this field and work on real-world projects.
EDUCATION
Faculty of Electronic Engineering – Menoufia University
Major: Computer Science and Engineering
2019 – 2025
National Telecommunication Institute (NTI)
Completed 120 hours of intensive training in artificial intelligence, covering 
machine learning, deep learning, computer vision, and natural language processing.
09/2023 – 11/2023
Information Technology Institute (ITI)
Gained hands-on experience in Internet of Things concepts, including sensor 
integration, data collection, communication p

In [12]:
print(pages[0])

page_content='MOAAZ ANWAR SOLIMAN
AI ENGINEER
moaazanwarsoliman@gmail.com
 
+201116823857 +201014397578
 
Egypt, Menofia
 
github.com/Animo-GD
 
linkedin.com/in/moaaz solomon/
 
OBJECTIVE
I am motivated to engage in a new experience in the field of Al and Machine Learning, expand my horizons of 
knowledge, and gain experience from experts on the ground. I am looking for a training opportunity as a fresh 
graduated to gain experience from experts in this field and work on real-world projects.
EDUCATION
Faculty of Electronic Engineering – Menoufia University
Major: Computer Science and Engineering
2019 – 2025
National Telecommunication Institute (NTI)
Completed 120 hours of intensive training in artificial intelligence, covering 
machine learning, deep learning, computer vision, and natural language processing.
09/2023 – 11/2023
Information Technology Institute (ITI)
Gained hands-on experience in Internet of Things concepts, including sensor 
integration, data collection, communication p

### 5. PDF Contains Images

In [13]:
import fitz  # PyMuPDF
import pytesseract
from PIL import Image
import io
from langchain.schema import Document
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
class OCRPDFLoader:
    def __init__(self, file_path):
        self.file_path = file_path

    def load(self):
        doc = fitz.open(self.file_path)
        documents = []
        
        for page_num in range(len(doc)):
            # Render page as image
            pix = doc[page_num].get_pixmap()
            img = Image.open(io.BytesIO(pix.tobytes()))
            
            # OCR
            text = pytesseract.image_to_string(img)
            
            # Wrap into LangChain Document
            documents.append(
                Document(
                    page_content=text,
                    metadata={"source": self.file_path, "page": page_num+1}
                )
            )
        return documents


# ---- USAGE ----
loader = OCRPDFLoader("ImagePDF.pdf")
docs = loader.load()

# Now you can send `docs` to any LangChain text splitter or vector store
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(docs)

print(chunks[0].page_content)  # preview


@ Machine Learning Pipeline Cheat Sheet

1. Data Collection
© Ed what: Gather raw data from files, APIs, sensors, etc.

© R Tools: pandas, requests, SQL, scrapy, Seautifulsoup

F = pd. read_csv(‘data.csv')

2. Data Preprocessing
a.Cleaning

‘© Handle missing values ( aw )
+ Fixdata types

# Remove duplicates

# Normalize formats

4€( peice’ |-astype(+ioat)

Text Cleanup (NLP-specific)
© Remove punctuation, stopwords, URLS
# Lowercase conversion

* Tokenization

from sklearn feature extraction text inport ENGLISM_STOP_WORDS
from nitk.tokenize inport word tokentize

3. Exploratory Data Analysis (EDA)


### 6. Directory

In [14]:
from langchain.document_loaders import DirectoryLoader
llm_loader = DirectoryLoader("md-data",glob="*.txt",show_progress=True)

llm_data = llm_loader.load()

  0%|          | 0/3 [00:00<?, ?it/s]libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
100%|██████████| 3/3 [00:00<00:00, 60.16it/s]


In [15]:
print(llm_data[0])

page_content='If you want, I can set this up so LangChain downloads → checks PDF type → uses OCR if needed, so it works even for screenshot PDFs.

Do you want me to make that robust version?' metadata={'source': 'md-data\\1.txt'}


In [16]:
from langchain.document_loaders import PyPDFDirectoryLoader
llm_loader = PyPDFDirectoryLoader("pdf-files")
llm_data = llm_loader.load()

-----
## Text Splitter

In [17]:
from langchain.document_loaders import WikipediaLoader
query_1 = "Yahya Sinwar"
query_2 = "mohamed el deif"

docs_1 = WikipediaLoader(query=query_1,load_max_docs=1,doc_content_chars_max=20_000).load()
docs_2 = WikipediaLoader(query=query_2,load_max_docs=1,doc_content_chars_max=20_000).load()

In [18]:
text_document1 = docs_1[0].page_content
text_document2 = docs_2[0].page_content

In [19]:
print(text_document1)

Yahya Ibrahim Hassan Sinwar (Arabic: يحيى إبراهيم حسن السنوار, romanized: Yaḥyá Ibrāhīm Ḥasan al-Sinwār; 29 October 1962 – 16 October 2024) was a Palestinian militant and politician who served as fourth chairman of the Hamas Political Bureau from August 2024, and as the second leader of Hamas in the Gaza Strip from February 2017, succeeding Ismail Haniyeh in both roles. He was killed in a clash with the Israel Defense Forces (IDF) in October 2024.
Sinwar was born in the Khan Yunis refugee camp in Egyptian-occupied Gaza in 1962 to a family who had been expelled or fled from Majdal 'Asqalan during the 1948 Palestine War. He finished his studies at the Islamic University of Gaza, where he received a bachelor's degree in Arabic studies. In 1989, Sinwar was sentenced to four life sentences in Israel for orchestrating the abduction and killing of two Israeli soldiers and four Palestinians he considered to be collaborators. He spent 22 years in prison until his release among 1,026 others in a

In [20]:
print(text_document2)

Mohammed Diab Ibrahim al-Masri (Arabic: محمد دياب إبراهيم المصري, romanized: Muḥammad Diyāb Ibrāhīm al-Maṣrī;‎ 12 August 1965 – 13 July 2024), better known as Mohammed Deif (Arabic: محمد الضيف), was a Palestinian militant who served as the sixth commander of the Izz al-Din al-Qassam Brigades. He succeeded Salah Shehade following the latter's assassination in an Israeli airstrike in July 2002, until his assassination in July 2024. He was succeeded by Mohammed Sinwar, the younger brother of Yahya Sinwar. He was a central figure in the Gaza war and one of the masterminds of the October 7 attacks, alongside Yahya Sinwar.
Deif was born in 1965 in the Khan Yunis Refugee Camp in the Gaza Strip, to a family that had fled or been expelled during the 1948 Palestine war. He reportedly left school temporarily to support his low-income family, later graduating with a bachelor's degree in chemistry from the Islamic University of Gaza in 1988, where he had established a theater group.
Deif joined Ham

In [21]:
documents = [text_document1,text_document2]
metadata = [{"document":query_1},{"document":query_2}]

### 1. Characters

In [22]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(
    separator="",
    chunk_size=100,
    chunk_overlap=30,
    length_function = len
)

In [23]:
normal_chunks = text_splitter.create_documents(documents,metadatas=metadata)

In [24]:
len(normal_chunks)

550

In [25]:
print(normal_chunks[0])

page_content='Yahya Ibrahim Hassan Sinwar (Arabic: يحيى إبراهيم حسن السنوار, romanized: Yaḥyá Ibrāhīm Ḥasan al-Sin' metadata={'document': 'Yahya Sinwar'}


In [26]:
print(normal_chunks[1])

page_content='ed: Yaḥyá Ibrāhīm Ḥasan al-Sinwār; 29 October 1962 – 16 October 2024) was a Palestinian militant and' metadata={'document': 'Yahya Sinwar'}


In [27]:
print(normal_chunks[2])

page_content='was a Palestinian militant and politician who served as fourth chairman of the Hamas Political Burea' metadata={'document': 'Yahya Sinwar'}


### 2. NLTK

In [28]:
from langchain.text_splitter import NLTKTextSplitter
import nltk
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mo3az\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [29]:
text_splitter = NLTKTextSplitter(
    chunk_size = 500
)

In [30]:
nltk_chunks = text_splitter.create_documents(documents,metadatas=metadata)

Created a chunk of size 537, which is longer than the specified 500


In [31]:
len(nltk_chunks)

124

In [32]:
print(nltk_chunks[5].page_content)

He also developed strong ties with Iran.

Re-elected as Hamas leader in 2021, Sinwar survived an assassination attempt by Israel that same year.

He was widely regarded as the mastermind behind the October 7 attacks in 2023, which was followed by the Gaza war that spilled over to other parts of the Middle East.


In [33]:
print(nltk_chunks[5].metadata)

{'document': 'Yahya Sinwar'}


### 4. Transformers Tokens

In [34]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")

  from .autonotebook import tqdm as notebook_tqdm


In [35]:
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer=tokenizer,chunk_size=400,chunk_overlap=0)

In [36]:
chunks = text_splitter.create_documents(documents,metadatas=metadata)


Created a chunk of size 545, which is longer than the specified 400
Created a chunk of size 1748, which is longer than the specified 400
Created a chunk of size 746, which is longer than the specified 400
Created a chunk of size 599, which is longer than the specified 400
Created a chunk of size 609, which is longer than the specified 400
Created a chunk of size 563, which is longer than the specified 400


In [37]:
print(chunks[0].page_content)

Yahya Ibrahim Hassan Sinwar (Arabic: يحيى إبراهيم حسن السنوار, romanized: Yaḥyá Ibrāhīm Ḥasan al-Sinwār; 29 October 1962 – 16 October 2024) was a Palestinian militant and politician who served as fourth chairman of the Hamas Political Bureau from August 2024, and as the second leader of Hamas in the Gaza Strip from February 2017, succeeding Ismail Haniyeh in both roles. He was killed in a clash with the Israel Defense Forces (IDF) in October 2024.
Sinwar was born in the Khan Yunis refugee camp in Egyptian-occupied Gaza in 1962 to a family who had been expelled or fled from Majdal 'Asqalan during the 1948 Palestine War. He finished his studies at the Islamic University of Gaza, where he received a bachelor's degree in Arabic studies. In 1989, Sinwar was sentenced to four life sentences in Israel for orchestrating the abduction and killing of two Israeli soldiers and four Palestinians he considered to be collaborators. He spent 22 years in prison until his release among 1,026 others in a

-------
## Embedding

### 1. Hugging Face

!pip install sentence-transformers

In [38]:
from langchain.embeddings import SentenceTransformerEmbeddings
model = SentenceTransformerEmbeddings(model_name="intfloat/multilingual-e5-small")

  model = SentenceTransformerEmbeddings(model_name="intfloat/multilingual-e5-small")


In [39]:
chunks = ["Hi how are you?","Good Morning"]
embeddings = model.embed_documents(chunks)

In [40]:
print(len(embeddings[0]))

384


In [41]:
chunks = text_splitter.create_documents(documents,metadatas=metadata)

Created a chunk of size 545, which is longer than the specified 400
Created a chunk of size 1748, which is longer than the specified 400
Created a chunk of size 746, which is longer than the specified 400
Created a chunk of size 599, which is longer than the specified 400
Created a chunk of size 609, which is longer than the specified 400
Created a chunk of size 563, which is longer than the specified 400


In [42]:
docs_text = [chunk.page_content for chunk in chunks]
docs_embeddings = model.embed_documents(docs_text)

In [43]:
print(len(docs_embeddings))
print(len(docs_text))
print(len(docs_embeddings[0]))

19
19
384


In [44]:
question = "When Yahya Sinwar born?"
question_embedding = model.embed_query(question)

### 2. Cohere

!pip install cohere

In [45]:
from dotenv import dotenv_values
env_values = dotenv_values("app.env")
cohere_api_key = env_values["COHERE_API_KEY"]

In [46]:
from langchain_cohere import CohereEmbeddings
embedding_llm = CohereEmbeddings(cohere_api_key=cohere_api_key,model="embed-english-v3.0")

In [47]:
docs_embeddings = embedding_llm.embed_documents(docs_text)

In [48]:
len(docs_embeddings[0])

1024

In [49]:
question = "When Yahya Sinwar born?"
question_embedding = embedding_llm.embed_query(question)

----------
## Vector Stores

### 1. FAISS

!pip install faiss-cpu

In [50]:
from langchain.vectorstores import FAISS
vector_db = FAISS.from_documents(chunks,embedding_llm)

In [51]:
query = "When Yahya Sinwar born?"
similar_docs = vector_db.similarity_search(query=query)

In [52]:
print(similar_docs[0])

page_content='== Early life and education ==
Yahya Ibrahim Hassan al-Sinwar was born on 29 October 1962, in the Khan Yunis refugee camp, when the Gaza Strip was under Egyptian occupation, where he spent his early years. His family were forcibly expelled from Majdal Asqalan (Arabic: مدينة المجدل, romanized: Medīnat al-Majdal), now known as Ashkelon, during the Nakba, and sought refuge in the Gaza Strip. Sinwar, discussing his refugee upbringing, tied it to his Hamas involvement in conversations with fellow prisoners during his later imprisonment. According to Esmat Mansour, another inmate, Sinwar was deeply affected by the communal living conditions and food distribution in the refugee camp. After he graduated from high school at Khan Yunis Secondary School for Boys, he went on to the Islamic University of Gaza, where he received a bachelor's degree in Arabic studies. His younger brother is Mohammed Sinwar, a military leader of Hamas. Sinwar was a hafiz (a title given to those who have 

### 2. Chroma

!pip install chromadb

In [53]:
from langchain.vectorstores import Chroma

save_to_dir = "wiki_chroma_db"

docs_ids = list(range(len(chunks)))
docs_ids = list(map(str,docs_ids))
vector_db = Chroma.from_documents(chunks,embedding_llm,persist_directory=save_to_dir,ids=docs_ids)

In [54]:
query = "In which year Yahya Sinwar died?"
similar_docs = vector_db.similarity_search(query=query,k=3,filter={"document":"Yahya Sinwar"})

In [55]:
print(similar_docs[0])

page_content='Yahya Ibrahim Hassan Sinwar (Arabic: يحيى إبراهيم حسن السنوار, romanized: Yaḥyá Ibrāhīm Ḥasan al-Sinwār; 29 October 1962 – 16 October 2024) was a Palestinian militant and politician who served as fourth chairman of the Hamas Political Bureau from August 2024, and as the second leader of Hamas in the Gaza Strip from February 2017, succeeding Ismail Haniyeh in both roles. He was killed in a clash with the Israel Defense Forces (IDF) in October 2024.
Sinwar was born in the Khan Yunis refugee camp in Egyptian-occupied Gaza in 1962 to a family who had been expelled or fled from Majdal 'Asqalan during the 1948 Palestine War. He finished his studies at the Islamic University of Gaza, where he received a bachelor's degree in Arabic studies. In 1989, Sinwar was sentenced to four life sentences in Israel for orchestrating the abduction and killing of two Israeli soldiers and four Palestinians he considered to be collaborators. He spent 22 years in prison until his release among 1,0

In [56]:
# Save
vector_db.persist()

  vector_db.persist()


In [57]:
# Load from disk
loaded_vector_db = Chroma(
    persist_directory=save_to_dir,
    embedding_function=embedding_llm
)

  loaded_vector_db = Chroma(


In [58]:
query = "In which year Yahya Sinwar died?"
similar_docs = loaded_vector_db.similarity_search(query=query,k=3,filter={"document":"Yahya Sinwar"})

In [59]:
print(similar_docs[0])

page_content='Yahya Ibrahim Hassan Sinwar (Arabic: يحيى إبراهيم حسن السنوار, romanized: Yaḥyá Ibrāhīm Ḥasan al-Sinwār; 29 October 1962 – 16 October 2024) was a Palestinian militant and politician who served as fourth chairman of the Hamas Political Bureau from August 2024, and as the second leader of Hamas in the Gaza Strip from February 2017, succeeding Ismail Haniyeh in both roles. He was killed in a clash with the Israel Defense Forces (IDF) in October 2024.
Sinwar was born in the Khan Yunis refugee camp in Egyptian-occupied Gaza in 1962 to a family who had been expelled or fled from Majdal 'Asqalan during the 1948 Palestine War. He finished his studies at the Islamic University of Gaza, where he received a bachelor's degree in Arabic studies. In 1989, Sinwar was sentenced to four life sentences in Israel for orchestrating the abduction and killing of two Israeli soldiers and four Palestinians he considered to be collaborators. He spent 22 years in prison until his release among 1,0

## Answering the query

### 1. Stuff context

In [60]:
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
llm = OllamaLLM(model="mistral")

template_message = "\n".join([
    "Answer the following Question using the provided Context",
    "If No Answer in the context return 'No Answers Found'",
    "# Context",
    "{context}",
    "# Question",
    "{query}",
    "# Answer"
])
qna_prompt = PromptTemplate(
    template=template_message,
    input_variables=["context","query"]
)


query = "In which year Yahya Sinwar died?"




In [61]:
stuff_chain = load_qa_chain(llm=llm,chain_type="stuff",prompt=qna_prompt)

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  stuff_chain = load_qa_chain(llm=llm,chain_type="stuff",prompt=qna_prompt)


In [63]:
answer = stuff_chain(
    {
        "input_documents":similar_docs,
        "query":query
    },
    return_only_outputs=True
)

In [64]:
print(answer)

{'output_text': ' Yahya Sinwar died in October 2024.'}


### 2. MapReduce

In [65]:
qna_template = "\n".join([
    "Answer the next question using the provided context",
    "If the answer is not contained in the context say 'No Answer Is Available'",
    "# Context:",
    "{context}",
    "",
    "# Question:",
    "{query}",
    "",
    "# Answer:"
])

qna_prompt = PromptTemplate(template=qna_template,input_variables=["context","query"])

In [88]:
combine_template = "\n".join([
    "Given intermediate contexts for a question, generate a final answer",
    "If the answer not contained in the intermediate contexts say 'No Answer Is Available' ",
    "No Introduction, No Conlusion, and Direct Answer"
    "# Summaries:",
    "{summaries}",
    "",
    "# Question:",
    "{query}",
    "",
    "# Final Answer:"
])

combine_prompt = PromptTemplate(template=combine_template,input_variables=["summaries","query"])

In [89]:
query = "Why is Mohamed Dief Known by Dief?"

similar_docs = vector_db.similarity_search(query=query,k=5)

In [90]:
map_reduce_chain = load_qa_chain(
    llm=llm,
    chain_type="map_reduce",
    question_prompt = qna_prompt,
    combine_prompt = combine_prompt
    )

In [91]:
final_answer = map_reduce_chain(
    {
        "input_documents":similar_docs,
        "query":query
    },
    return_only_outputs=True
)

In [92]:
print(final_answer["output_text"])

 Mohamed Deif is known as Dief because it is a nom de guerre he took, and 'Dief' means 'guest' in Arabic. Another explanation suggests that the name comes from a theatre role he played in his earlier years. Additionally, he is sometimes referred to as Deif due to his nickname, which was given to him because he survived at least seven Israeli assassination attempts, earning him the nickname 'the cat with nine lives' among his Israeli adversaries.


### 3. Refine

In [95]:
initial_qna_template = "\n".join([
    "Answer the following question using only this",
    "# Context:",
    "{context_str}",
    "",
    "# Question:",
    "{question}",
    "",
    "# Answer:"
])

initial_qna_prompt = PromptTemplate(
    template=initial_qna_template,
    input_variables=["context_str","question"]
)

refine_template = "\n".join([
    "Refine the existing answer, if required, with the following context.",
    "If answer is not available say 'No Answer Found For This Context' ",
    "# Context:",
    "{context_str}",
    "",
    "# Existing Answer:",
    "{existing_answer}",
    "",
    "# Question:",
    "{question}",
    "",
    "# Refined Answer:"
])

refine_prompt = PromptTemplate(
    template=refine_template,
    input_variables=["context_str","existing_answer","question"]
)

In [96]:
refine_chain = load_qa_chain(
    llm = llm,
    chain_type="refine",
    question_prompt = initial_qna_prompt,
    refine_prompt=refine_prompt
)

In [98]:
refined_answer = refine_chain(
    {
        "input_documents":similar_docs,
        "question":query
    }, return_only_outputs=True
)

In [99]:
print(refined_answer["output_text"])

 Mohamed Deif, colloquially known as "Dief," earned this nickname due to its translation to "guest" in Arabic. This moniker was given to him because of his practice of frequently staying in different houses each night as a precaution against Israeli attacks. Another possible explanation is that the name originated from a theater role he played during his younger years, which also involved portraying guests or transient characters. Deif, a prominent member of Hamas, joined the organization during the First Intifada and was involved in various militant activities, including suicide bombings and the kidnapping and killing of Israeli soldiers. His frequent changes of location as a means of evading capture by Israeli authorities contributed to his nickname "Dief." Tragically, Deif lost his wife Widad Asfoura, their infant son Ali, and three-year-old daughter Sarah in an assassination attempt in 2014, leaving behind two other children: Omar and Hala. He also had two other sons, Bahaa and Kha