In [1]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import ChatPromptTemplate
from llm_call import LLMCall
from embeddings import Embeddings
from operator import itemgetter

In [2]:
# Source document and chunking parameters (sizes are in characters)
pdf_file = 'sample.pdf'
chunk_size, chunk_overlap = 1000, 200

# Read the PDF into Document objects
loader = PyPDFLoader(pdf_file)
documents = loader.load()

# Cut the loaded documents into overlapping chunks for embedding
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)
texts = text_splitter.split_documents(documents)

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


In [3]:
# Peek at the first chunk (metadata and page_content) to sanity-check the split
texts[0]

Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 19.3 (Macintosh)', 'creationdate': '2024-06-18T14:09:48-07:00', 'moddate': '2024-06-18T14:10:14-07:00', 'trapped': '/False', 'source': 'sample.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='Before using iPhone, review the iPhone User Guide  at  \nsupport.apple.com/guide/iphone .\nSafety and Handling\nSee ‚ÄúSafety, handling, and support‚Äù in the iPhone  \nUser Guide .\nExposure to Radio Frequency\nOn iPhone, go to Settings > General > Legal &  \nRegulatory > RF Exposure. Or go to apple.com/  \nlegal/rfexposure .\nBattery and Charging\nAn iPhone battery should only be repaired by a trained \ntechnician to avoid battery damage, which could cause \noverheating, fire, or injury. Batteries should be recycled \nor disposed of separately from household waste and \naccording to local environmental laws and guidelines. For \ninformation about Apple lithium-ion batteries and battery \nservi

In [4]:
# Number of chunks produced by the splitter
len(texts)

8

In [5]:
# Multi-query template.
# Prompt text for the query-expansion step: it asks the model for 5 rephrasings
# of {question}, separated by newlines. The newline-separated reply format is
# what the multi-query chain relies on when it splits the model output on "\n".
multi_query_template = """

You are a question(query) generator. You can create multiple questions similar with multiple prespective to the given question with the same meaning.
By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Please generate 5 questions similar to the given question.
Make sure to use different words and phrases to express the same idea.
The questions should be clear and concise.
The questions should be grammatically correct and easy to understand.
The questions should be in English.
Provide these alternative questions separated by newlines.

The question is: {question}

"""

In [6]:
# Chat prompt for query expansion: a fixed system message followed by the
# multi-query template as the human turn.
m_query_prompt = ChatPromptTemplate.from_messages(
    messages=[
        ("system", "You are a helpful assistant."),
        ("human", multi_query_template),
    ],
)


In [7]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval results.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list holding one entry per distinct document, in first-seen
        order. Two documents count as duplicates when ``dumps`` serializes
        them to the same string. The returned items are the original objects
        (the first occurrence of each), not re-deserialized copies.
    """
    # Key every document by its serialized form. A dict keeps insertion order,
    # so the result is deterministic across runs (a set of strings is not),
    # and keeping the original object avoids a dumps -> loads round-trip.
    unique_docs = {}
    for sublist in documents:
        for doc in sublist:
            unique_docs.setdefault(dumps(doc), doc)
    return list(unique_docs.values())

In [8]:
# Answer-generation prompt template.
# Placeholders: {question} is the customer's query and {context} is the
# retrieved material. The text instructs the model to answer only from the
# supplied context and to decline questions unrelated to it.

rag_template = """
You are a customer service agent for a apple mobile company. 
You have been given the following information about the customer question and the context.
Customer Query: {question}
Context: {context}

Answer: 
The answer should be based on the context provided.
Your task is to answer the customer question based on the context provided. If the question is not related to the context, please say "I don't know or Do Not Answer it just say please ask me question related to Apple Mobiles only".
Do not make up any information or provide any personal opinions or experiences.
Please answer in a friendly and professional manner.
"""

In [9]:
# Chat prompt for answer generation: a fixed system message followed by the
# RAG template as the human turn.
rag_prompt = ChatPromptTemplate.from_messages(
    messages=[
        ("system", "You are a helpful assistant."),
        ("human", rag_template),
    ],
)

# Show the assembled prompt, including the input variables it expects
print(rag_prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant.'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\nYou are a customer service agent for a apple mobile company. \nYou have been given the following information about the customer question and the context.\nCustomer Query: {question}\nContext: {context}\n\nAnswer: \nThe answer should be based on the context provided.\nYour task is to answer the customer question based on the context provided. If the question is not related to the context, please say "I don\'t know or Do Not Answer it just say please ask me question related to Apple Mobiles only".\nDo not make up any information or provide any personal opinions or experiences.\nPlease answer in a

Using Azure OpenAI Embeddings and Model

In [10]:
# Embedding model built by the project-local `Embeddings` helper (imported from `embeddings`).
# NOTE(review): presumably an Azure OpenAI embeddings client whose credentials come from
# configuration outside this notebook — confirm in embeddings.py.
open_ai_embeddings = Embeddings.azure_openai()

In [11]:
# Embed every chunk with the Azure OpenAI embeddings and index the vectors in FAISS
vectorstore = FAISS.from_documents(documents=texts, embedding=open_ai_embeddings)

In [12]:
# Expose the FAISS index as a retriever; no search arguments are passed,
# so the library's default search settings apply.
retriever = vectorstore.as_retriever()

In [13]:
# Chat model built by the project-local `LLMCall` helper (imported from `llm_call`).
# NOTE(review): model/deployment settings are not visible here — confirm in llm_call.py.
open_ai_llm = LLMCall.azure_openai()

In [14]:
# Generate several rephrasings of the given question so retrieval is less
# sensitive to the exact wording of the original query.
multi_query_chain = (
    m_query_prompt
    | open_ai_llm
    | StrOutputParser()
    # One query per line; drop blank/whitespace-only lines so that empty
    # strings are never sent to the retriever as queries.
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [15]:
# Sanity check: run the multi-query retrieval on a sample question and count
# how many unique chunks come back across all generated queries.
question = "Is there a warranty on the phone?"

# One retrieval per generated query (retriever.map()), then de-duplicate the results
multi_query_retrieval_chain = multi_query_chain | retriever.map() | get_unique_union
docs = multi_query_retrieval_chain.invoke({"question": question})
len(docs)

  return [loads(doc) for doc in unique_docs]


5

In [16]:
# Full RAG pipeline: retrieve context with the multi-query chain, fill the
# RAG prompt, call the model and return the answer as a plain string.
final_rag_chain = (
    {
        # Join the text of the retrieved chunks so the prompt receives plain
        # context rather than the repr of a list of Document objects
        # (which would also include all of their metadata).
        "context": multi_query_retrieval_chain
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": itemgetter("question"),
    }
    | rag_prompt
    | open_ai_llm
    | StrOutputParser()
)

In [17]:
# Run the full pipeline on the sample question and show the answer
response = final_rag_chain.invoke({"question": question})

print('📦 Answer:', response)

📦 Answer: Yes, there is a warranty on the phone. Apple offers a One-Year Limited Warranty that covers defects in materials and workmanship for one year from the date of original retail purchase. However, this warranty does not cover normal wear and tear or damage caused by accident or abuse. If you need service, you can call Apple or visit an Apple Store or an Apple Authorized Service Provider. For more detailed information, you can visit apple.com/legal/warranty.


Using HuggingFace Model and Embeddings

In [18]:
# Embedding model built by the project-local `Embeddings` helper (imported from `embeddings`).
# NOTE(review): which HuggingFace model is loaded is not visible here — confirm in embeddings.py.
huggingface_embeddings = Embeddings.huggingface()


In [19]:
# Re-index the same chunks with the HuggingFace embeddings.
# This rebinds `vectorstore`, replacing the index built earlier in the notebook.
vectorstore = FAISS.from_documents(documents=texts, embedding=huggingface_embeddings)

In [20]:
# Rebind `retriever` to the current `vectorstore`; chains built after this
# cell search this index. No search arguments are passed, so the library's
# default search settings apply.
retriever = vectorstore.as_retriever()

In [21]:
# Reuse the multi-query prompt defined above, now retrieving from the index built with the HuggingFace embeddings.
# NOTE: the HuggingFace LLM is commented out below, so the chains in this section still call open_ai_llm.

In [22]:
# huggingface_llm = LLMCall.huggingface()

In [23]:
# Query expansion followed by retrieval against the current `retriever`
multi_query_chain = (
    m_query_prompt
    | open_ai_llm
    | StrOutputParser()
    # One query per line; drop blank/whitespace-only lines so that empty
    # strings are never sent to the retriever as queries.
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)
# One retrieval per generated query (retriever.map()), then de-duplicate the results
multi_query_retrieval_chain = multi_query_chain | retriever.map() | get_unique_union

In [24]:
# Full RAG pipeline: retrieve context with the multi-query chain, fill the
# RAG prompt, call the model and return the answer as a plain string.
final_rag_chain = (
    {
        # Join the text of the retrieved chunks so the prompt receives plain
        # context rather than the repr of a list of Document objects
        # (which would also include all of their metadata).
        "context": multi_query_retrieval_chain
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": itemgetter("question"),
    }
    | rag_prompt
    | open_ai_llm
    | StrOutputParser()
)

# NOTE: open_ai_llm is still the model used here; huggingface_llm is not
# defined because its assignment is commented out earlier in this section.

In [25]:
# Run the full pipeline on the sample question and show the answer
response = final_rag_chain.invoke({"question": question})

print('📦 Answer:', response)

📦 Answer: Yes, there is a warranty on the phone. Apple offers a one-year limited warranty that covers defects in materials and workmanship for the included hardware product and accessories from the date of original retail purchase. However, this warranty does not cover normal wear and tear or damage caused by accident or abuse. If you need to obtain service, you can call Apple or visit an Apple Store or an Apple Authorized Service Provider. For more detailed information, you can visit apple.com/legal/warranty. If you have any further questions, feel free to ask!


Using Ollama

> We reuse the same HuggingFace embeddings for Ollama, but instead of a model from HuggingFace or OpenAI we use a locally downloaded model served through `Ollama` — llama3.2 in this example. To run this example with a different model, check the Ollama model library and download it with `ollama run <model name>`.

In [26]:
# Chat model built by the project-local `LLMCall` helper (imported from `llm_call`).
# NOTE(review): the accompanying note says this is a local llama3.2 served by Ollama;
# the model name is not visible in this cell — confirm in llm_call.py.
ollama_llm = LLMCall.chat_ollama()

In [27]:
# Query expansion with the Ollama model, followed by retrieval against the current `retriever`
multi_query_chain = (
    m_query_prompt
    | ollama_llm
    | StrOutputParser()
    # One query per line; drop blank/whitespace-only lines so that empty
    # strings are never sent to the retriever as queries.
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)
# One retrieval per generated query (retriever.map()), then de-duplicate the results
multi_query_retrieval_chain = multi_query_chain | retriever.map() | get_unique_union

In [28]:
# Full RAG pipeline: retrieve context with the multi-query chain, fill the
# RAG prompt, call the Ollama model and return the answer as a plain string.
final_rag_chain = (
    {
        # Join the text of the retrieved chunks so the prompt receives plain
        # context rather than the repr of a list of Document objects
        # (which would also include all of their metadata).
        "context": multi_query_retrieval_chain
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": itemgetter("question"),
    }
    | rag_prompt
    | ollama_llm
    | StrOutputParser()
)

In [34]:
# Run the full pipeline on the sample question and show the answer
response = final_rag_chain.invoke({"question": question})

print('📦 Answer:', response)

📦 Answer: Yes, there is a warranty on the phone. According to the information provided, Apple offers a one-year limited warranty on the hardware product and accessories against defects in materials and workmanship from the date of original retail purchase. You can find more detailed information on obtaining service and the full terms of the warranty at apple.com/legal/warranty and support.apple.com.


Using Groq Inference API

> In this example we use a Groq API key to take advantage of Groq's fast inference API. Here I am using `llama-3.3-70b-versatile` (Llama 3.3). To use a different model, pass its name to the function called here, or change it in the `llm_call.py` file, in the `chat_groq` function of the `LLMCall` class.

In [30]:
# Chat model built by the project-local `LLMCall` helper (imported from `llm_call`).
# NOTE(review): the accompanying note names `llama-3.3-70b-versatile` as the model;
# no model name is passed in this cell — confirm the default in llm_call.py.
groq_llm = LLMCall.chat_groq()

In [31]:
# Query expansion with the Groq model, followed by retrieval against the current `retriever`
multi_query_chain = (
    m_query_prompt
    | groq_llm
    | StrOutputParser()
    # One query per line; drop blank/whitespace-only lines so that empty
    # strings are never sent to the retriever as queries.
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)
# One retrieval per generated query (retriever.map()), then de-duplicate the results
multi_query_retrieval_chain = multi_query_chain | retriever.map() | get_unique_union

In [32]:
# Full RAG pipeline: retrieve context with the multi-query chain, fill the
# RAG prompt, call the Groq model and return the answer as a plain string.
final_rag_chain = (
    {
        # Join the text of the retrieved chunks so the prompt receives plain
        # context rather than the repr of a list of Document objects
        # (which would also include all of their metadata).
        "context": multi_query_retrieval_chain
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": itemgetter("question"),
    }
    | rag_prompt
    | groq_llm
    | StrOutputParser()
)

In [33]:
# Run the full pipeline on the sample question and show the answer
response = final_rag_chain.invoke({"question": question})

print('📦 Answer:', response)

📦 Answer: Yes, there is a warranty on the phone. According to the Apple One-Year Limited Warranty Summary, Apple warrants the included hardware product and accessories against defects in materials and workmanship for one year from the date of original retail purchase. You can find more detailed information on obtaining service at apple.com/legal/warranty and support.apple.com.


<!-- Font Awesome CDN (Add in <head> if not already included) -->
<link
  rel="stylesheet" 
  href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.0/css/all.min.css"
/>

<!-- Social Footer Section -->
<div style="
  background-color:rgb(199, 195, 195);
  padding: 40px 30px;
  border-radius: 20px;
  box-shadow: 0 4px 12px rgba(0,0,0,0.08);
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  font-size: 18px;
  max-width: 900px;
  margin: 60px auto 30px;
  text-align: center;
  color: #444;
">
<!-- End of Notebook Note -->
  <h2 style="margin-bottom: 10px;">📘 End of Notebook</h2>
  <p style="color: #666; font-size: 14px;">
    Thank you for exploring! Feel free to connect via the links below.
  </p>

  <!-- Social Icons -->
<div style="
  display: flex;
  gap: 25px;
  align-items: center;
  flex-wrap: wrap;
  justify-content: center;
  margin-bottom: 25px;
">
  <!-- LinkedIn -->
  <a href="https://www.linkedin.com/in/ChiragB254" target="_blank" style="text-decoration: none; color: #0077b5;">
    <i class="fab fa-linkedin fa-lg"></i> LinkedIn
  </a>

  <!-- GitHub -->
  <a href="https://github.com/ChiragB254" target="_blank" style="text-decoration: none; color: #333;">
    <i class="fab fa-github fa-lg"></i> GitHub
  </a>

  <!-- Instagram -->
  <a href="https://www.instagram.com/data.scientist_chirag" target="_blank" style="text-decoration: none; color: #E1306C;">
    <i class="fab fa-instagram fa-lg"></i> Instagram
  </a>

  <!-- Email -->
  <a href="mailto:devchirag27@gmail.com" style="text-decoration: none; color: #D44638;">
    <i class="fas fa-envelope fa-lg"></i> Email
  </a>

  <!-- X (Twitter) -->
  <a href="https://x.com/ChiragB254" target="_blank" style="text-decoration: none; color: #000;">
    <i class="fab fa-x-twitter fa-lg"></i> X.com
  </a>
  </div>

  <p style="font-size: 13px; color: black; font-style: italic; margin-top: 8px;">
    <strong>Made with ❤️ by Chirag Bansal</strong>
  </p>
</div>

