In [24]:
# Import necessary libraries
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader

In [25]:
import nltk

# Fetch the NLTK resources used for text processing; each call is a no-op
# when the package is already up to date (presumably these are needed by the
# Unstructured PDF loader used later in the notebook -- confirm).

# Sentence tokenizer models
nltk.download("punkt_tab")
# English part-of-speech tagger
nltk.download("averaged_perceptron_tagger_eng")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Yaman\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\Yaman\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


True

In [26]:
# Path to the PDF file to index (relative to the notebook's working directory)
local_path = "knowledge_doc.pdf"

# Fail fast when no path is configured. Merely printing a message would leave
# `data` undefined (or stale from a previous run of this cell), so every
# later cell would either crash with a NameError or silently keep working on
# an old document.
if not local_path:
    raise ValueError("Upload a PDF file: `local_path` is empty")

# Load the PDF document from the local file system
loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()

In [27]:
# Preview the beginning of the first loaded Document's text.
# NOTE(review): with UnstructuredPDFLoader's default mode the whole PDF is
# presumably returned as a single Document (not one per page) -- confirm.
# The preview is truncated so the cell does not dump the entire text into
# the notebook output.
PREVIEW_CHARS = 1000
data[0].page_content[:PREVIEW_CHARS]



In [28]:
# Import the LangChain embedding, text-splitting, and vector-store modules
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [29]:
# Cut the loaded document(s) into chunks that will each be embedded separately.
# Chunks are at most 7500 characters long and consecutive chunks share up to
# 100 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=7500,
)
chunks = text_splitter.split_documents(data)

In [30]:
# Embed every chunk with the local "nomic-embed-text" Ollama model and store
# the vectors in a Chroma collection that is persisted under `models/`.
# NOTE(review): because the collection is persisted, re-running this cell may
# add the same chunks to the existing collection again -- confirm.
persist_directory = "models/"
embedding_model = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory=persist_directory,
    collection_name="local-rag",
)

OllamaEmbeddings: 100%|██████████| 61/61 [03:04<00:00,  3.02s/it]


In [31]:
# Import the prompt, chat-model, output-parser, and retriever components used to build the RAG chain
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [32]:
# Name of the local Ollama model that generates query variations and answers
local_model = "mistral"

# Chat-model wrapper shared by the multi-query retriever and the final chain
llm = ChatOllama(model=local_model)

In [33]:
# Prompt that asks the LLM to rephrase the user's question five ways; the
# rephrasings come back newline-separated and are used as extra search queries.
multi_query_template = """You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""

QUERY_PROMPT = PromptTemplate(
    template=multi_query_template,
    input_variables=["question"],
)

In [34]:
# Wrap the vector store's retriever in a MultiQueryRetriever: the LLM rewrites
# the question using QUERY_PROMPT and the vector store is searched with the
# resulting query variations.
base_retriever = vector_db.as_retriever()
retriever = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Answer-generation prompt: the model is told to rely solely on the retrieved
# context, which is substituted for {context}; the user's question fills
# {question}.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

# Build the chat prompt used by the RAG chain
prompt = ChatPromptTemplate.from_template(template)

In [35]:
def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing `page_content`.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create the chain for the RAG system.
# The retriever output is piped through `format_docs` so the prompt receives
# plain text; without it the list of Document objects is interpolated as its
# Python repr (metadata, escaped newlines) into the {context} slot.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [36]:
# Read a question typed by the user and run it through the RAG chain;
# the chain's answer string is displayed as the cell output.
user_question = input("")
chain.invoke(user_question)

OllamaEmbeddings: 100%|██████████| 1/1 [00:05<00:00,  5.01s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.10s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.22s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.21s/it]


" The warranty period for the product starts on the date it is sold to an end user for the first time. The duration of the warranty varies based on the country or region. For most countries, the warranty is 12 months, but for specific countries like Australia, Moldova, Montenegro, New Zealand, North Macedonia, Saudi Arabia, Serbia, Turkey (Covered Countries), and European Union member states, Iceland, Liechtenstein, Norway, Switzerland, and United Kingdom (Region), the warranty is 24 months. The warranty for accessories like user-replaceable batteries, covers, cables, chargers, headsets, and any other accessory included in the sales package is 6 months across all the mentioned countries and regions. During the warranty period, if a defect arises due to materials or workmanship, it will be remedied free of charge by either repairing or replacing the defective product or accessory. However, this warranty does not cover several situations such as damage caused by exposure to liquid or dam

In [37]:
# Run a fixed example question through the RAG chain and display the answer
example_question = "How do I insert a Sim , Give me the name of the phone for which you are providing information?"
chain.invoke(example_question)

OllamaEmbeddings: 100%|██████████| 1/1 [00:05<00:00,  5.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.20s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.29s/it]


" The instructions provided seem to be for a Nokia 6310 phone. Here's how to insert a SIM card into it:\n\n1. Put your fingernail in the small slot at the top of the phone, lift and remove the cover.\n2. If the battery is in the phone, to remove it, lift it out.\n3. Slide the SIM card in the SIM card slot with the contact area face down. If you have a second SIM, slide it in the SIM2 slot. Both SIM cards are available at the same time when the device is not being used, but while one SIM card is active, for example, making a call, the other may be unavailable.\n4. If you have a memory card, slide the memory card holder to the left and open it up. Place the memory card in the slot."