In [None]:
# pip install -r requirements.txt

In [1]:
import os
import pickle
import warnings
import time
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import Ollama

# Suppress every DeprecationWarning for the rest of the session
# (presumably to hide notices from the older LangChain import paths used above -- TODO confirm).
warnings.filterwarnings("ignore", category=DeprecationWarning)

# 1. Load PDFs and Split into Chunks
- [How to Load PDFs](https://python.langchain.com/docs/how_to/document_loader_pdf/)
- Each chunk contains one page of a PDF

In [None]:
folder_path = "documents/"
pages = []
for file_name in os.listdir(folder_path):
    if file_name.endswith(".pdf"):
        file_path = os.path.join(folder_path, file_name)
        try:
            loader = PyPDFLoader(file_path)
            print(f"Extract {file_name}")
        except Exception as e:
            print(f"Failed to extract text from {file_name}: {e}")

        async for page in loader.alazy_load():
            pages.append(page)


Extract ZMQ Functional Description.pdf


In [5]:
# Sanity check: show the extracted text of the first loaded page.
first_page = pages[0]
print(first_page.page_content)

 
 
Electricity Meters IEC 
High Precision Metering 
 
Qualigrid ZMQ200, ZFQ200, ZCQ200
E850
Functional Description
 
 
Date: 21.12.2011 
File name: D000011320 E850 ZxQ Functional Description EN.docx 
  
© Landis+Gyr D000011320 EN h



# 2. Embed PDF and store in vectorstore 

In [None]:
# Embedding model used to turn each page into a vector.
embedding_wrapper = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed all loaded pages and keep the resulting vectors in an in-memory store.
vectorstore = InMemoryVectorStore.from_documents(
    pages,
    embedding_wrapper,
)
print("All chuncks are embedded and stored in vectorstore")


In [None]:
vector_file = 'vectorstore/vectorstore.pkl'

# open(..., "wb") does not create missing parent directories, so make sure the
# target folder exists before writing (no-op when it is already there).
vector_dir = os.path.dirname(vector_file)
if vector_dir:
    os.makedirs(vector_dir, exist_ok=True)

# Save vectorstore object to a file
with open(vector_file, "wb") as f:
    pickle.dump(vectorstore, f)
print("Vector store saved using pickle.")



In [None]:

# Restore the vectorstore from the pickle file at `vector_file`.
# SECURITY NOTE: pickle.load() can execute arbitrary code embedded in the file,
# so only load pickle files that you created yourself.
with open(vector_file, "rb") as pickle_in:
    vectorstore = pickle.load(pickle_in)
print("Vector store loaded using pickle.")

# # Build the retriever
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})


# 3. Build a retriever from the vectorstore

In [None]:
# Wrap the vectorstore in a retriever via `.as_retriever()`.
# Invoking it with a query returns a list of the k most similar pages
# (instances of langchain_core.documents.base.Document).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},  # k = number of pages returned per query
)


# 4. Generate response from query and LLM
Pull the model first with: ``ollama pull llama3.2``

Downloaded models are stored in: ``C:\Users\<user_name>\.ollama\models``

``.invoke()`` and ``.stream()``
- ``.invoke()`` 
    - This method runs the entire ``rag_chain`` pipeline with the provided query as input and returns the final output from ``StrOutputParser`` as a single response.
    - In this case, the retriever fetches the documents relevant to the query, ``format_docs_truncate`` formats and truncates them, the prompt is populated, and the model generates an answer, which is parsed and returned.
- ``.stream()``
    - This method executes the ``rag_chain`` in a streaming fashion, yielding each chunk of the generated answer as it’s produced by the model. This is useful for generating large outputs that need to be processed or displayed incrementally, enabling real-time feedback to the user.

In [None]:

# Local Ollama model used to generate the answers.
# NOTE: `cache=True` was removed. It tells LangChain to use the *global* LLM
# cache, but this notebook never configures one (no `set_llm_cache(...)` call),
# so non-streaming calls such as `rag_chain.invoke(...)` fail with a
# "no cache configured" ValueError. Configure a global cache first if response
# caching is wanted.
llm = Ollama(
    model="llama3.2",
    temperature=0.7,   # Adjusts randomness
    top_k=40,          # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
    top_p=0.5,         # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
    verbose=False,
)

[Ollama Documentation](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.ollama.Ollama.html#langchain_community.llms.ollama.Ollama)

In [None]:
# Prompt text with two placeholders filled in by the chain:
#   {context}  - the formatted text of the retrieved pages
#   {question} - the user's query
template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context and sources to answer the asked question only. If you don't know the answer, say that you don't know.
<context>
{context}
</context>
Answer the following question and include all sources at the end of your respond (in format of xxx.pdf and page):
{question}
"""

# Turn the raw text into a chat prompt template usable inside a chain.
prompt = ChatPromptTemplate.from_template(template)

In [None]:
def format_docs(docs):
    """Concatenate retrieved documents into one context string.

    Each document contributes ``'From <source line><page content>'`` followed
    by a blank line, where the source line comes from ``get_source_from_doc``.

    Args:
        docs: iterable of documents exposing ``page_content`` and ``metadata``.

    Returns:
        The combined text; an empty string when ``docs`` is empty.
    """
    sections = []
    for document in docs:
        header = 'From ' + get_source_from_doc(document)
        sections.append(header + document.page_content + '\n\n')
    return ''.join(sections)

def format_docs_truncate(docs, max_chars=5000):
    """Format the documents with ``format_docs`` and cap the length of the result.

    Args:
        docs: iterable of retrieved documents, passed on to ``format_docs``.
        max_chars: maximum number of characters to keep. Defaults to 5000,
            the limit that was previously hard-coded.

    Returns:
        The first ``max_chars`` characters of the formatted context. The cut is
        purely character-based, so the last document may be cut mid-sentence.
    """
    text = format_docs(docs)
    return text[:max_chars]

def get_source_from_doc(doc):
    """Build a one-line source reference such as ``'manual.pdf, page 3\\n'``.

    Args:
        doc: document whose ``metadata`` holds ``'source'`` (the file path) and
            ``'page'`` (a page index; 1 is added for display, so it is
            presumably 0-based -- verify against the loader in use).

    Returns:
        ``'<file name>, page <page + 1>'`` terminated by a newline.

    Raises:
        KeyError: if ``'page'`` or ``'source'`` is missing from the metadata.
    """
    page_number = str(doc.metadata['page'] + 1)
    source_path = str(doc.metadata['source'])
    # Keep only the file name. Splitting on the LAST separator (and treating
    # '\\' like '/') also handles nested folders and Windows-style paths, which
    # stripping up to the first '/' did not.
    file_name = source_path.replace('\\', '/').rsplit('/', 1)[-1]
    return file_name + ', page ' + page_number + '\n'

def get_source_from_list(docs):
    """Return the source lines of all ``docs`` as a single string.

    Each document is converted with ``get_source_from_doc``; the resulting
    lines are concatenated in order without any extra separator.
    """
    source_lines = []
    for document in docs:
        source_lines.append(get_source_from_doc(document))
    return "".join(source_lines)


# First stage: turn the incoming query into the two prompt variables.
#   context  - retrieved pages, formatted and truncated into one string
#   question - the query itself, passed through unchanged
chain_inputs = {
    "context": retriever | format_docs_truncate,
    "question": RunnablePassthrough(),
}

# Full pipeline: query -> {context, question} -> filled prompt -> LLM output
# -> plain, readable string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()


In [None]:
# Evaluation questions fed to the RAG chain one at a time.
query = [
    # Questions about the ZMQ meter
    'What is the role of the signal processor in the ZMQ metering system?',
    'What are the three meter types discussed in the manual, and how do they differ?',
    'What are the possible housing types for the ZMQ meter?',
    'What is the significance of the hardware configuration ID for the ZMQ meter?',
    'How is the network frequency calculated by the ZMQ metering system?',
    'What are the default measured quantities available in C.4 meters?',
    'Which firmware versions are used for meters with the C.2 and C.4 software configurations?',
    'How is reactive energy allocated to four quadrants in the ZMQ system?',
    'What is the purpose of the MAP120 tool mentioned in the manual?',
    'Describe the process for calculating apparent energy in ZMQ meters.',
    'How does the ZMQ system handle frequency monitoring for error detection?',
    'What standards or protocols does the ZMQ meter use for communication interfaces?',
    'Can the ZMQ meter support custom configurations for harmonic distortion analysis?',
    'What specific features make the C.7 configuration suitable for the Indian market?',
    'How does the ZMQ meter accommodate changes in energy tariffs through its configuration?',
    'What are the use cases for the additional power supply in ZMQ meters?',
    'What does the manual suggest regarding the accuracy limitations of voltage dips?',
    "How does the manual address cybersecurity considerations in the ZMQ meter's design?",
    'What applications are best suited for using the power quality recorder in the ZMQ meter?',
    'Does the ZMQ meter provide direct support for integration with smart grid systems?',
    # General-knowledge questions that do not mention the ZMQ meter
    'What is the latest version of the iPhone?',
    'How do solar panels convert sunlight into electricity?',
    'What are the main features of Windows 11?',
    'Who discovered the theory of relativity?',
    'How does blockchain technology work?',
    'What is the capital of Japan?',
    'Explain the process of DNA replication.',
    'What are the key features of Tesla electric cars?',
    'What are the health benefits of a Mediterranean diet?',
    'How does 5G technology differ from 4G?',
]

def generate_response(query, chain=None):
    """Stream an answer for ``query``, echoing it live, and time the generation.

    Args:
        query: the question passed to the chain's ``.stream()`` method.
        chain: object with a ``.stream(query)`` method yielding string chunks.
            Defaults to the notebook-level ``rag_chain``.

    Returns:
        A ``(response, seconds)`` tuple: the full answer text and the elapsed
        time in seconds for producing it.
    """
    if chain is None:
        chain = rag_chain
    chunks = []
    # perf_counter() is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments the way time.time() differences can.
    start_generate_time = time.perf_counter()
    for chunk in chain.stream(query):
        print(chunk, end="", flush=True)  # show the answer as it is generated
        chunks.append(chunk)
    elapsed = time.perf_counter() - start_generate_time
    return "".join(chunks), elapsed

# Run every question through the RAG chain and append the answer, the
# retrieved sources and the generation time to response.txt.
for q in query:
    print(f"{q}\n")
    response, generate_time = generate_response(q)
    print("\n**********")

    # Query the retriever again to list which pages it returns for this question.
    retrieved_docs = retriever.invoke(q)
    sources =  get_source_from_list(retrieved_docs)

    output_text = f'''
    {q} \n
    {response}\n
    Retrieved source:\n
    {sources}
    Generate time: {generate_time:.2f}\n
    **********
    '''
    # Explicit UTF-8: with the platform default encoding (e.g. cp1252 on
    # Windows) non-ASCII characters in the answer can raise UnicodeEncodeError.
    with open('response.txt', 'a', encoding='utf-8') as file:
        file.write(output_text)

Resources:
1. [How-to guides](https://python.langchain.com/docs/how_to/)
2. [How to get your RAG application to return sources](https://python.langchain.com/docs/how_to/qa_sources/)
3. [How to stream runnables](https://python.langchain.com/docs/how_to/streaming/)
4. [How to add message history](https://python.langchain.com/docs/how_to/message_history/)