In [1]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pull settings from a local .env file into the process environment.
# Later cells read ZILLIZ_URL and ZILLIZ_TOKEN through os.getenv; the
# OpenAI / Groq credentials presumably arrive the same way — TODO confirm.
load_dotenv()

True

In [2]:
def update_document_metadata(docs):
    """Overwrite every document's metadata with its position in ``docs``.

    The ``metadata`` attribute of each item is replaced (not merged) by a
    new dict holding a single ``'page_number'`` key. Numbering starts at 0
    and follows iteration order. Anything previously stored in ``metadata``
    is discarded.

    Args:
        docs: iterable of objects exposing a writable ``metadata`` attribute.

    Returns:
        None. The documents are modified in place.
    """
    for position, document in enumerate(docs):
        document.metadata = {'page_number': position}

In [3]:
# Read the source PDF from disk. The path is relative to the notebook's
# working directory.
loader = PyPDFLoader('data/combined.pdf')
documents = loader.load()
# Replace each document's metadata with a single sequential 'page_number'
# entry (0-based); whatever metadata the loader attached is dropped here.
update_document_metadata(documents)
print('Loaded documents')

Loaded documents


In [4]:
# Cut the loaded documents into smaller chunks for embedding.
# chunk_size=1000 with chunk_overlap=100; the unit is presumably characters
# (the splitter's default length function) — TODO confirm.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_documents = splitter.split_documents(documents)
print('Split documents')

Split documents


In [5]:
from langchain_openai import OpenAIEmbeddings

# Single embedding client shared by every vector store created below, so the
# different index types are all fed by the same embedding model.
embeddings = OpenAIEmbeddings(model='text-embedding-3-large')
print('Loaded embeddings')

Loaded embeddings


In [6]:
from langchain_milvus import Milvus

# Zilliz endpoint taken from the environment; reused by every store below.
# NOTE(review): os.getenv returns None when the variable is unset and nothing
# here checks for that, so a missing ZILLIZ_URL is not reported at this line.
URI = os.getenv('ZILLIZ_URL')

# Baseline store: FLAT index with the COSINE metric.
# No db_name or collection_name is passed, so the library defaults apply.
# drop_old=True presumably discards an existing collection of the same name
# before use — TODO confirm against the langchain-milvus documentation.
vector_store_flat = Milvus(
    embedding_function=embeddings,
    connection_args={'uri': URI, 'token': os.getenv('ZILLIZ_TOKEN')},
    index_params={'index_type': 'FLAT', 'metric_type': 'COSINE'},
    consistency_level='Strong',
    drop_old=True
)
print('Using Zilliz for vector db')

2025-06-05 22:36:45,000 [DEBUG][_create_connection]: Created new connection using: 33f75eabc4504b7f8d11e2c1a0116047 (async_milvus_client.py:599)


Using Zilliz for vector db


In [7]:
from uuid import uuid4

# One random UUID4 string per chunk. The same list is passed as `ids` when
# populating the FLAT, HNSW and IVF_FLAT stores.
uuids = [str(uuid4()) for _chunk in split_documents]

In [8]:
# Insert every chunk into the FLAT store under the pre-generated ids.
# As the cell's last expression, the call's return value (shown below as a
# list of id strings) is what the notebook displays.
vector_store_flat.add_documents(documents=split_documents, ids=uuids)

['be6120aa-4b5c-4c00-b222-22dabdb84705',
 'daed4acf-6fe3-4658-a4a7-dbd9fad4f5b5',
 '10d51175-ce7a-409a-93ad-f7db5a6bfd20',
 '412e377c-1d41-4c2b-8e9b-b901ad32166a',
 '4773da2b-8954-4523-bd68-a2232a987504',
 '6ba938c5-0ede-40e8-ab0b-f5eea5fcc917',
 'c390578c-edbb-4934-b260-b39b65a246c3',
 '2512d7d6-7d7a-400d-9a21-dccc4137dd7d',
 '62d7a1cc-a5ce-4cff-bbe1-8bd2497bb84d',
 '5f727628-1720-4734-8b17-13f8ec5bfcd4',
 'c0c88b9b-3c80-47e6-9098-8cfd5fe01e52',
 'd4fc04d9-64d4-4b34-a9d3-cde3b043c90a',
 'a44f2228-be06-45fc-8214-795303e71358',
 '21c27f90-681f-4707-b010-5bb129274489',
 '75638624-e174-42f6-9ffa-4416f9a18e39',
 '78241df7-2b2d-47d3-90c4-a45a82840bc1',
 '119746ce-90eb-4900-81d9-6b9fe737243d',
 'e3700e70-b2de-4eac-8d9a-405d5f07325d',
 '7a4f09f1-068c-4a67-ae8d-e28ec61968c3',
 'e4eb73a4-eabb-4c4a-b686-3a946c0fbb84',
 '88abbb36-63a8-46f6-b6a7-9c8ed669ed0f',
 '0168ab69-a71d-47e3-bac0-a4acaa7f9991',
 '1357ac3a-3a41-484a-8470-d8d26ae223b3',
 'ffd116e1-8e36-4078-a1e0-7d743603a85c',
 'b0819ca2-0725-

In [9]:
def render_vector_store_result(result):
    """Print each search hit's text followed by its score.

    Args:
        result: iterable of ``(document, score)`` pairs, where ``document``
            exposes a ``page_content`` attribute.

    Returns:
        None. For every pair, the page content and a
        ``Similarity Search: <score>`` line are written to stdout.
    """
    for hit in result:
        document, similarity = hit
        text = document.page_content
        print(f'{text}\nSimilarity Search: {similarity}')

In [10]:
# Query the FLAT store for the 3 best matches (k=3) together with their scores.
result = vector_store_flat.similarity_search_with_score('What is LLM?', k=3)

# Print every returned chunk followed by its score.
render_vector_store_result(result)

GSM8K GPT-4 (-) 92.0 (5-shot)PaLM-2 (Large)80.7 (8-shot) U-PaLM (540B) 58.5 (-)
Problem Solving and
Logical Reasoning HumanEval Gemini(f) (Ultra) 74.4 (zero shot) GPT-4 (-) 67.0 (zero shot)Code Llama (34B)48.8 (zero shot)
support, and educational content development. For students, by
analyzing their learning styles, performance, and preferences,
LLMs can provide customized study materials and practice
questions to develop personalized learning experiences [442].
For teachers, LLMs can help to create lesson plans and grade
assignments and generate diverse and inclusive educational
content, significantly saving more time for teaching and student
interaction [443, 444]. In language learning, LLMs serve as
advanced conversational partners capable of simulating conver-
sations in multiple languages, correcting grammar, enhancing
vocabulary, and aiding pronunciation for the needs of fluency
in practice [445]. Furthermore, LLMs improve accessibility
Similarity Search: 0.48981690406799316
and 

In [11]:
# Second store for comparison: same embeddings, endpoint and token as the
# FLAT store, but with an HNSW index and a dedicated database.
# NOTE(review): the 'milvus_hnsw_demo' database presumably has to exist on
# the cluster already — nothing in this notebook creates it; confirm.
vector_store_hnsw = Milvus(
    embedding_function=embeddings,
    connection_args={'uri': URI, 'token': os.getenv('ZILLIZ_TOKEN'), 'db_name': 'milvus_hnsw_demo'},
    index_params={'index_type': 'HNSW', 'metric_type': 'COSINE'},
    consistency_level='Strong',
    drop_old=True
)

2025-06-05 22:37:20,479 [DEBUG][_create_connection]: Created new connection using: 5d182d836ea44d94a5ea8f01c8dc0083 (async_milvus_client.py:599)


In [12]:
# Insert the same chunks, under the same ids, into the HNSW store.
# The returned id list is displayed as the cell output.
vector_store_hnsw.add_documents(documents=split_documents, ids=uuids)

['be6120aa-4b5c-4c00-b222-22dabdb84705',
 'daed4acf-6fe3-4658-a4a7-dbd9fad4f5b5',
 '10d51175-ce7a-409a-93ad-f7db5a6bfd20',
 '412e377c-1d41-4c2b-8e9b-b901ad32166a',
 '4773da2b-8954-4523-bd68-a2232a987504',
 '6ba938c5-0ede-40e8-ab0b-f5eea5fcc917',
 'c390578c-edbb-4934-b260-b39b65a246c3',
 '2512d7d6-7d7a-400d-9a21-dccc4137dd7d',
 '62d7a1cc-a5ce-4cff-bbe1-8bd2497bb84d',
 '5f727628-1720-4734-8b17-13f8ec5bfcd4',
 'c0c88b9b-3c80-47e6-9098-8cfd5fe01e52',
 'd4fc04d9-64d4-4b34-a9d3-cde3b043c90a',
 'a44f2228-be06-45fc-8214-795303e71358',
 '21c27f90-681f-4707-b010-5bb129274489',
 '75638624-e174-42f6-9ffa-4416f9a18e39',
 '78241df7-2b2d-47d3-90c4-a45a82840bc1',
 '119746ce-90eb-4900-81d9-6b9fe737243d',
 'e3700e70-b2de-4eac-8d9a-405d5f07325d',
 '7a4f09f1-068c-4a67-ae8d-e28ec61968c3',
 'e4eb73a4-eabb-4c4a-b686-3a946c0fbb84',
 '88abbb36-63a8-46f6-b6a7-9c8ed669ed0f',
 '0168ab69-a71d-47e3-bac0-a4acaa7f9991',
 '1357ac3a-3a41-484a-8470-d8d26ae223b3',
 'ffd116e1-8e36-4078-a1e0-7d743603a85c',
 'b0819ca2-0725-

In [13]:
# Same query and k as the FLAT search, now against the HNSW store, so the
# two outputs can be compared side by side. Note that `result` is rebound.
result = vector_store_hnsw.similarity_search_with_score('What is LLM?', k=3)

# Print every returned chunk followed by its score.
render_vector_store_result(result)

GSM8K GPT-4 (-) 92.0 (5-shot)PaLM-2 (Large)80.7 (8-shot) U-PaLM (540B) 58.5 (-)
Problem Solving and
Logical Reasoning HumanEval Gemini(f) (Ultra) 74.4 (zero shot) GPT-4 (-) 67.0 (zero shot)Code Llama (34B)48.8 (zero shot)
support, and educational content development. For students, by
analyzing their learning styles, performance, and preferences,
LLMs can provide customized study materials and practice
questions to develop personalized learning experiences [442].
For teachers, LLMs can help to create lesson plans and grade
assignments and generate diverse and inclusive educational
content, significantly saving more time for teaching and student
interaction [443, 444]. In language learning, LLMs serve as
advanced conversational partners capable of simulating conver-
sations in multiple languages, correcting grammar, enhancing
vocabulary, and aiding pronunciation for the needs of fluency
in practice [445]. Furthermore, LLMs improve accessibility
Similarity Search: 0.48981690406799316
and 

In [14]:
# Third store for comparison: IVF_FLAT index in its own database, otherwise
# configured like the two stores above.
# NOTE(review): the 'milvus_ivf_demo' database presumably has to exist on
# the cluster already — nothing in this notebook creates it; confirm.
vector_store_ivf = Milvus(
    embedding_function=embeddings,
    connection_args={'uri': URI, 'token': os.getenv('ZILLIZ_TOKEN'), 'db_name': 'milvus_ivf_demo'},
    index_params={'index_type': 'IVF_FLAT', 'metric_type': 'COSINE'},
    consistency_level='Strong',
    drop_old=True
)

2025-06-05 22:37:48,291 [DEBUG][_create_connection]: Created new connection using: 979b5a11b3dc4296a4cd59a580d82984 (async_milvus_client.py:599)


In [15]:
# Insert the same chunks, under the same ids, into the IVF_FLAT store.
# The returned id list is displayed as the cell output.
vector_store_ivf.add_documents(documents=split_documents, ids=uuids)

['be6120aa-4b5c-4c00-b222-22dabdb84705',
 'daed4acf-6fe3-4658-a4a7-dbd9fad4f5b5',
 '10d51175-ce7a-409a-93ad-f7db5a6bfd20',
 '412e377c-1d41-4c2b-8e9b-b901ad32166a',
 '4773da2b-8954-4523-bd68-a2232a987504',
 '6ba938c5-0ede-40e8-ab0b-f5eea5fcc917',
 'c390578c-edbb-4934-b260-b39b65a246c3',
 '2512d7d6-7d7a-400d-9a21-dccc4137dd7d',
 '62d7a1cc-a5ce-4cff-bbe1-8bd2497bb84d',
 '5f727628-1720-4734-8b17-13f8ec5bfcd4',
 'c0c88b9b-3c80-47e6-9098-8cfd5fe01e52',
 'd4fc04d9-64d4-4b34-a9d3-cde3b043c90a',
 'a44f2228-be06-45fc-8214-795303e71358',
 '21c27f90-681f-4707-b010-5bb129274489',
 '75638624-e174-42f6-9ffa-4416f9a18e39',
 '78241df7-2b2d-47d3-90c4-a45a82840bc1',
 '119746ce-90eb-4900-81d9-6b9fe737243d',
 'e3700e70-b2de-4eac-8d9a-405d5f07325d',
 '7a4f09f1-068c-4a67-ae8d-e28ec61968c3',
 'e4eb73a4-eabb-4c4a-b686-3a946c0fbb84',
 '88abbb36-63a8-46f6-b6a7-9c8ed669ed0f',
 '0168ab69-a71d-47e3-bac0-a4acaa7f9991',
 '1357ac3a-3a41-484a-8470-d8d26ae223b3',
 'ffd116e1-8e36-4078-a1e0-7d743603a85c',
 'b0819ca2-0725-

In [16]:
# Same query and k once more, this time against the IVF_FLAT store.
# `result` is rebound again; the earlier search results are no longer held.
result = vector_store_ivf.similarity_search_with_score('What is LLM?', k=3)

# Print every returned chunk followed by its score.
render_vector_store_result(result)

GSM8K GPT-4 (-) 92.0 (5-shot)PaLM-2 (Large)80.7 (8-shot) U-PaLM (540B) 58.5 (-)
Problem Solving and
Logical Reasoning HumanEval Gemini(f) (Ultra) 74.4 (zero shot) GPT-4 (-) 67.0 (zero shot)Code Llama (34B)48.8 (zero shot)
support, and educational content development. For students, by
analyzing their learning styles, performance, and preferences,
LLMs can provide customized study materials and practice
questions to develop personalized learning experiences [442].
For teachers, LLMs can help to create lesson plans and grade
assignments and generate diverse and inclusive educational
content, significantly saving more time for teaching and student
interaction [443, 444]. In language learning, LLMs serve as
advanced conversational partners capable of simulating conver-
sations in multiple languages, correcting grammar, enhancing
vocabulary, and aiding pronunciation for the needs of fluency
in practice [445]. Furthermore, LLMs improve accessibility
Similarity Search: 0.4896777272224426
and p

In [17]:
from langchain_milvus import BM25BuiltInFunction, Milvus
from langchain_openai import OpenAIEmbeddings

# Build a store with two vector fields, 'dense' and 'sparse', directly from
# the chunks. `embeddings` presumably feeds the dense field and
# BM25BuiltInFunction the sparse one — TODO confirm the field pairing.
# No ids are passed here, unlike the add_documents calls above.
# NOTE(review): no db_name / collection_name is given, exactly as for
# vector_store_flat, and drop_old=True is set. This presumably replaces the
# collection that vector_store_flat was using — confirm before reusing
# vector_store_flat after this cell.
vectorstore = Milvus.from_documents(
    documents=split_documents,
    embedding=embeddings,
    builtin_function=BM25BuiltInFunction(),
    vector_field=['dense', 'sparse'],
    connection_args={'uri': URI, 'token': os.getenv('ZILLIZ_TOKEN')},
    consistency_level='Strong',
    drop_old=True,
)

2025-06-05 22:38:18,646 [DEBUG][_create_connection]: Created new connection using: 71f9031d6fb440fda3f5deff0aa92581 (async_milvus_client.py:599)


In [18]:
# Search the dense+sparse store for the 3 best matches using the 'weighted'
# ranker. The weights [0.6, 0.4] presumably follow the vector_field order
# (dense first, sparse second) — TODO confirm.
result = vectorstore.similarity_search_with_score(
    'Explain attention in transformers', k=3, ranker_type='weighted', ranker_params={'weights': [0.6, 0.4]}
)

# Print every returned chunk followed by its score.
render_vector_store_result(result)

Attention in transformers [64] calculates query, key, and value
mappings for input sequences, where the attention score is
obtained by multiplying the query and key, and later used to
weight values. We discuss different attention strategies used in
LLMs below.
Self-Attention [64]: Calculates attention using queries, keys,
and values from the same block (encoder or decoder).
Cross Attention: It is used in encoder-decoder architectures,
where encoder outputs are the queries, and key-value pairs
come from the decoder.
Sparse Attention [67]: Self-attention has O(n2) time complex-
ity which becomes infeasible for large sequences. To speed
up the computation, sparse attention [67] iteratively calculates
attention in sliding windows for speed gains.
Flash Attention [68]: Memory access is the major bottleneck
in calculating attention using GPUs. To speed up, flash
attention employs input tiling to minimize the memory reads
and writes between the GPU high bandwidth memory (HBM)
Similarity Searc

In [19]:
from langchain_core.prompts import PromptTemplate

# Question-answering prompt for the RAG chain. It takes two placeholders:
# {question} (the user's query) and {context} (the retrieved text).
# The template is assembled from adjacent string literals; the resulting
# text is one continuous string with exactly three newline characters.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=(
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know, "
        "don't make up any answer. "
        "Use three sentences maximum and keep the answer concise.\n"
        "Question: {question} \n"
        "Context: {context} \n"
        "Answer:"
    ),
)

In [20]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in iteration order, separated by a
        blank line (two newline characters). Empty input gives ``''``.
    """
    contents = [document.page_content for document in docs]
    separator = '\n\n'
    return separator.join(contents)

In [21]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq


# Chat model that produces the final answer in the RAG chain below.
# The Groq API key is presumably read from the environment — TODO confirm.
model = ChatGroq(model='gemma2-9b-it')

In [22]:
# Assemble the RAG pipeline. The chain's input (the question string) is used
# twice in the first step:
#   - 'context':  sent to the retriever built from `vectorstore`, whose
#                 results are joined into one string by format_docs;
#   - 'question': forwarded via RunnablePassthrough().
# The resulting dict fills the prompt, the prompt goes to the chat model, and
# StrOutputParser() is the final stage before the result is returned.
rag_chain = (
    {'context': vectorstore.as_retriever() | format_docs, 'question': RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)
print('Created RAG chain')

Created RAG chain


In [23]:
# Question put to the RAG chain; also written to the .docx export later.
query = 'What is an LLM and how does LangChain help with that?'

# Run the chain on the question. Note that `result` is rebound here: it
# previously held search hits and now holds the chain's output.
result = rag_chain.invoke(query)

print(result)

LLM stands for Large Language Model, a type of artificial intelligence trained on massive text datasets. LangChain helps with LLMs by providing tools and frameworks for building applications that utilize their capabilities. 
These tools include memory management, prompting, and integration with other data sources. 





In [24]:
from docx import Document

# Export the question and the generated answer to a Word document.
doc = Document()

# Level-1 heading used as the document title.
doc.add_heading('LangChain RAG', level=1)

# First paragraph: the question that was asked.
doc.add_paragraph(query)

# Second paragraph: the RAG chain's output from the previous cell.
doc.add_paragraph(result)

# Written relative to the notebook's working directory.
doc.save('output.docx')