In [8]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [9]:
model = SentenceTransformer('all-MiniLM-L6-v2')

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 948.11it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [10]:
## Sample text
text="""
LangChain is a framework for building applications with LLMs.
LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.
You can create chains, agents, memory, and retrievers.
The Eiffel Tower is located in Paris.
France is a popular tourist destination.
"""

In [11]:
sentence = [s.strip() for s in text.split("\n") if s.strip()]

In [12]:
sentence

['LangChain is a framework for building applications with LLMs.',
 'LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.',
 'You can create chains, agents, memory, and retrievers.',
 'The Eiffel Tower is located in Paris.',
 'France is a popular tourist destination.']

In [14]:
embeddings = model.encode(sentence)
threshold = 0.7
chunk = []
current_chunk = [sentence[0]]

for i in range(1,len(sentence)):
    sim = cosine_similarity(
        [embeddings[i-1]],
        [embeddings[i]]
    )[0][0]
    if sim>=threshold:
        current_chunk.append(sentence[i])
    else:
        chunk.append(" ".join(current_chunk))
        current_chunk = [sentence[i]]
    
chunk.append(" ".join(current_chunk))

for idx , ck in enumerate(chunk):
    print(f"\nChunk{idx+1}:\n{ck}")


Chunk1:
LangChain is a framework for building applications with LLMs. LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.

Chunk2:
You can create chains, agents, memory, and retrievers.

Chunk3:
The Eiffel Tower is located in Paris.

Chunk4:
France is a popular tourist destination.


In [15]:
#RAG PIPELINE

In [17]:
import os
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain_core.documents import Document
from langchain_community.document_loaders import TextLoader , PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate , PromptTemplate
from langchain_classic.chains import create_history_aware_retriever , create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain.messages import HumanMessage , AIMessage 
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough , RunnableMap , RunnableLambda


load_dotenv()


os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [18]:
#Custom Semantic Chunker With Threshold
class ThresholdSemanticChunker:
    def __init__(self, model_name = "all-MiniLM-L6-v2",threshold=0.7):
        self.model = SentenceTransformer(model_name)
        self.threshold = threshold

    def split(self , text:str):
        sentence = [s.strip() for s in text.split("\n") if s.strip()]
        embeddings = self.model.encode(sentence)
        chunk = []
        current_chunk = [sentence[0]]

        for i in range(1,len(sentence)):
            sim = cosine_similarity(
                [embeddings[i-1]],
                [embeddings[i]]
            )[0][0]
            if sim>=self.threshold:
                current_chunk.append(sentence[i])
            else:
                chunk.append(" ".join(current_chunk))
                current_chunk = [sentence[i]]
            
        chunk.append(" ".join(current_chunk))
        return chunk
    
    def split_documents(self,docs):
        result = []
        for doc in docs:
            for chunk in self.split(doc.page_content):
                result.append(Document(page_content=chunk,metadata=doc.metadata))
        return result

In [19]:
doc = Document(page_content=text)
doc

Document(metadata={}, page_content='\nLangChain is a framework for building applications with LLMs.\nLangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.\nYou can create chains, agents, memory, and retrievers.\nThe Eiffel Tower is located in Paris.\nFrance is a popular tourist destination.\n')

In [20]:
###CHUNKING
chunker = ThresholdSemanticChunker(threshold=0.7)
chunks = chunker.split_documents([doc])
chunks

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 786.29it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


[Document(metadata={}, page_content='LangChain is a framework for building applications with LLMs. LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.'),
 Document(metadata={}, page_content='You can create chains, agents, memory, and retrievers.'),
 Document(metadata={}, page_content='The Eiffel Tower is located in Paris.'),
 Document(metadata={}, page_content='France is a popular tourist destination.')]

In [25]:
vector_store = FAISS.from_documents(chunks , OpenAIEmbeddings())
retriever = vector_store.as_retriever()


In [26]:
template = """Answer the question based on the following context:

{context}


Question: {question}

"""

prompt = PromptTemplate.from_template(template)
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based on the following context:\n\n{context}\n\n\nQuestion: {question}\n\n')

In [27]:
llm = init_chat_model(model="groq:qwen/qwen3-32b")

In [29]:
rag_chain = (
    RunnableMap(
        {
            "context": lambda x: retriever.invoke(x["question"]),
            "question": lambda x: x['question']
        }
    )|prompt|llm|StrOutputParser()
)

In [30]:
query = {"question": "What is langchain used for?"}
result = rag_chain.invoke(query)
print(result)

<think>
Okay, so the user is asking, "What is LangChain used for?" Let me look at the provided documents to find the answer.

First, the document with ID '4a1b69bd-c7b1-4403-8f92-77813e83ae54' says, "LangChain is a framework for building applications with LLMs. LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone." That seems directly relevant. It mentions that LangChain is used for building applications using LLMs (Large Language Models) and that it helps combine them with other tools.

Another document, 'dd84c688-2426-4290-b76e-59b72a968ce2', states, "You can create chains, agents, memory, and retrievers." This adds more specifics about the components or features that LangChain offers. Chains, agents, memory, and retrievers are probably parts of the framework that help in structuring applications with LLMs.

The other two documents talk about France and the Eiffel Tower, which don't relate to LangChain. So I can ignore those.

Putting it all tog