In [13]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import os
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from dotenv import load_dotenv

# Read a local .env file (if any) before the chat client is constructed.
load_dotenv()

# Names of the chat model and the embedding model requested from the server.
emb_model_name = "nomic-embed-text-v1.5"
model_name = "medgemma-4b-it"

# Chat client pointed at an OpenAI-compatible endpoint on localhost:1234.
# NOTE(review): "lm-studio" looks like a placeholder key for a local
# LM Studio server rather than a real secret — confirm.
llm = ChatOpenAI(
    model=model_name,
    api_key="lm-studio",
    base_url="http://localhost:1234/v1",
)

# embeddings = OpenAIEmbeddings(
#     base_url="http://localhost:1234/v1",
#     model=emb_model_name,
#     api_key="lm-studio"
# )


In [None]:
from typing import List
import requests
from langchain.embeddings.base import Embeddings
class NomicEmbeddings(Embeddings):
    """Embeddings client that calls an OpenAI-style ``/embeddings`` endpoint.

    Every text is sent in its own POST request to ``{base_url}/embeddings``
    and the first embedding of the JSON response is returned.

    Args:
        model_name: Value sent as the ``model`` field of each request.
        base_url: Root URL of the API; a trailing slash is tolerated.
        api_key: Token sent in the ``Authorization: Bearer`` header.
        timeout: Seconds passed to ``requests`` as the request timeout, so an
            unresponsive server raises instead of hanging the kernel.
            Pass ``None`` to wait indefinitely (the previous behaviour).
    """

    def __init__(
        self,
        model_name: str,
        base_url: str = "http://localhost:1234/v1",
        api_key: str = "lm-studio",
        timeout: float = 60.0,
    ):
        self.model_name = model_name
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text in ``texts`` in order; returns one vector per text."""
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the server does not respond within ``timeout``.
        """
        # Strip a trailing slash so ".../v1/" does not become ".../v1//embeddings".
        url = f"{self.base_url.rstrip('/')}/embeddings"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-type": "application/json"
        }
        payload = {
            "model": self.model_name,
            "input": text
        }

        # Without an explicit timeout, requests waits forever on a stalled server.
        response = requests.post(url, headers=headers, json=payload, timeout=self.timeout)
        response.raise_for_status()
        return response.json()['data'][0]['embedding']


# Without RAG

In [None]:
question = "What is Langchain used for?"
# Baseline without retrieval: ask the model directly. `invoke` returns a
# message object, so print only its text rather than the full repr with
# metadata; this keeps the output comparable with the RAG answer.
print(llm.invoke(question).content)

# Add External Knowledge with RAG

In [3]:
# Small in-memory corpus: five one-sentence descriptions of LangChain.
# These strings are the texts that get embedded and indexed when the
# vector store is built from `docs`.
docs = [
    "LangChain is a framework for developing applications powered by large language models (LLMs).",
    "LangChain is an open-source framework designed to simplify the development of applications powered by large language models (LLMs).",
    "LangChain provides a structured way to connect LLMs with external data sources, enabling more powerful and context-aware applications like chatbots and virtual assistants",
    "LangChain acts as a bridge, allowing developers to combine the capabilities of LLMs with other tools and data to create more complex and intelligent systems.",
    "LangChain implements a standard interface for large language models and related technologies, such as embedding models and vector stores, and integrates with hundreds of providers.",
]

# Bare expression so the notebook displays the list as the cell output.
docs

['LangChain is a framework for developing applications powered by large language models (LLMs).',
 'LangChain is an open-source framework designed to simplify the development of applications powered by large language models (LLMs).',
 'LangChain provides a structured way to connect LLMs with external data sources, enabling more powerful and context-aware applications like chatbots and virtual assistants',
 'LangChain acts as a bridge, allowing developers to combine the capabilities of LLMs with other tools and data to create more complex and intelligent systems.',
 'LangChain implements a standard interface for large language models and related technologies, such as embedding models and vector stores, and integrates with hundreds of providers.']

In [4]:
# Use the embedding model name already defined next to the chat model name
# instead of repeating the literal, so the two cannot drift apart.
embeddings = NomicEmbeddings(model_name=emb_model_name)
# Embed every entry of `docs` and index the vectors in a FAISS store.
vectorstore = FAISS.from_texts(docs, embeddings)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x28923120830>

In [5]:
# Expose the vector store through the retriever interface. No search
# arguments are passed, so the store's default search settings apply.
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'NomicEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000028923120830>, search_kwargs={})

In [6]:
# Question-answering chain over the retriever. The "stuff" chain type places
# the retrieved texts into the prompt's {context} slot in a single call.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=llm,
)
qa_chain

RetrievalQA(verbose=False, combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='{question}'), additional_kwargs={})]), llm=ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000288DABC1760>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000288DAC8B980>, root_client=<openai.OpenAI object at

In [12]:
# `Chain.run` is deprecated in LangChain; `invoke` is the supported entry
# point. It returns a dict whose "result" key holds the generated answer.
response = qa_chain.invoke(question)["result"]
print(response)

LangChain is used for developing applications powered by large language models (LLMs). It provides a structured way to connect LLMs with external data sources, enabling more powerful and context-aware applications like chatbots and virtual assistants. LangChain acts as a bridge, allowing developers to combine the capabilities of LLMs with other tools and data to create more complex and intelligent systems.



# Advanced RAG

In [14]:
# Similarity search restricted to the two closest chunks (k=2).
top2_retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# Same kind of QA chain as before, but it also hands back the retrieved
# documents so the answer can be traced to its sources.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=top2_retriever,
    return_source_documents=True,
)

response = qa.invoke("How does Langchain support document QnA")
print(response["result"])

LangChain supports document Q&A by providing a structured way to connect LLMs with external data sources, enabling more powerful and context-aware applications like chatbots and virtual assistants.



In [15]:
# List the text of every chunk the chain retrieved for the last answer.
for source in response["source_documents"]:
    print(f"- {source.page_content}")

- LangChain provides a structured way to connect LLMs with external data sources, enabling more powerful and context-aware applications like chatbots and virtual assistants
- LangChain is an open-source framework designed to simplify the development of applications powered by large language models (LLMs).


# Common RAG Challenges

1. Poor chunking strategy: Split documents intelligently, using headers, sections, or semantic cues as boundaries.
2. Low-quality embeddings: Clean the input text before embedding it; cleaner input leads to better vectors.
3. Retrieval depth too low (small k): Try k=3 or k=5 for better context coverage.