<a href="https://colab.research.google.com/github/SankaviKanesalingam/RAG-Retrieval-Augmented-Generation/blob/main/RAG_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install Required Packages

In [55]:
!pip install langchain openai huggingface_hub objectbox faiss-cpu
!pip install -U langchain-community
!pip install -U langchain langchain-community sentence-transformers faiss-cpu




Setup API Keys

In [56]:
import os
from huggingface_hub import login

# Set your Hugging Face API key
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "your_huggingface_api_key"
login(token="your_huggingface_api_key")

# Set your OpenAI API key for embeddings
os.environ["OPENAI_API_KEY"] = "your_huggingface_api_key"


Load and Preprocess Data

In [57]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter

# Load content from a webpage (replace with actual guide URL if needed)
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
documents = loader.load()

# Split documents into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)


Create Embeddings & Store in Vector DB


In [58]:

# Imports
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load a local Hugging Face embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Assuming you already have `chunks` (from a document loader and text splitter)
# Example: chunks = text_splitter.split_documents(documents)

# Create the FAISS vector store
vector_store = FAISS.from_documents(chunks, embedding_model)

# (Optional) Save vector store to disk
# vector_store.save_local("my_faiss_index")


Setup RAG Prompt and LLM with Hugging Face

In [59]:
from langchain import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 300}
)

# RAG-style prompt template
rag_prompt = PromptTemplate(
    input_variables=["question", "retrieved_docs"],
    template="""
You are an expert assistant. Use the following context to answer the user's question.
Context:
{retrieved_docs}

Question: {question}
Answer:"""
)


Define Q&A Function

In [60]:
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA

llm = HuggingFaceHub(
    repo_id="google/flan-t5-base",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 300}
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

def answer_question(query):
    result = qa_chain(query)
    return result["result"]


Test the System


In [61]:
from huggingface_hub import InferenceClient

# Create a client using a model that supports text generation
client = InferenceClient(model="google/flan-t5-base")

def answer_question(prompt):
    response = client.text_generation(prompt, max_new_tokens=100)
    return response

user_input = "What is LangSmith and how can I use it with LangChain?"
response = answer_question(user_input)
print("Response:\n", response)


Response:
 LangSmith is a syncing tool for a syncing device.
