Creating the DB - FAISS

In [2]:
!pip install langchain
!pip install transformers
!pip install pydantic
!pip install fastapi



In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import json

# Load preprocessed data.
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open("datasets/pqaa_documents.json", "r", encoding="utf-8") as f:
    documents = json.load(f)

# Report an empty input here, with a clear message, instead of letting it
# surface as an error from inside the index build.
if not documents:
    raise ValueError("datasets/pqaa_documents.json contains no documents to index")

# Initialize embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create FAISS vectorstore.
# Every entry must provide a "text" and a "metadata" key; the two lists stay
# aligned by position so each text keeps its own metadata.
texts = [doc["text"] for doc in documents]
metadatas = [doc["metadata"] for doc in documents]
vectorstore = FAISS.from_texts(texts, embedding=embedding_model, metadatas=metadatas)

# Save FAISS index
vectorstore.save_local("db/faiss")


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


RAG - Pipeline

In [None]:
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load LLM
model_name = "EleutherAI/gpt-neo-2.7B"  # Replace with desired model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Give the answer its own token budget with `max_new_tokens`. Without it the
# default length limit applies to prompt + completion together, and a RAG prompt
# already carries the retrieved passages, leaving little or no room to answer.
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)

# Wrap LLM with LangChain
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Load vectorstore
from langchain.vectorstores import FAISS  # Or Chroma if used

# `load_local` needs the embedding model in order to embed incoming queries; it
# has to be the model the index was built with, so `embedding_model` from the
# indexing cell is reused here.
# The saved index includes a pickled docstore, so recent LangChain releases
# require an explicit opt-in to load it. That is acceptable here only because
# "db/faiss" was written by this notebook -- never enable it for files from an
# untrusted source.
vectorstore = FAISS.load_local(
    "db/faiss",
    embedding_model,
    allow_dangerous_deserialization=True,
)  # Replace with ChromaDB if needed
retriever = vectorstore.as_retriever()

# Create RAG pipeline
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# Test the pipeline
query = "What are the symptoms of diabetes?"
# With return_source_documents=True the chain produces two outputs, which
# `run()` does not support. Invoke the chain with its "query" input instead and
# read the answer from "result" and the retrieved passages from
# "source_documents".
response = rag_chain.invoke({"query": query})
print("Answer:", response["result"])
print("Sources:", response["source_documents"])


RAG FastAPI

In [None]:
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel

app = FastAPI()

# Define input schema
class Query(BaseModel):
    question: str

@app.post("/query/")
async def query_rag(query: Query):
    """Answer a question with the RAG chain and return the supporting passages."""
    # The chain call is synchronous and slow (retrieval + LLM generation); run it
    # in a worker thread so it does not block the event loop for other requests.
    # `invoke` is used because `run()` does not support a chain that also returns
    # its source documents.
    result = await run_in_threadpool(rag_chain.invoke, {"query": query.question})

    # Reduce each retrieved document to plain JSON-friendly fields rather than
    # relying on how the framework serializes Document objects.
    sources = [
        {"page_content": doc.page_content, "metadata": doc.metadata}
        for doc in result["source_documents"]
    ]
    return {"answer": result["result"], "sources": sources}


In [None]:
uvicorn app:app --reload

Deploy on Hugging Face

In [None]:
git add .
git commit -m "Add RAG pipeline"
git push
