MODEL BUILDING

In [None]:
!pip install langchain langchain-community transformers==4.57.6 sentence-transformers faiss-cpu pypdf fastapi uvicorn pyngrok nest-asyncio

In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain_classic.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Folder containing PDFs
pdf_folder = "/pdf data"  # ensure PDFs exist here

# os.listdir returns entries in arbitrary order, so sort the listing to build the
# documents (and everything derived from them) in the same order on every run.
# The extension is matched case-insensitively so files such as "NOTES.PDF" are
# not silently skipped.
pdf_files = sorted(f for f in os.listdir(pdf_folder) if f.lower().endswith(".pdf"))

# Fail fast with a clear message rather than letting a later step fail on an
# empty document list.
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in {pdf_folder!r}")

all_documents = []

# Load PDFs and lowercase content
# (queries are lower-cased as well before being sent to the chain, so both sides match).
for pdf_file in pdf_files:
    loader = PyPDFLoader(os.path.join(pdf_folder, pdf_file))
    docs = loader.load()
    for doc in docs:
        doc.page_content = doc.page_content.lower()
    all_documents.extend(docs)

print(f"Total documents loaded: {len(all_documents)}")

# Break the loaded documents into overlapping chunks
# (chunk_size=500, chunk_overlap=100) ready to be embedded.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)
chunks = splitter.split_documents(documents=all_documents)

print(f"Total chunks created: {len(chunks)}")

# Embedding model used to vectorise every chunk
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed the chunks and index them in a FAISS vector store
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)

# FLAN-T5 checkpoint that generates the answers
model_name = "google/flan-t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap model + tokenizer in a transformers pipeline, then expose it to LangChain.
# NOTE(review): no generation-length arguments are passed here, so the library
# defaults apply — confirm answers are not being cut short.
pipe = pipeline(task="text2text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=pipe)

# Retrieval-augmented QA: the retriever pulls chunks from the vector store and
# the "stuff" chain type hands them to the LLM together with the question.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)


In [3]:
# Query
query = "what is a circle"
# The indexed text was lower-cased at load time, so lower-case the question too.
query = query.lower()

# `Chain.run` is deprecated; `invoke` is the supported entry point.
# RetrievalQA takes the question under the "query" key and returns the
# generated answer under the "result" key.
answer = qa_chain.invoke({"query": query})["result"]

print("Answer:", answer)

  answer = qa_chain.run(query)


Answer: the boundary of the disc


In [None]:
# Persist the FAISS index to disk so the deployment section can reload it
# instead of re-embedding the PDFs.
vectorstore.save_local(folder_path='/vector_store')

DEPLOYMENT (FASTAPI)

In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain_classic.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Same embedding model that was used when the index was built
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load the previously saved FAISS vector store from disk.
# SECURITY: allow_dangerous_deserialization=True opts in to pickle-based loading,
# which can execute arbitrary code — only load an index you created yourself.
vectorstore = FAISS.load_local(
    folder_path='/vector_store',
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# FLAN-T5 checkpoint that generates the answers
model_name = "google/flan-t5-base"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap model + tokenizer in a transformers pipeline, then expose it to LangChain.
# NOTE(review): no generation-length arguments are passed here, so the library
# defaults apply — confirm answers are not being cut short.
pipe = pipeline(task="text2text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=pipe)

# Retrieval-augmented QA: the retriever pulls chunks from the vector store and
# the "stuff" chain type hands them to the LLM together with the question.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

import asyncio
from fastapi import FastAPI
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from fastapi.responses import JSONResponse
from pydantic import BaseModel

# FastAPI application object; the route decorators below register handlers on it.
app = FastAPI()

class StrInput(BaseModel):
  """Request body accepted by the POST /predict endpoint."""
  # Question text; the /predict handler lower-cases it before passing it to the QA chain.
  text : str


@app.get('/')
def home():
  """Root endpoint: returns a fixed message pointing callers to /predict."""
  return 'NOTHING HERE GO TO /predict'

@app.post("/predict")
def predict_rag_response(data : StrInput):
  """Answer a question with the RAG chain.

  Args:
    data: Request body whose ``text`` field holds the user's question.

  Returns:
    A JSONResponse whose body is the generated answer as a JSON string.
  """
  # The question is lower-cased before it is sent to the chain.
  question = data.text.lower()
  # `Chain.run` is deprecated; `invoke` is the supported entry point. RetrievalQA
  # reads the question from "query" and returns the answer under "result".
  output = qa_chain.invoke({"query": question})
  return JSONResponse(content=output["result"])


''' Important note: this code was run in Colab, so ngrok is used to expose the server.
If you are running it on your local machine, you do not need ngrok; start the app with uvicorn's
standard method to serve it on localhost. The code below is unnecessary on a local machine.'''



# Patch asyncio so the server can be scheduled on the event loop the notebook
# is already using.
nest_asyncio.apply()

# Set ngrok token
# Read the token from the NGROK_AUTHTOKEN environment variable so the real value
# never has to be pasted into (and saved with) the notebook. The original
# placeholder is kept as the fallback when the variable is not set.
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN", "YOUR AUTH KEY"))

# Start ngrok tunnel
public_url = ngrok.connect(8000)
print("Public URL:", public_url)

async def uvicorn_serve():
  """Serve the FastAPI `app` with uvicorn on 0.0.0.0:8000 until the server stops."""
  config = uvicorn.Config(app, host="0.0.0.0", port=8000, loop="asyncio")
  server = uvicorn.Server(config)
  await server.serve()

# Schedule the server to run as a task in the existing event loop
loop = asyncio.get_event_loop()
# Keep a reference to the task: the event loop only holds weak references to
# tasks, so an unreferenced task may be garbage-collected before it finishes.
server_task = loop.create_task(uvicorn_serve())


In [2]:
from pyngrok import ngrok
# Terminate any ngrok tunnels currently running
# Cleanup step: run this once the tunnel created with ngrok.connect is no longer needed.
ngrok.kill()