# RAG pipeline

In [None]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from dotenv import load_dotenv
import os
import requests

## 1 - Load document

In [None]:
# Read the PDF at the relative path "paper.pdf" with PyMuPDFLoader.
# NOTE(review): presumably load() yields one Document per PDF page — confirm against the loader docs.
loader = PyMuPDFLoader("paper.pdf")
documents = loader.load()

## 2 - Split it into Chunks

In [None]:
# Split the loaded documents into overlapping chunks for embedding.
# NOTE(review): chunk_size / chunk_overlap are presumably measured in characters
# (the splitter's default length function) — confirm.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)

chunks = splitter.split_documents(documents)

## 3 - Turn the chunks into embeddings and store them in ChromaDB

In [None]:
# Embedding model that is passed to the Chroma vector store.
# This model proved to be more efficient than other lightweight models when I tried it (lower distances).
embedder = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en-v1.5"
)

In [None]:
rag_collection = "rag_collection"

# Stable, position-based IDs make this cell idempotent: without explicit IDs
# each run gets fresh random IDs, so re-running against the persisted
# ./chroma_db directory would append the same chunks again and retrieval
# would start returning duplicates. With fixed IDs a re-run overwrites the
# existing entries instead.
# NOTE(review): if the source document shrinks, entries with higher indices
# from an earlier run remain in the collection — clear ./chroma_db in that case.
chunk_ids = [f"{rag_collection}-{index}" for index, _ in enumerate(chunks)]

# Embed the chunks and store them in a collection persisted under ./chroma_db.
db = Chroma.from_documents(
    chunks,
    embedding=embedder,
    ids=chunk_ids,
    persist_directory="./chroma_db",
    collection_name=rag_collection
)

## 4 - Query the DB to find the most relevant chunks

In [None]:
# Question to ask about the document. The leading "Represent this sentence ..." text looks
# like a BGE-style retrieval instruction aimed at the embedding model, not part of the question.
# NOTE(review): `query` is reused as the question for the LLM call further down; the
# instruction prefix is only meaningful for retrieval.
# NOTE(review): the BGE model card words its instruction as "... for searching relevant
# passages: " — confirm the wording used here.
query = "Represent this sentence for retrieving relevant passages: Who authored this paper?" # define relevant query right here

In [None]:
# Build a retriever over the vector store that returns k=5 results per query.
retriever = db.as_retriever(search_kwargs={"k": 5})
relevant_chunks = retriever.invoke(query) # top 5 matches (i.e. with minimal distance to the query)

## 5 - Combine relevant chunks into context

In [None]:
# Stitch the text of the retrieved chunks together, one chunk per line, to form the LLM context.
context = "\n".join(chunk.page_content for chunk in relevant_chunks)

In [None]:
# Show the assembled context as the cell output for a quick sanity check.
context

## 6 - Connect to the API

In [None]:
# Load variables from a .env file into the environment, then read the API key from it.
# NOTE(review): os.getenv returns None when API_KEY is not set; nothing here checks for that.
load_dotenv()
API_KEY = os.getenv("API_KEY") # loading the API key from the .env file

In [None]:
# Endpoint of the hosted Mistral-7B-Instruct-v0.1 model that call_llm_api posts to.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1"  # model URL

# Bearer-token authorization header built from API_KEY; passed to requests.post in call_llm_api.
headers = {
    "Authorization": f"Bearer {API_KEY}"
}

def call_llm_api(query, context, timeout=60):
    """Ask the hosted LLM to answer ``query`` using ``context``.

    Args:
        query: The question to put to the model.
        context: Text inserted ahead of the question in the prompt.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The ``generated_text`` field of the first item in the API response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        RuntimeError: If the response body does not have the expected
            ``[{"generated_text": ...}]`` shape.
    """
    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(
        API_URL,
        headers=headers,
        json={"inputs": prompt},
        timeout=timeout,
    )
    # Surface auth / rate-limit / model-loading failures as a clear HTTP error
    # instead of an obscure KeyError when indexing an error payload.
    response.raise_for_status()

    payload = response.json()
    try:
        return payload[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            f"Unexpected response from inference API: {payload!r}"
        ) from err

In [None]:
# The instruction prefix on `query` is there for the embedding model during
# retrieval; strip it so the LLM prompt contains only the actual question.
# A query without the prefix is passed through unchanged.
retrieval_instruction = "Represent this sentence for retrieving relevant passages: "
if query.startswith(retrieval_instruction):
    question = query[len(retrieval_instruction):]
else:
    question = query

response = call_llm_api(query=question, context=context)
print(response)