<a href="https://colab.research.google.com/github/Aftabbs/Q-A-RAG-Application/blob/main/Basic_Q%26A_RAG_Application.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

import faiss
import fitz
from openai import OpenAI
from sentence_transformers import SentenceTransformer

# OpenAI-compatible client pointed at the NVIDIA endpoint below.
# The key is taken from the NVIDIA_API_KEY environment variable so a real
# secret never has to be written into the source; the original placeholder
# string is kept as the fallback so behavior is unchanged when it is unset.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get("NVIDIA_API_KEY", "NVIDIA-API-KEY"),
)

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF file; passed straight to ``fitz.open``.

    Returns:
        One string holding each page's ``get_text()`` output, joined with
        no separator between pages.
    """
    # Opening the document as a context manager closes it on exit, including
    # when a page fails to extract; previously the handle was never closed.
    with fitz.open(pdf_path) as doc:
        # join avoids repeated string reallocation on large documents.
        return "".join(page.get_text() for page in doc)

def chunk_text(text, chunk_size=500):
    """Split *text* into consecutive, non-overlapping fixed-size pieces.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings in their original order. Every chunk has
        exactly ``chunk_size`` characters except possibly the last one.
        An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop all text,
    # so reject bad sizes up front with an explicit message.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]

# Sentence-transformers model used to embed both the chunks and the question;
# the two must come from the same model for the distances to be meaningful.
_EMBEDDING_MODEL_NAME = 'paraphrase-MiniLM-L6-v2'
# Already-loaded models keyed by name, so the weights are loaded only once
# instead of on every index/retrieve call.
_EMBEDDING_MODELS = {}

def _get_embedding_model(model_name=_EMBEDDING_MODEL_NAME):
    """Return the SentenceTransformer for *model_name*, loading it at most once."""
    if model_name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]

def index_chunks(chunks):
    """Embed the text chunks and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: Non-empty list of strings to embed.

    Returns:
        A ``(index, embeddings)`` tuple: the ``faiss.IndexFlatL2`` holding
        one vector per chunk (same order as ``chunks``) and the embeddings
        returned by the model's ``encode``.

    Raises:
        ValueError: If ``chunks`` is empty, since there is then no embedding
            dimension to size the index with.
    """
    if not chunks:
        raise ValueError("cannot build an index from an empty list of chunks")
    embeddings = _get_embedding_model().encode(chunks)
    # The index dimensionality is taken from the embedding width.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings

def retrieve_chunks(question, chunks, index, embeddings, top_k=3):
    """Return the chunks whose embeddings are nearest to the question.

    Args:
        question: Query string to embed and search with.
        chunks: The list of strings that was indexed, in index order.
        index: FAISS index built from ``chunks``.
        embeddings: Unused; kept so existing callers keep working.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks in the order returned by ``index.search``.
        Fewer are returned when the index holds fewer vectors, and an empty
        list when nothing can be searched.
    """
    # FAISS pads missing results with label -1; indexing chunks[-1] would
    # then return the last chunk as a bogus hit. Clamp k and drop any
    # out-of-range label to make that impossible.
    k = min(top_k, index.ntotal, len(chunks))
    if k <= 0:
        return []
    question_embedding = _get_embedding_model().encode([question])
    _distances, indices = index.search(question_embedding, k)
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

def generate_answer(question, chunks):
    """Ask the chat model to answer *question* using *chunks* as context.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        chunks: Iterable of context strings; they are joined with newlines
            and placed ahead of the question in a single user message.

    Returns:
        The full answer text, assembled from the streamed response deltas.
    """
    context = "\n".join(chunks)
    messages = [
        {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}\nAnswer:"}
    ]
    completion = client.chat.completions.create(
        model="meta/llama3-8b-instruct",
        messages=messages,
        temperature=0.5,
        top_p=1,
        max_tokens=1024,
        stream=True
    )
    pieces = []
    for event in completion:
        # Skip any streamed event that carries no choices instead of
        # failing on choices[0].
        if not event.choices:
            continue
        piece = event.choices[0].delta.content
        # Deltas without text have content None.
        if piece is not None:
            pieces.append(piece)
    return "".join(pieces)

def main(pdf_path, question):
    """Run the whole pipeline: read the PDF, index it, retrieve, and answer.

    Args:
        pdf_path: Location of the PDF to read.
        question: Question to answer from the PDF's contents.

    Returns:
        Whatever ``generate_answer`` produces for the question and the
        retrieved chunks.
    """
    # Extract and split the document text in one step.
    document_chunks = chunk_text(extract_text_from_pdf(pdf_path))
    faiss_index, chunk_vectors = index_chunks(document_chunks)
    # Select the chunks to feed to the model as context.
    context_chunks = retrieve_chunks(
        question, document_chunks, faiss_index, chunk_vectors
    )
    return generate_answer(question, context_chunks)

# Example run of the pipeline. Both values look like placeholders to be
# replaced with a real PDF path and question before executing.
pdf_path, question = "/content/.pdf", "Ask Question?"
answer = main(pdf_path=pdf_path, question=question)
print(answer)
