In [None]:
#IMPORT LIBRARIES

import os
import faiss
from typing import List
import PyPDF2
from sentence_transformers import SentenceTransformer
from groq import Groq
import requests

In [None]:
# Read the Groq API key from the environment (None when the variable is unset)
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [None]:
#CREATE CHATBOT CLASS

class RAGChatBot:
    """Retrieval-augmented chatbot over the text of a single PDF.

    The PDF text is split into overlapping word windows, each window is
    embedded with a SentenceTransformer model and stored in a FAISS L2 index.
    ``ask`` looks up the nearest chunks for a question and sends them, together
    with the running conversation history, to a Groq chat-completion model.
    """

    def __init__(self, pdf_path, api_key):
        """Create the Groq client, then load and index the document.

        Args:
            pdf_path: Passed to ``PyPDF2.PdfReader``.
            api_key: Passed to ``Groq(api_key=...)``.

        Raises:
            ValueError: If the PDF yields no text chunks to index.
        """
        # Build the client first so any client-construction error surfaces
        # before the much slower load/embed step below.
        self.client = Groq(api_key = api_key)
        self.history = []
        self.system_prompt = "Based on this context, answer the last user question:\n"

        print("Loading and indexing document")
        self.text = self.load_pdf(pdf_path)
        self.chunks = self.chunk_text(self.text)
        self.index, self.embedder = self.build_faiss_index(self.chunks)

    def ask(self, question: str) -> str:
        """Answer ``question`` using retrieved chunks and the prior turns.

        The retrieved context is only embedded in the message sent for this
        turn; ``self.history`` stores the bare question and the answer, so
        earlier contexts are not resent on later turns.

        Args:
            question: The user's question.

        Returns:
            The model's answer text.
        """
        info = self.retrieve_info(question)
        context = "\n\n".join(info)

        prompt = self.system_prompt + context + "\nQuestion: " + question
        conversation = self.history + [{"role" : "user", "content" : prompt}]

        answer = self.generate_with_groq(conversation)

        self.history.append({"role": "user", "content": question})
        self.history.append({"role": "assistant", "content": answer})
        return answer

    def build_faiss_index(self, chunks: List[str], model_name='sentence-transformers/all-MiniLM-L6-v2'):
        """Embed ``chunks`` and store the vectors in a flat L2 FAISS index.

        Args:
            chunks: Text chunks to embed; must be non-empty.
            model_name: SentenceTransformer model to load.

        Returns:
            ``(index, embedder)``: the populated index and the model used,
            which must also be used to embed queries.

        Raises:
            ValueError: If ``chunks`` is empty.
        """
        # Fail with a clear message (and before loading the model) instead of
        # erroring later on the shape of an empty embedding array.
        if not chunks:
            raise ValueError(
                "No text chunks to index; the document appears to contain no extractable text."
            )
        embedder = SentenceTransformer(model_name)
        embeddings = embedder.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        return index, embedder

    def retrieve_info(self, query, k=5) -> List[str]:
        """Return up to ``k`` chunks nearest to ``query``, closest first.

        Args:
            query: Text to embed and search for.
            k: Maximum number of chunks to return.

        Returns:
            A list of at most ``min(k, len(self.chunks))`` chunk strings.
        """
        # Never request more neighbours than there are chunks: FAISS pads
        # missing results with label -1, which would index the last chunk.
        k = min(k, len(self.chunks))
        if k <= 0:
            return []
        query_vec = self.embedder.encode([query])
        _, indices = self.index.search(query_vec, k)
        # Still drop any -1 padding defensively.
        return [self.chunks[i] for i in indices[0] if i >= 0]

    def generate_with_groq(self, messages, model="llama3-8b-8192", max_tokens=512) -> str:
        """Send ``messages`` to the Groq chat-completions API.

        Args:
            messages: List of ``{"role": ..., "content": ...}`` dicts.
            model: Groq model id.
                NOTE(review): confirm the default id is still served by Groq.
            max_tokens: Forwarded as ``max_completion_tokens``.

        Returns:
            The first choice's message content, stripped; ``""`` when the
            response carries no content.
        """
        response = self.client.chat.completions.create(
            messages = messages, model = model, max_completion_tokens = max_tokens
        )
        content = response.choices[0].message.content
        # content may be None; avoid AttributeError on .strip()
        return (content or "").strip()

    @classmethod
    def load_pdf(cls, file_path) -> str:
        """Extract the text of every page of a PDF, joined by newlines.

        Pages for which ``extract_text()`` returns a falsy value contribute
        an empty string.
        """
        reader = PyPDF2.PdfReader(file_path)
        return "\n".join([page.extract_text() or "" for page in reader.pages])

    @classmethod
    def chunk_text(cls, text, chunk_size = 500, overlap = 100) -> List[str]:
        """Split ``text`` into whitespace-delimited word windows.

        Consecutive windows start ``chunk_size - overlap`` words apart, so
        neighbouring chunks share ``overlap`` words. Splitting stops as soon as
        a window reaches the last word, so no chunk is merely a suffix of the
        previous one.

        Args:
            text: Text to split.
            chunk_size: Maximum number of words per chunk; must be positive.
            overlap: Words shared between neighbours; must be < ``chunk_size``.

        Returns:
            List of chunk strings (empty for text without words).

        Raises:
            ValueError: If ``chunk_size <= 0`` or ``overlap >= chunk_size``.
        """
        step = chunk_size - overlap
        if chunk_size <= 0 or step <= 0:
            raise ValueError(
                "chunk_size must be positive and overlap must be smaller than chunk_size"
            )
        words = text.split()
        chunks = []
        for start in range(0, len(words), step):
            chunks.append(" ".join(words[start:start + chunk_size]))
            if start + chunk_size >= len(words):
                # This window already covers the tail of the text.
                break
        return chunks


In [None]:
#CONVERSATION LOOP
# Interactive chat over the indexed PDF. Type "exit" or "quit" (any case) to
# stop; closing stdin or interrupting the kernel also ends the chat cleanly.

pdf_path = "sample.pdf"
bot = RAGChatBot(pdf_path, GROQ_API_KEY)

while True:
    try:
        # Strip so that inputs such as "exit " or " quit" are still recognised
        question = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input available / user interrupted: leave without a traceback
        break
    if not question:
        # Don't spend an API call (and a history turn) on a blank line
        continue
    if question.lower() in ("exit", "quit"):
        break
    answer = bot.ask(question)
    print(f"Bot: {answer}\n")