<a href="https://colab.research.google.com/github/Shiva4113/RAG-Agentic/blob/main/BasicRAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### REQUIREMENTS

In [None]:
!pip install llama-index
!pip install llama-index-embeddings-jinaai
!pip install llama-index-llms-groq

### IMPORTS

In [None]:
import os
from google.colab import userdata
from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader
from llama_index.embeddings.jinaai import JinaEmbedding
from llama_index.llms.groq import Groq

### Basic RAG Pipeline

In [None]:
class NaiveRAG:
    """Minimal retrieval-augmented generation pipeline over a single document.

    Constructing an instance reads the Groq and Jina API keys from Colab
    ``userdata``, installs the LLM and the embedding model on the global
    LlamaIndex ``Settings`` object, and builds a ``VectorStoreIndex`` from
    the given file. ``query`` then answers questions against that index.
    """

    def __init__(self, pdf_path: str) -> None:
        """Load the API keys, configure LlamaIndex and index ``pdf_path``.

        Args:
            pdf_path: Path to the document that should be indexed.

        Raises:
            ValueError: If either API key comes back empty.
            FileNotFoundError: If ``pdf_path`` is not an existing file.
        """
        # Keys are stored as Colab secrets, not in a .env file.
        # NOTE(review): userdata.get may itself raise when a secret is absent
        # or notebook access is not granted -- confirm against the Colab docs.
        # The check below covers the case where a value comes back empty.
        self.groq_key = userdata.get('GROQ_API_KEY')  # for LLM querying
        self.jina_key = userdata.get('JINA_API_KEY')  # for generating embeddings

        missing = [
            name
            for name, value in (
                ("GROQ_API_KEY", self.groq_key),
                ("JINA_API_KEY", self.jina_key),
            )
            if not value
        ]
        if missing:
            raise ValueError(
                "API key(s) not found in Colab secrets: " + ", ".join(missing)
            )

        # Settings must be configured before the index is built, because
        # building the index uses the globally configured embedding model.
        self._setup_settings()
        self.index = self._create_index(pdf_path)

    def _setup_settings(self) -> None:
        """Configure the global LlamaIndex settings (LLM and embedding model).

        This mutates the shared ``Settings`` object, so it affects every
        LlamaIndex component in the process, not only this instance.
        """
        Settings.llm = Groq(
            api_key=self.groq_key,
            model="llama-3.3-70b-versatile",  # the model that will be queried
        )

        Settings.embed_model = JinaEmbedding(
            api_key=self.jina_key,
            # the embedding model used for vectorization
            model_name="jina-embeddings-v2-base-en",
        )

    def _create_index(self, pdf_path: str) -> "VectorStoreIndex":
        """Create a vector index from the document at ``pdf_path``.

        A ``VectorStoreIndex`` is used here instead of a vector database; for
        larger-scale applications a vector database such as Pinecone can be
        used instead.

        Args:
            pdf_path: Path to the document to load.

        Returns:
            The ``VectorStoreIndex`` built from the loaded documents.

        Raises:
            FileNotFoundError: If ``pdf_path`` does not point to an existing
                file (a directory path is rejected as well).
        """
        # isfile rather than exists: the path is handed to the reader as a
        # single input file, so a directory must not pass this guard.
        if not os.path.isfile(pdf_path):
            raise FileNotFoundError(f"PDF file not found: {pdf_path}")

        reader = SimpleDirectoryReader(input_files=[pdf_path])
        documents = reader.load_data()
        return VectorStoreIndex.from_documents(documents)

    def query(self, question: str):
        """Query the RAG system.

        Args:
            question: The question to ask about the indexed document.

        Returns:
            Whatever the index's query engine returns for ``question``.
        """
        # The query engine uses the LLM configured in _setup_settings
        # (llama-3.3-70b-versatile via Groq).
        query_engine = self.index.as_query_engine()
        return query_engine.query(question)

### Example Implementation

In [None]:
# Path to the document that you uploaded; it is passed to NaiveRAG below.
pdf_path = "/content/dataset_test.pdf"

# Initialize the Naive RAG instance. The constructor reads the API keys,
# configures the LlamaIndex settings and builds the vector index for the
# document, raising FileNotFoundError if the path does not exist.
rag = NaiveRAG(pdf_path)

In [None]:
# Any question that is within the scope of the uploaded document.
question = "What is a public cloud?"
# Send the question through the query engine built on the document index.
response = rag.query(question)
# Show the question next to the answer; the response object is formatted
# through the f-string, i.e. via its string representation.
print(f"Question: {question}")
print(f"Answer: {response}")