In [19]:
import logging
from typing import List, Optional

import tiktoken
from llama_index.core import (
    ServiceContext,
    PromptHelper,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# from ..prompts import RecommendationPrompt

In [20]:
from pydantic import BaseModel

class RecommendationPrompt(BaseModel):
    """Prompt model whose string form is ``template`` filled in with ``details``."""

    # Format string containing a single ``{details}`` placeholder.
    template: str = """
        {details}
    """

    # Text substituted for the ``{details}`` placeholder.
    details: str

    def __str__(self):
        """Return ``template`` with ``details`` substituted into it."""
        rendered = self.template.format(details=self.details)
        return rendered

In [21]:
class RAGProvider:
    """Question answering over a directory of documents via a persisted vector index.

    ``create_index`` reads documents, embeds them and persists the resulting
    index to ``persist_dir``. ``query`` reloads a persisted index and answers a
    ``RecommendationPrompt`` against it, using the LLM and embedding model
    configured on this instance.
    """

    def __init__(
        self,
        api_key,
        model: str = 'gpt-3.5-turbo',
        persist_dir: str = "tmp",
        temperature: float = 0,
        max_tokens: int = 256
    ):
        """Configure the OpenAI LLM and embedding model.

        Args:
            api_key: OpenAI API key handed to both the LLM and the embedding model.
            model: OpenAI model name; also used to pick the tokenizer in
                ``create_index``.
            persist_dir: Directory the index is persisted to by ``create_index``
                and loaded from by default in ``query``.
            temperature: Sampling temperature passed to the LLM.
            max_tokens: Maximum number of tokens the LLM may generate.
        """

        self.model = model
        self.persist_dir = persist_dir

        self.llm = OpenAI(
            model = model,
            temperature = temperature,
            max_tokens = max_tokens,
            api_key = api_key
        )

        self.embed_model = OpenAIEmbedding(api_key = api_key)

    def _token_encoder(self):
        """Return the tiktoken ``encode`` callable for ``self.model``.

        Falls back to the ``cl100k_base`` encoding when tiktoken has no mapping
        for the model name, instead of failing with ``KeyError``.
        """

        try:
            encoding = tiktoken.encoding_for_model(self.model)
        except KeyError:
            logging.warning(
                "No tiktoken encoding known for model %r; falling back to cl100k_base",
                self.model
            )
            encoding = tiktoken.get_encoding("cl100k_base")

        return encoding.encode

    def create_index(
        self,
        separator: str = " ",
        chunk_size: int = 1024,
        chunk_overlap: int = 20,
        prompt_context_window: int = 4096,
        num_output: int = 256,
        chunk_overlap_ratio: float = 0.1,
        chunk_size_limit = None,
        input_files: str = "butil/tmp/"
    ):
        """Build a vector index from a directory of documents and persist it.

        Args:
            separator: Separator passed to the node parser.
            chunk_size: Chunk size passed to the node parser.
            chunk_overlap: Chunk overlap passed to the node parser.
            prompt_context_window: ``context_window`` for the ``PromptHelper``.
            num_output: ``num_output`` for the ``PromptHelper``.
            chunk_overlap_ratio: ``chunk_overlap_ratio`` for the ``PromptHelper``.
            chunk_size_limit: ``chunk_size_limit`` for the ``PromptHelper``.
            input_files: Path handed to ``SimpleDirectoryReader`` (a directory,
                despite the parameter name).

        Returns:
            None. The index is written to ``self.persist_dir``.
        """

        node_parser = SimpleNodeParser.from_defaults(
            separator = separator,
            chunk_size = chunk_size,
            chunk_overlap = chunk_overlap,
            tokenizer = self._token_encoder()
        )

        prompt_helper = PromptHelper(
            context_window = prompt_context_window,
            num_output = num_output,
            chunk_overlap_ratio = chunk_overlap_ratio,
            chunk_size_limit = chunk_size_limit
        )

        # NOTE(review): ServiceContext appears to be deprecated in recent
        # llama_index releases (the notebook run prints a warning at this call);
        # consider passing embed_model=/transformations= directly instead.
        service_context = ServiceContext.from_defaults(
            llm = self.llm,
            embed_model = self.embed_model,
            node_parser = node_parser,
            prompt_helper = prompt_helper
        )

        documents = SimpleDirectoryReader(input_files).load_data()

        index = VectorStoreIndex.from_documents(
            documents,
            service_context = service_context
        )

        index.storage_context.persist(persist_dir = self.persist_dir)

    def query(
        self,
        prompt: RecommendationPrompt,
        index_dir: Optional[str] = None
    ):
        """Answer ``prompt`` against a persisted index.

        Args:
            prompt: Prompt object; its ``str()`` form is sent to the query engine.
            index_dir: Directory to load the index from. Defaults to
                ``self.persist_dir`` so it matches what ``create_index`` wrote.

        Returns:
            The response object produced by the query engine (not a plain
            string); the notebook reads the answer text from its ``.response``
            attribute.
        """

        if index_dir is None:
            index_dir = self.persist_dir

        storage_context = StorageContext.from_defaults(persist_dir = index_dir)

        # Pass the configured models explicitly; otherwise the loaded index and
        # its query engine are built without this instance's api_key,
        # temperature and max_tokens settings.
        index = load_index_from_storage(
            storage_context = storage_context,
            embed_model = self.embed_model
        )
        query_engine = index.as_query_engine(llm = self.llm)

        formatted_prompt = str(prompt)
        logging.info("Formatted prompt: %s", formatted_prompt)
        return query_engine.query(formatted_prompt)

In [22]:
import dotenv

In [23]:
# Load the .env file so OPENAI_API_KEY can be read with os.getenv in a later cell.
dotenv.load_dotenv()

True

In [24]:
import os

In [25]:
# Fetch the OpenAI key from the process environment (populated from .env);
# the result is None when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY")

In [8]:
# Build the provider with its default model and persist directory.
provider = RAGProvider(api_key = api_key)

In [9]:
# NOTE(review): create_index() has no return statement, so `prompt` is bound to
# None here; the call matters only for its side effect of persisting the index.
prompt = provider.create_index()

  service_context = ServiceContext.from_defaults(


In [14]:
# Wrap the question text in the prompt model that provider.query() accepts.
prompt_question = RecommendationPrompt(details = "which movie had song Dus Bahane 2.0")

In [15]:
# NOTE(review): `prompt` holds the return value of create_index() (None), so this
# cell displays nothing; it looks like a leftover — confirm it can be removed.
prompt

In [16]:
# Answer the question against the persisted index (default index directory).
recommendations = provider.query(prompt = prompt_question)

In [27]:
# Display the full response object (answer text plus the retrieved source nodes).
recommendations

Response(response='The movie that had the song "Dus Bahane 2.0" is Baaghi 3.', source_nodes=[NodeWithScore(node=TextNode(id_='7e85ac42-c783-4b54-9daa-8c0059227a76', embedding=None, metadata={'page_label': '7', 'file_name': 'Baaghi_3.pdf', 'file_path': '/Users/abhi/Git/M-PROJECT/movieClaude-dm/butil/tmp/Baaghi_3.pdf', 'file_type': 'application/pdf', 'file_size': 272107, 'creation_date': '2024-04-21', 'last_modified_date': '2024-04-21'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='b4cd9c4b-2bc4-4204-b68c-9e0e2437aa39', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '7', 'file_name': 'Baaghi_3.pdf', 'file_path': '/Users/abhi/Git/M-PROJECT/movieClaude-dm/butil/tmp/Baaghi_3.pdf', 'file_type': 

In [26]:
# Display only the answer text.
recommendations.response

'The movie that had the song "Dus Bahane 2.0" is Baaghi 3.'