In [None]:
# !pip uninstall -r requirements-v2.txt -y


In [2]:
from llama_index.core import SimpleDirectoryReader, ServiceContext, VectorStoreIndex, download_loader
from llama_index.core import PromptTemplate
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.core import ServiceContext, StorageContext, load_index_from_storage

import nest_asyncio
import logging
import sys
import json
from IPython.display import Markdown, display

from langchain.llms.sagemaker_endpoint import LLMContentHandler
from langchain.llms.sagemaker_endpoint import SagemakerEndpoint

# Route log records to stdout so they appear in the notebook/cell output.
# Change INFO to DEBUG if you want more extensive logging.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# basicConfig() is a no-op when the root logger already has handlers, so an
# explicit stdout handler is kept as a fallback.  It is attached only when no
# stdout handler is present yet: adding it unconditionally made every record
# print twice (and once more on each re-run of this cell).
_root_logger = logging.getLogger()
if not any(
    isinstance(_handler, logging.StreamHandler)
    and getattr(_handler, "stream", None) is sys.stdout
    for _handler in _root_logger.handlers
):
    _root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

# Apply the nest_asyncio patch before any LlamaIndex call is made.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop -- confirm.
nest_asyncio.apply()

# Debug handler that prints a trace when an operation ends; it is wrapped in a
# CallbackManager so it can be passed on as `callback_manager`.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

# Read every non-hidden file under ./data, descending into sub-directories.
loader = SimpleDirectoryReader(
    input_dir="./data",
    recursive=True,
    exclude_hidden=True,
)
fin_docs = loader.load_data()

class ContentHandlerForTextGeneration(LLMContentHandler):
    """Translate between plain prompts and the endpoint's JSON wire format.

    Requests are sent as a single-turn chat (one ``user`` message) together
    with the generation parameters; replies are expected to be a JSON list
    whose first element holds the generated message under ``generation``.
    """

    # MIME types for the request body and the accepted response body.
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        """Build the UTF-8 encoded JSON request body for ``prompt``.

        Args:
            prompt: Text sent as the content of a single ``user`` message.
            model_kwargs: Generation settings, forwarded unchanged under the
                ``"parameters"`` key.

        Returns:
            The JSON document encoded as UTF-8 bytes.
        """
        payload = {
            # One conversation containing one user turn.
            "inputs": [[{"role": "user", "content": prompt}]],
            "parameters": model_kwargs,
        }
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: The response body.  Either a readable object exposing
                ``.read()`` (presumably a botocore streaming body -- verify
                against the caller) or the raw bytes themselves.

        Returns:
            The ``content`` of the first element's ``generation`` entry.

        Raises:
            json.JSONDecodeError: If the body is not valid JSON.
            KeyError, IndexError, TypeError: If the JSON does not have the
                expected ``[{"generation": {"content": ...}}]`` shape.
        """
        # The parameter is annotated as bytes but the original code always
        # called .read(); accept both so a plain bytes payload does not fail
        # with AttributeError.
        raw = output.read() if hasattr(output, "read") else output
        response_json = json.loads(raw.decode("utf-8"))
        return response_json[0]["generation"]["content"]

# Generation settings handed to the endpoint wrapper as `model_kwargs`.
parameters = dict(max_new_tokens=1024, temperature=0.1)

# AWS region and name of the deployed SageMaker endpoint.
region = "eu-west-1"
endpoint_name = "meta-textgeneration-llama-2-13b-f-2024-04-09-07-28-20-178"

# The content handler converts prompts to request bodies and responses back
# to text.
content_handler = ContentHandlerForTextGeneration()

llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=region,
    content_handler=content_handler,
    model_kwargs=parameters,
    # NOTE(review): presumably acknowledges the model's EULA on each call --
    # confirm against the endpoint's requirements.
    endpoint_kwargs={"CustomAttributes": "accept_eula=true"},
)
    

# ------------------------------------------------------------------------------------------
# Directory the index is persisted to and reloaded from.
storage_directory = "indexV2"

# chunk_size defines the size of the chunks (or nodes) that documents are
# broken into when they are indexed by LlamaIndex.
# NOTE(review): ServiceContext is reportedly deprecated in newer llama-index
# releases in favour of Settings -- confirm against the installed version.
service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=600,
    embed_model="local",
    callback_manager=callback_manager,
)

# Build the index from the loaded documents.
index = VectorStoreIndex.from_documents(
    fin_docs,
    service_context=service_context,
    show_progress=True,
)

# Persist the index to disk ...
index.storage_context.persist(persist_dir=storage_directory)

# ... and load it straight back; from here on `index` is the reloaded copy.
storage_context = StorageContext.from_defaults(persist_dir=storage_directory)
index = load_index_from_storage(
    storage_context,
    service_context=service_context,
)

# Query using the 3 most similar chunks and render the answer in bold.
query_engine = index.as_query_engine(
    service_context=service_context,
    similarity_top_k=3,
)
response = query_engine.query("Give me a summary of the document")
display(Markdown(f"<b>{response}</b>"))

ImportError: `llama-index-readers-file` package not found