In [17]:
import os
from llama_index.core import VectorStoreIndex, StorageContext, SimpleDirectoryReader, ServiceContext
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import PromptTemplate
from llama_index.llms.openai import OpenAI
import openai

In [None]:
# openai.api_key = os.getenv("OPENAI_API_KEY")


In [8]:
# https://docs.llamaindex.ai/en/stable/examples/vector_stores/SimpleIndexDemoLlama-Local.html

# https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/

# https://github.com/nicknochnack/Llama2RAG/blob/main/app.py

### Load the documents

In [None]:
# Read the files found under the local "data" folder, descending into
# sub-folders (recursive=True), and keep the loaded result for indexing.
documents = SimpleDirectoryReader(input_dir="data", recursive=True).load_data()

### Llama 2 model

In [21]:
# English system prompt: one introductory sentence followed by one "- " rule
# per line, terminated by a newline.
# NOTE: a later cell reassigns SYSTEM_PROMPT to a pt-BR prompt, which is the
# value in effect when the notebook is run top to bottom.
SYSTEM_PROMPT = "\n".join(
    [
        "You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:",
        "- Generate human readable output, avoid creating output with gibberish text.",
        "- Keep your answers technical and based on facts, do not hallucinate features.",
        "- Generate only the requested output, don't include any other language before or after the requested output.",
        "- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.",
        "- Generate professional language.",
        "- Never generate offensive or foul language.",
        "",  # empty last item => the joined text ends with a newline
    ]
)

In [35]:
# Brazilian-Portuguese system prompt: one introductory sentence followed by
# one "- " rule per line, terminated by a newline. The text is sent to the
# model as-is, so it is reproduced verbatim.
SYSTEM_PROMPT = "\n".join(
    [
        "Você é um assistente de IA que responde perguntas de maneira amigável, com base nos documentos de origem fornecidos. Aqui estão algumas regras que você sempre segue:",
        "- Gere resultados legíveis por humanos em pt-BR, evite criar resultados com texto sem sentido.",
        "- Mantenha suas respostas técnicas e baseadas em fatos, não tenha alucinações sobre as features.",
        "- Gere apenas a saída solicitada, não inclua nenhum outro idioma antes ou depois da saída solicitada.",
        "- Nunca diga obrigado, que você está feliz em ajudar, que é um agente de IA, etc. Basta responder diretamente.",
        "- Gerar linguagem profissional.",
        "- Nunca gere linguagem ofensiva ou chula",
        "",  # empty last item => the joined text ends with a newline
    ]
)

In [36]:
# Template that places the system prompt between <<SYS>> and <</SYS>> and the
# user question after it, all inside a single [INST] ... [/INST] pair.
# The doubled braces keep "{query_str}" as a literal placeholder in the
# resulting text so PromptTemplate can fill it in later.
query_wrapper_prompt = PromptTemplate(
    f"[INST]<<SYS>>\n{SYSTEM_PROMPT}<</SYS>>\n\n{{query_str}}[/INST] "
)

In [None]:
# llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5, system_prompt=SYSTEM_PROMPT)

# The query wrapper uses the [INST] / <<SYS>> instruction tags. Only the
# chat-tuned Llama 2 checkpoints are trained on that format; the base
# "Llama-2-7b-hf" checkpoint just continues them as plain text, so the
# chat variant is loaded here.
model_name = "meta-llama/Llama-2-7b-chat-hf"

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy (deterministic) decoding. `temperature` is intentionally not
    # passed: it only applies when sampling is enabled and otherwise just
    # triggers a generation-config warning.
    generate_kwargs={"do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    # Tokenizer and weights come from the same checkpoint.
    tokenizer_name=model_name,
    model_name=model_name,
    device_map="auto",
)

### Embedding Model 

In [None]:
# Embedding model used to vectorise text; the checkpoint is a multilingual
# sentence-transformers model (per its name).
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/distiluse-base-multilingual-cased-v1",
)

In [None]:
# Register the local LLM and the embedding model on the global Settings
# object so later index/query calls do not need them passed explicitly.
Settings.llm = llm
Settings.embed_model = embed_model

# Chunking parameters from the former ServiceContext configuration
# (chunk_size=800, chunk_overlap=20). Without these two lines the library
# defaults would be used instead.
Settings.chunk_size = 800
Settings.chunk_overlap = 20

In [None]:
# deprecated version
# Create a service context with the custom embedding model
# service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model)

In [None]:
# Build a vector index over the loaded documents. No LLM or embedding model
# is passed here; this relies on the values assigned to Settings earlier.
new_index = VectorStoreIndex.from_documents(documents=documents)

# Query interface on top of the index.
query_engine = new_index.as_query_engine()



In [None]:
# Ask a question in Portuguese ("what is this document about?") and print
# the engine's answer.
question = "sobre o que fala este documento?"
response = query_engine.query(question)
print(response)