In [61]:
import nest_asyncio

# Patch asyncio so event loops can be nested. Presumably required because the
# notebook kernel already runs a loop and later cells drive async retrievers.
nest_asyncio.apply()

In [62]:
from dotenv import load_dotenv
import os
import openai

# Load environment variables via python-dotenv, then hand the OpenAI key to the
# client library. The key is None when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [63]:
from llama_index import SimpleDirectoryReader

# Read every file under `directory_path`; `filename_as_id=True` is passed so
# document ids are derived from file names.
directory_path = "data"
documents = SimpleDirectoryReader(
    directory_path,
    filename_as_id=True,
).load_data()

In [64]:
from llama_index import VectorStoreIndex, ServiceContext, StorageContext, load_index_from_storage

# Reuse a previously persisted index when one exists; otherwise build it from
# `documents` and persist it so later runs can load it instead of rebuilding.
try:
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    # Try to load the index from storage
    index = load_index_from_storage(storage_context)
except FileNotFoundError:
    # A FileNotFoundError from either call above is treated as "nothing has
    # been persisted yet", so the index is built from scratch.
    service_context = ServiceContext.from_defaults(chunk_size=1024)
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context
    )
    # NOTE(review): persist() is called without a directory while loading reads
    # "./storage" — presumably the library default matches; confirm.
    index.storage_context.persist()

In [65]:
from llama_index.llms import OpenAI
# LLM client shared by later cells: query generation and the fusion retriever.
llm = OpenAI(model="gpt-3.5-turbo")

In [66]:
from llama_index import PromptTemplate
# Example user question (Portuguese for "Talk about the port of Rio.").
query_str = "Fale sobre o porto do Rio."

# Query-expansion prompt. The `{num_queries}` and `{query}` placeholders are
# filled in when the template is formatted.
query_gen_prompt_str = (
    "You are a helpful assistant that generates multiple search queries "
    "based on a single input query. "
    "Generate {num_queries} search queries, one on each line, "
    "related to the following input query:\n"
    "Query: {query}\n"
    "Queries:\n"
)

query_gen_prompt = PromptTemplate(query_gen_prompt_str)

In [67]:
def generate_queries(llm, query_str: str, num_queries: int = 4):
    """Ask the LLM for search-query variations of ``query_str``.

    Args:
        llm: Object exposing ``complete(prompt)`` whose result has a ``text``
            attribute.
        query_str: The user's original query.
        num_queries: Query budget. The prompt requests ``num_queries - 1``
            variations (NOTE(review): presumably one slot is reserved for the
            original query, which is not added to the result here — confirm).

    Returns:
        A list with one stripped query per non-blank line of the completion.
        The list is empty when the completion contains no text.
    """
    fmt_prompt = query_gen_prompt.format(
        num_queries=num_queries - 1, query=query_str
    )
    response = llm.complete(fmt_prompt)
    # Completions often end with a newline or contain blank separator lines. A
    # raw split would turn those into empty queries that are then sent to every
    # retriever, so blank lines are dropped and padding is stripped.
    return [line.strip() for line in response.text.splitlines() if line.strip()]

# Expand the example question into several search queries (this calls the LLM)
# and show them; `queries` is reused by the retrieval cell further down.
queries = generate_queries(llm, query_str, num_queries=4)
print(queries)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
from tqdm.asyncio import tqdm

async def run_queries(queries, retrievers):
    """Run every query against every retriever concurrently.

    Args:
        queries: Iterable of query strings. Repeated queries are run once.
        retrievers: Sequence of objects exposing an awaitable
            ``aretrieve(query)``.

    Returns:
        Dict mapping ``(query, retriever_index)`` to the result that retriever
        returned for that query. ``retriever_index`` is the position of the
        retriever in ``retrievers``.
    """
    # Drop repeated queries (keeping first-seen order) so the same retrieval is
    # not issued twice and result keys cannot collide.
    unique_queries = list(dict.fromkeys(queries))

    # Record each task's key next to the task itself. Zipping the flat result
    # list against `queries` alone would drop most results and pair the rest
    # with the wrong query, because there is one task per (query, retriever).
    keys = []
    tasks = []
    for query in unique_queries:
        for retriever_idx, retriever in enumerate(retrievers):
            keys.append((query, retriever_idx))
            tasks.append(retriever.aretrieve(query))

    if not tasks:
        return {}

    task_results = await tqdm.gather(*tasks)
    return dict(zip(keys, task_results))

In [None]:
# get retrievers
from llama_index.retrievers import BM25Retriever

## vector retriever: built from the index, asking for 2 results per query
vector_retriever = index.as_retriever(similarity_top_k=2)

## bm25 retriever: built over the same docstore as the index, also 2 results
bm25_retriever = BM25Retriever.from_defaults(
    docstore=index.docstore, similarity_top_k=2
)

# Top-level `await` only works in an async-aware shell such as the notebook
# kernel. Runs the generated queries against both retrievers.
results_dict = await run_queries(queries, [vector_retriever, bm25_retriever])

In [None]:
def fuse_results(results_dict, similarity_top_k: int = 2, k: float = 60.0):
    """Fuse several ranked result lists with reciprocal rank fusion.

    Each list is ranked by its own scores (a missing score counts as 0.0). A
    result at 0-based rank ``r`` contributes ``1 / (r + k)`` to the fused score
    of its text, and results with identical text are merged.

    Args:
        results_dict: Mapping whose values are lists of scored results. Each
            result exposes ``.score`` and ``.node.get_content()``.
        similarity_top_k: Number of fused results to return.
        k: Rank-smoothing constant that controls the impact of outlier
            rankings. Must be positive.

    Returns:
        Up to ``similarity_top_k`` results ordered by fused score, highest
        first. They are new objects carrying the fused score; the objects in
        ``results_dict`` are left untouched, so calling this function again on
        the same input gives the same ranking.
    """
    fused_scores = {}
    text_to_node = {}

    # Accumulate reciprocal-rank scores per distinct text.
    for nodes_with_scores in results_dict.values():
        ranked = sorted(
            nodes_with_scores, key=lambda x: x.score or 0.0, reverse=True
        )
        for rank, node_with_score in enumerate(ranked):
            text = node_with_score.node.get_content()
            # When the same text appears several times, the last one seen is
            # kept as the representative result.
            text_to_node[text] = node_with_score
            fused_scores[text] = fused_scores.get(text, 0.0) + 1.0 / (rank + k)

    # Highest fused score first; ties keep first-seen order (stable sort).
    ranked_texts = sorted(fused_scores, key=fused_scores.get, reverse=True)

    reranked_nodes = []
    for text in ranked_texts[:similarity_top_k]:
        source = text_to_node[text]
        # Build a fresh result of the same type instead of overwriting
        # `source.score`. Mutating the caller's objects would make a second
        # call on the same `results_dict` rank by already-fused scores.
        reranked_nodes.append(type(source)(node=source.node, score=fused_scores[text]))

    return reranked_nodes

In [None]:
# Fuse the per-retriever result lists into one ranking (default top 2).
final_results = fuse_results(results_dict)

In [None]:
from llama_index.response.notebook_utils import display_source_node

# Render each fused result inline; source_length=500 presumably caps how much
# of the source text is shown — confirm against display_source_node.
for n in final_results:
    display_source_node(n, source_length=500)

In [None]:
from llama_index import QueryBundle
from llama_index.retrievers import BaseRetriever
from typing import Any, List
from llama_index.schema import NodeWithScore


class FusionRetriever(BaseRetriever):
    """Ensemble retriever with fusion.

    For each incoming query it generates query variations with the LLM, runs
    them against every wrapped retriever, and merges the ranked lists with
    ``fuse_results``.
    """

    def __init__(
        self,
        llm,
        retrievers: List[BaseRetriever],
        similarity_top_k: int = 2,
        num_queries: int = 4,
    ) -> None:
        """Init params.

        Args:
            llm: LLM passed to ``generate_queries`` to produce query variations.
            retrievers: Retrievers whose results are fused.
            similarity_top_k: Number of fused results to return per query.
            num_queries: Query budget passed to ``generate_queries``.
        """
        # Keep the LLM on the instance. Without this, `_retrieve` would depend
        # on a module-level `llm` and silently ignore the constructor argument.
        self._llm = llm
        self._retrievers = retrievers
        self._similarity_top_k = similarity_top_k
        self._num_queries = num_queries
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve fused results for the query carried by ``query_bundle``."""
        # Local import keeps this cell self-contained.
        import asyncio

        # Use the query that was actually asked and this instance's own
        # configuration, not notebook globals.
        queries = generate_queries(
            self._llm, query_bundle.query_str, num_queries=self._num_queries
        )
        # `run_queries` is a coroutine function and must be driven to
        # completion here. Inside a notebook a loop is already running, so this
        # relies on `nest_asyncio.apply()` having been called.
        results = asyncio.run(run_queries(queries, self._retrievers))
        # Fuse the results just retrieved, not a stale module-level dict.
        final_results = fuse_results(
            results, similarity_top_k=self._similarity_top_k
        )

        return final_results

In [None]:
from llama_index.query_engine import RetrieverQueryEngine

# Wrap both retrievers in the fusion retriever defined in the previous cell.
fusion_retriever = FusionRetriever(
    llm, [vector_retriever, bm25_retriever], similarity_top_k=2
)

# Build a query engine on top of the fusion retriever and ask the example
# question (`query_str` is defined next to the prompt template).
query_engine = RetrieverQueryEngine(fusion_retriever)
response = query_engine.query(query_str)

In [None]:
# print() already applies str() to its argument.
print(response)

Tudo até aqui teve como objetivo criar uma query engine com RAG-Fusion. Daqui em diante, faremos a avaliação do modelo.

In [None]:
from ragevaluation import rag_evaluation

# Run the project-local evaluation helper against the fusion query engine.
# What it measures and reports is defined in `ragevaluation`, not shown here.
rag_evaluation(query_engine)

In [None]:
# (intentionally empty: a stray "v" was removed; as a bare undefined name it raised NameError on Run All)