In [1]:
import asyncio
# allow nested event loops so asyncio.run() works inside notebook cells
import nest_asyncio
nest_asyncio.apply()

from llama_index.finetuning import EmbeddingQAFinetuneDataset
from llama_index import StorageContext, load_index_from_storage

from openai import AsyncOpenAI

from evaluate_rag_utils import Validator

In [2]:
# Read the question/node dataset from JSON and keep only its query texts.
qa_dataset_1000 = EmbeddingQAFinetuneDataset.from_json(
    'question_node_dataset_1000.json'
)
eval_questions = qa_dataset_1000.queries.values()

# Open the two persisted storage contexts (512 and 1024 variants) ...
storage_cntxt_512 = StorageContext.from_defaults(
    persist_dir="../../data/index_storage_512"
)
storage_cntxt_1024 = StorageContext.from_defaults(
    persist_dir="../../data/index_storage_1024"
)
# ... then restore one index from each context.
idx_512 = load_index_from_storage(storage_cntxt_512)
idx_1024 = load_index_from_storage(storage_cntxt_1024)

In [3]:
async def main(n_questions: int | None = 2, max_concurrency: int = 100) -> list:
    """Run two ``Validator`` configurations concurrently over the same queries.

    One validator is built over ``idx_1024`` and one over ``idx_512`` with
    ``has_node_postprocessors=True``. Both share a single ``AsyncOpenAI``
    client and a single ``asyncio.Semaphore``. Each validator's
    ``answers_evaluated_list`` coroutine is scheduled in one
    ``asyncio.TaskGroup`` with the selected queries and its own output path.

    Args:
        n_questions: Number of leading entries of ``eval_questions`` to
            evaluate; ``None`` selects all of them. Defaults to 2.
        max_concurrency: Initial value of the semaphore handed to both
            validators. Defaults to 100.

    Returns:
        A list holding whatever each ``answers_evaluated_list`` call
        returned, in the same order as the validators were declared.

    Raises:
        ValueError: If the validator and output-path lists differ in length.
        ExceptionGroup: Raised by ``asyncio.TaskGroup`` when any task fails.
    """
    client = AsyncOpenAI()
    # NOTE(review): presumably Validator uses this to cap in-flight requests;
    # confirm in evaluate_rag_utils.
    semaphore = asyncio.Semaphore(max_concurrency)

    # params for different Validators to be instantiated
    params_list = [
        {
            'client': client,
            'index': idx_1024,
            'semaphore': semaphore,
        },
        {
            'client': client,
            'index': idx_512,
            'semaphore': semaphore,
            'has_node_postprocessors': True,
        },
    ]
    # classes that validate QA dataset with default evaluators
    validators = [Validator(**params) for params in params_list]
    # output_path handed to each validator, position-matched with params_list
    output_paths = [
        'rag_evaluate_json_1024.json',
        'rag_evaluate_json_512_postprocess_both_1.json',
    ]

    # Materialize the query selection once instead of once per validator.
    queries = list(eval_questions)[:n_questions]

    async with asyncio.TaskGroup() as tg:
        tasks = [
            tg.create_task(
                validator.answers_evaluated_list(
                    # each validator gets its own list, as before
                    queries=queries.copy(),
                    output_path=output_path,
                )
            )
            # strict=True: fail loudly if the two lists ever drift apart
            for validator, output_path in zip(validators, output_paths, strict=True)
        ]

    # Leaving the TaskGroup block guarantees every task has finished.
    return [task.result() for task in tasks]

# Entry point: run the evaluation when executed as the top-level module
# (which is also the case inside a notebook kernel). asyncio.run() is used
# from within the notebook; this relies on nest_asyncio.apply() having been
# called in the import cell.
if __name__ == '__main__':
    asyncio.run(main())