# Evaluation of RAG architectures

In [1]:
from tqdm import tqdm
from pathlib import Path
from llms.clients.gpt import GPTClient
from llms.settings import settings
from llms.rag.faiss import DistanceMetric
from llms.evaluation.code import evaluate_code_generation, ConfigGrid, RAG, RAGRetriever
from tests.pandas import TEST_CASES

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_texts(folder_paths: list[str]) -> list[str]:
    """Read every regular file directly inside the given folders as UTF-8 text.

    Folders are processed in the order given. Within each folder the files are
    visited in sorted path order: ``Path.iterdir`` yields entries in arbitrary
    order, so without sorting the order of the returned texts could differ
    between machines and runs. Subdirectories are not descended into.

    Args:
        folder_paths: Paths of the folders to scan.

    Returns:
        The contents of the files, one string per file.

    Raises:
        OSError: If a folder cannot be listed or a file cannot be read
            (for example ``FileNotFoundError`` for a missing folder).
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    # Keep the caller's argument untouched instead of rebinding it to a list
    # of a different element type.
    folders = [Path(folder_path) for folder_path in folder_paths]

    files = []
    for folder in tqdm(folders, desc="Searching for files"):
        # Sorting makes the result reproducible regardless of filesystem order.
        files.extend(child for child in sorted(folder.iterdir()) if child.is_file())

    return [
        file.read_text(encoding="utf-8")
        for file in tqdm(files, desc="Reading files")
    ]

In [3]:
# Folders whose files are read into `texts`, which is later handed to the RAG
# configuration as its text corpus.
folder_paths = [
    "files/pandas/textfiles/textfiles1",
    "files/pandas/textfiles/textfiles2",
    "files/pandas/textfiles/textfiles3",
]
texts = get_texts(folder_paths)

Searching for files: 100%|██████████| 3/3 [00:00<00:00, 164.35it/s]
Reading files: 100%|██████████| 2061/2061 [00:03<00:00, 551.81it/s]


In [4]:
# Client for the LLM under evaluation.
gpt_4_client = GPTClient(
    # Model selection and generation settings.
    deployment_id="gpt-4-32k",
    temperature=0.0,
    max_response_tokens=1000,
    # Credentials and endpoints come from the project settings object, so no
    # secret is written into the notebook itself.
    client_id=settings.CLIENT_ID,
    client_secret=settings.CLIENT_SECRET,
    auth_url=settings.AUTH_URL,
    api_base=settings.API_BASE,
)

In [5]:
# Retrieval side of the experiment: five retriever variants are listed, while
# every other retrieval setting is given a single candidate value.
rag_config = RAG(
    retrievers=[
        RAGRetriever.NONE,
        RAGRetriever.RAG,
        RAGRetriever.RAG_AS_TOOL,
        RAGRetriever.CoALA,
        RAGRetriever.CoALA_AS_TOOL,
    ],
    distance_metrics=[DistanceMetric.EUCLIDEAN_DISTANCE],
    num_search_results=[3],
    similarity_search_score_thresholds=[0.0],
    text_chunk_sizes=[512],
    use_weighted_average_of_text_chunks=[True],
    texts=texts,
)

# Full grid: the single GPT-4 client paired with the retrieval settings above.
# NOTE(review): presumably ConfigGrid expands these lists into individual
# configurations — confirm against llms.evaluation.code.
config_grid = ConfigGrid(llms=[gpt_4_client], rag=rag_config)

In [6]:
# Run the code-generation evaluation for the configured grid on the pandas
# test cases; progress is reported through the INFO log lines in the output.
results = evaluate_code_generation(
    config_grid=config_grid,
    test_cases=TEST_CASES,
    test_name="pandas",
)

13/01/24 13:13:14 INFO Current configuration: {'llm': 'gpt-4-32k', 'retriever': <RAGRetriever.NONE: 'NONE'>, 'distance_metric': None, 'num_search_results': None, 'similarity_search_score_threshold': None, 'text_chunk_size': None, 'use_weighted_average_of_text_chunks': None}
13/01/24 13:13:14 INFO Running test: {'prompt': 'How can I convert this dataframe: df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]}) into a categorical dataframe?', 'data': 'data = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]})', 'correct_function': 'import pandas as pd\ndef correct_function(data):\n    result = pd.from_dummies(data, sep="_")\n    return result'}
13/01/24 13:13:14 INFO User prompt: How can I convert this dataframe: df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]}) into a categoric