# Evaluation of RAG architectures

In [1]:
from tqdm import tqdm
from pathlib import Path
from llms.clients.gpt import GPTClient
from llms.settings import settings
from llms.rag.faiss import DistanceMetric
from llms.evaluation.code import evaluate_code_generation, ConfigGrid, RAG, RAGRetriever
from tests.pandas_v2 import TEST_CASES

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_texts(folder_paths: list[str]) -> list[str]:
    """Read every regular file directly inside the given folders as UTF-8 text.

    Folders are visited in the order given and sub-directories are not
    descended into. Within each folder the files are read in sorted path
    order, so the returned list is identical on every run; ``Path.iterdir``
    on its own yields directory entries in arbitrary order.

    Args:
        folder_paths: Folders to scan. The list passed in is not modified.

    Returns:
        The decoded content of each file, one list entry per file.

    Raises:
        OSError: Propagated from ``Path.iterdir`` / ``Path.read_text``, e.g.
            when a folder does not exist.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    # Use a separate name so the parameter keeps the type its annotation states.
    folders = [Path(folder_path) for folder_path in folder_paths]

    files = []
    for folder in tqdm(folders, desc="Searching for files"):
        # Sorting makes the result independent of the directory listing order.
        files.extend(sorted(child for child in folder.iterdir() if child.is_file()))

    return [
        file.read_text(encoding="utf-8")
        for file in tqdm(files, desc="Reading files")
    ]

In [3]:
# Folders under files/pandas/textfiles whose files are loaded below; the
# resulting strings are later handed to the RAG configuration as `texts`.
folder_paths = [
    "files/pandas/textfiles/textfiles1",
    "files/pandas/textfiles/textfiles2",
    "files/pandas/textfiles/textfiles3",
]
# One string per file found directly inside those folders.
texts = get_texts(folder_paths)

Searching for files: 100%|██████████| 3/3 [00:00<00:00, 241.84it/s]
Reading files: 100%|██████████| 2061/2061 [00:00<00:00, 10984.56it/s]


In [4]:
# Client for the "gpt-4-32k" deployment. Credentials and endpoint URLs are
# taken from the project `settings` object rather than written in the notebook.
gpt_4_client = GPTClient(
    # --- authentication / endpoint (from settings) ---
    client_id=settings.CLIENT_ID,
    client_secret=settings.CLIENT_SECRET,
    auth_url=settings.AUTH_URL,
    api_base=settings.API_BASE,
    # --- model and generation options ---
    deployment_id="gpt-4-32k",
    max_response_tokens=1000,
    temperature=0.0,
)

In [5]:
# Configurations to evaluate: one LLM, five retriever modes and two distance
# metrics; every other RAG option is given a single value.
# NOTE(review): presumably ConfigGrid expands these lists into individual
# runs — confirm how the combinations are built in llms.evaluation.code.
config_grid = ConfigGrid(
    llms=[gpt_4_client],
    rag=RAG(
        retrievers=[
            RAGRetriever.NONE,
            RAGRetriever.RAG,
            RAGRetriever.RAG_AS_TOOL,
            RAGRetriever.CoALA,
            RAGRetriever.CoALA_AS_TOOL,
        ],
        distance_metrics=[
            DistanceMetric.EUCLIDEAN_DISTANCE,
            DistanceMetric.MAX_INNER_PRODUCT,
        ],
        num_search_results=[3],
        similarity_search_score_thresholds=[0.0],
        text_chunk_sizes=[512],
        use_weighted_average_of_text_chunks=[True],
        # File contents loaded by get_texts() in an earlier cell.
        texts=texts,
    ),
)

In [6]:
# Evaluate the configured grid on the pandas test cases; `test_name` labels
# this particular run.
results = evaluate_code_generation(
    config_grid=config_grid,
    test_cases=TEST_CASES,
    test_name="pandas_test_cases_v2_run_002",
)

20/01/24 08:16:14 INFO Current configuration: {'llm': 'gpt-4-32k', 'retriever': <RAGRetriever.NONE: 'NONE'>, 'distance_metric': None, 'num_search_results': None, 'similarity_search_score_threshold': None, 'text_chunk_size': None, 'use_weighted_average_of_text_chunks': None}
20/01/24 08:16:14 INFO Running test: {'prompt': "\n        I have a one-hot encoded DataFrame with '_' as the separator.\n        How can I revert this one-hot encoded DataFrame back into a categorical DataFrame using pandas?\n\n        The following DataFrame will be the only function argument:\n        df = pd.DataFrame({\n            'col1_a': [1, 0, 1],\n            'col1_b': [0, 1, 0],\n            'col2_a': [0, 1, 0],\n            'col2_b': [1, 0, 0],\n            'col2_c': [0, 0, 1],\n        })", 'data': 'data = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], "col2_c": [0, 0, 1]})', 'correct_function': 'import pandas as pd\ndef correct_function(data):\n    re

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Directory the summary CSV is read from.
path = "./results"
# NOTE(review): the evaluation cell uses test_name "pandas_test_cases_v2_run_002",
# while this file name has no "_run_002" suffix — confirm this is the intended
# results file.
summary_csv = f"{path}/pandas_test_cases_v2.csv"
df = pd.read_csv(summary_csv)

In [None]:
# Show the loaded summary table. `display` is not imported in the visible
# cells, so this relies on the notebook kernel providing it.
display(df)

In [None]:
# Static bar chart of the `accuracy` column against the `index` column,
# labelled through the Axes object that pandas returns.
ax = df.plot.bar(x="index", y="accuracy", legend=False)
ax.set(
    title="Accuracy for solving test cases",
    xlabel="Id",
    ylabel="Accuracy",
)
plt.show()

In [None]:
# Interactive bar chart of the `accuracy` column against the `index` column.
fig = px.bar(
    df,
    x="index",
    y="accuracy",
    title="Accuracy for solving test cases",
    labels={"index": "Id", "accuracy": "Accuracy"},
    width=800,
    height=500,
)

# Applied after px.bar, so these axis titles replace the captions derived
# from `labels` above.
fig.update_layout(
    xaxis_title="Configuration",
    yaxis_title="Accuracy",
    showlegend=False,
    bargap=0.2,
    xaxis={"tickmode": "linear", "tick0": 0, "dtick": 1},  # a tick at every integer
    yaxis={"tickfont": {"size": 25}},
    yaxis_title_font={"size": 25},
    xaxis_title_font={"size": 25},
    title_font={"size": 30},
)

fig.show()
# Static export; the size given here is used for the image instead of the
# 800x500 set on the figure. Adjust width, height, and scale as needed.
fig.write_image("total_acc.png", width=1200, height=800, scale=2)

In [None]:
# Static bar chart of the `total_cost` column against the `index` column,
# labelled through the Axes object that pandas returns.
ax = df.plot.bar(x="index", y="total_cost", legend=False)
ax.set(
    title="Total cost for solving test cases",
    xlabel="Id",
    ylabel="Total Cost in $",
)
plt.show()

In [None]:
# Interactive bar chart of the `total_cost` column against the `index` column.
fig = px.bar(
    df,
    x="index",
    y="total_cost",
    title="Total Cost for Configuration (20 TC)",
    labels={"index": "Id", "total_cost": "Total Cost"},
    width=800,
    height=500,
)

# Applied after px.bar, so these axis titles replace the captions derived
# from `labels` above.
fig.update_layout(
    xaxis_title="Configuration",
    yaxis_title="Total Cost",
    showlegend=False,
    bargap=0.2,
    xaxis={"tickmode": "linear", "tick0": 0, "dtick": 1},  # a tick at every integer
    yaxis={"tickfont": {"size": 25}},
    yaxis_title_font={"size": 25},
    xaxis_title_font={"size": 25},
    title_font={"size": 30},
)

fig.show()
# Static export; the size given here is used for the image instead of the
# 800x500 set on the figure. Adjust width, height, and scale as needed.
fig.write_image("total_cost.png", width=1200, height=800, scale=2)

In [None]:
# Static bar chart of the `total_time` column against the `index` column,
# labelled through the Axes object that pandas returns.
ax = df.plot.bar(x="index", y="total_time", legend=False)
ax.set(
    title="Total time for solving test cases",
    xlabel="Id",
    ylabel="Total Time in seconds",
)
plt.show()

In [None]:
# Interactive bar chart of the `total_time` column against the `index` column.
fig = px.bar(
    df,
    x="index",
    y="total_time",
    title="Total Time taken for 20 Test Cases",
    labels={"index": "Id", "total_time": "Total Time"},
    width=800,
    height=500,
)

# Applied after px.bar, so these axis titles replace the captions derived
# from `labels` above.
fig.update_layout(
    xaxis_title="Configuration",
    yaxis_title="Total Time taken (seconds)",
    showlegend=False,
    bargap=0.2,
    xaxis={"tickmode": "linear", "tick0": 0, "dtick": 1},  # a tick at every integer
    yaxis={"tickfont": {"size": 20}},
    yaxis_title_font={"size": 25},
    xaxis_title_font={"size": 25},
    title_font={"size": 30},
)

fig.show()
# Static export; the size given here is used for the image instead of the
# 800x500 set on the figure. Adjust width, height, and scale as needed.
fig.write_image("total_time.png", width=1200, height=800, scale=2)

In [None]:
# Take the path stored in the `details_csv_filepath` column of the first
# summary row, load that CSV, and show it as the cell's output.
first_row = df.iloc[0]
details_path = first_row["details_csv_filepath"]
details_df = pd.read_csv(details_path)
details_df