In [1]:
import json
from openai import OpenAI
from groq import Groq
import os
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm
import pickle
from elasticsearch import Elasticsearch
import pandas as pd
import requests
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from requests.exceptions import HTTPError

import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prompt templates for LLM-as-a-judge relevance scoring. Both are filled with
# str.format(), so the doubled braces ({{ and }}) render as literal { and } in
# the final prompt, and .strip() removes the leading/trailing newline that the
# triple-quoted layout introduces.

# Judges a generated answer against the original (reference) answer.
# Placeholders: {answer_orig}, {question}, {answer_llm}.
# NOTE(review): this template is not referenced in the cells visible here.
prompt1_template = """
You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer compared to the original answer provided.
Based on the relevance and similarity of the generated answer to the original answer, you will classify
it as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Original Answer: {answer_orig}
Generated Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the original
answer and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

# Judges a generated answer against the question only (no reference answer).
# Placeholders: {question}, {answer_llm}.
prompt2_template = """
You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [4]:
# Module-level Groq client used by llm(). The API key is read from the
# GROQ_API_KEY2 environment variable; os.environ[...] raises KeyError if it is
# not set, so the cell fails fast instead of sending unauthenticated requests.
client =  Groq(api_key = os.environ['GROQ_API_KEY2'])
def llm(prompt, model = 'llama3-8b-8192'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: The full prompt string to send.
        model: Chat model identifier passed to the API. The default is the
            model the previous version of this function always used (it
            hard-coded ``'llama3-8b-8192'`` and ignored this argument), so
            existing ``llm(prompt)`` calls keep hitting the same model.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        Exception: Whatever the client raised, re-raised unchanged either
            immediately (for a 4xx status other than 429, which a retry cannot
            fix) or once all 5 attempts have failed. The function never
            falls through and returns ``None`` after exhausting its retries.
    """
    retries = 5
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.choices[0].message.content
        except Exception as e:
            delay = _retry_delay(e, attempt)
            # Out of attempts, or an error that retrying cannot fix: let the
            # caller see the original exception.
            if delay is None or attempt == retries - 1:
                raise
            time.sleep(delay)


def _retry_delay(exc, attempt):
    """Return how many seconds to wait before retrying, or None to give up.

    The HTTP status is looked up by duck typing (``exc.status_code`` first,
    then ``exc.response.status_code``) so the check does not depend on one
    particular HTTP library's exception classes.

    * 429 (rate limit): honour the ``retry-after`` response header when it is
      a plain number of seconds; otherwise fall back to exponential backoff.
    * any other 4xx: not retryable, returns None.
    * everything else (5xx, connection errors, ...): exponential backoff of
      ``2 ** attempt`` seconds.
    """
    response = getattr(exc, 'response', None)
    status = getattr(exc, 'status_code', None)
    if status is None:
        status = getattr(response, 'status_code', None)

    backoff = 2 ** attempt
    if status == 429:
        headers = getattr(response, 'headers', None) or {}
        try:
            # Clamp at zero: time.sleep() rejects negative values.
            return max(float(headers.get('retry-after')), 0.0)
        except (TypeError, ValueError):
            # Header missing or not a plain number (e.g. an HTTP date).
            return backoff
    if isinstance(status, int) and 400 <= status < 500:
        return None
    return backoff

In [None]:
def _parse_evaluation(str_eval):
    """Parse one LLM reply into a Python object, repairing common damage.

    The reply is first parsed as-is. If that fails, these repairs are tried
    in order and the first one that parses wins:

    1. strip trailing ``}`` characters and append exactly one (reply is
       missing its closing brace);
    2. keep only the text from the first ``{`` to the last ``}`` (reply has
       prose or a code fence around the JSON object);
    3. keep the text from the first ``{`` onwards and close it with one ``}``
       (leading prose and a missing closing brace).

    Returns:
        The parsed value, or ``None`` when the reply is not a string or no
        repair produced valid JSON. A reply that is literally JSON ``null``
        also yields ``None`` and is therefore treated as unusable.
    """
    if not isinstance(str_eval, str):
        # json.loads raises TypeError (not JSONDecodeError) on e.g. None,
        # which would otherwise abort the whole run.
        print(f"Failed to fix JSON string: expected str, got {type(str_eval).__name__}")
        return None

    try:
        return json.loads(str_eval)
    except json.JSONDecodeError as e:
        print(f"JSONDecodeError: {e}")

    # Attempt to fix the JSON string
    candidates = [str_eval.rstrip('}') + '}']  # Ensure it ends with a closing brace
    start = str_eval.find('{')
    end = str_eval.rfind('}')
    if start != -1:
        if end > start:
            candidates.append(str_eval[start:end + 1])
        candidates.append(str_eval[start:].rstrip().rstrip('}') + '}')

    last_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            last_error = e
    print(f"Failed to fix JSON string: {last_error}")
    return None


# For each answer file: ask the LLM judge to rate every record, parse the
# replies, and write the parsed ratings to one CSV per input file.
for i in range(1, 83):
    evaluations = []
    results = []
    json_evaluations = []

    # Load the pickle file
    # SECURITY: pickle.load can execute arbitrary code from the file; only run
    # this on answer files you generated yourself.
    with open(f"../data/vietnamese_rag/llm_answer/llm_answer{i}.pkl", 'rb') as file:
        r = pickle.load(file)
    results.extend(r)

    # Convert results to DataFrame
    df = pd.DataFrame(results)
    samples = df.to_dict(orient='records')

    # Generate evaluations
    for record in tqdm(samples):
        prompt = prompt2_template.format(**record)
        evaluation = llm(prompt)
        evaluations.append(evaluation)

    # Parse evaluations; replies that cannot be parsed or repaired are skipped
    for str_eval in evaluations:
        json_eval = _parse_evaluation(str_eval)
        if json_eval is None:
            continue  # Skip this evaluation if it cannot be fixed
        json_evaluations.append(json_eval)

    # Skipped replies make the CSV shorter than the input file (rows are not
    # aligned with the input records), so report how many were lost.
    skipped = len(evaluations) - len(json_evaluations)
    if skipped:
        print(f"{skipped} of {len(evaluations)} evaluations could not be parsed and were skipped")

    # Save evaluations to CSV
    if json_evaluations:
        df_evaluations = pd.DataFrame(json_evaluations)
        df_evaluations.to_csv(f'../data/vietnamese_rag/evaluations_qa/evaluations-qa{i}.csv', index=False)
        print(f"evaluations-qa{i}.csv saved")

100%|███████████████████████████████████████████████████| 15/15 [00:06<00:00,  2.44it/s]


JSONDecodeError: Expecting ',' delimiter: line 3 column 357 (char 385)
JSONDecodeError: Expecting ',' delimiter: line 3 column 415 (char 450)
JSONDecodeError: Expecting ',' delimiter: line 3 column 354 (char 382)
evaluations-qa1.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:05<00:00,  2.70it/s]


JSONDecodeError: Expecting ',' delimiter: line 3 column 300 (char 328)
JSONDecodeError: Expecting ',' delimiter: line 3 column 284 (char 319)
JSONDecodeError: Expecting ',' delimiter: line 3 column 359 (char 385)
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Failed to fix JSON string: Expecting value: line 1 column 1 (char 0)
JSONDecodeError: Expecting ',' delimiter: line 3 column 252 (char 280)
evaluations-qa2.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:39<00:00,  2.63s/it]


JSONDecodeError: Expecting ',' delimiter: line 3 column 229 (char 261)
JSONDecodeError: Expecting ',' delimiter: line 3 column 312 (char 340)
JSONDecodeError: Expecting ',' delimiter: line 3 column 397 (char 432)
evaluations-qa3.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:38<00:00,  2.57s/it]


JSONDecodeError: Expecting ',' delimiter: line 3 column 224 (char 252)
JSONDecodeError: Expecting ',' delimiter: line 3 column 230 (char 258)
JSONDecodeError: Expecting ',' delimiter: line 3 column 332 (char 358)
JSONDecodeError: Expecting ',' delimiter: line 3 column 320 (char 348)
JSONDecodeError: Expecting ',' delimiter: line 3 column 247 (char 275)
JSONDecodeError: Expecting ',' delimiter: line 3 column 266 (char 294)
JSONDecodeError: Expecting ',' delimiter: line 3 column 288 (char 316)
JSONDecodeError: Expecting ',' delimiter: line 3 column 391 (char 426)
evaluations-qa4.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:38<00:00,  2.56s/it]


JSONDecodeError: Expecting ',' delimiter: line 3 column 319 (char 347)
JSONDecodeError: Expecting ',' delimiter: line 3 column 279 (char 307)
JSONDecodeError: Expecting ',' delimiter: line 3 column 204 (char 232)
JSONDecodeError: Expecting ',' delimiter: line 3 column 279 (char 307)
JSONDecodeError: Expecting ',' delimiter: line 3 column 209 (char 237)
JSONDecodeError: Expecting ',' delimiter: line 3 column 220 (char 248)
JSONDecodeError: Expecting ',' delimiter: line 3 column 244 (char 272)
evaluations-qa5.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:38<00:00,  2.56s/it]


JSONDecodeError: Expecting ',' delimiter: line 3 column 254 (char 282)
JSONDecodeError: Expecting ',' delimiter: line 3 column 296 (char 324)
JSONDecodeError: Expecting ',' delimiter: line 3 column 363 (char 398)
JSONDecodeError: Expecting ',' delimiter: line 3 column 264 (char 292)
JSONDecodeError: Expecting ',' delimiter: line 3 column 316 (char 344)
JSONDecodeError: Expecting ',' delimiter: line 3 column 326 (char 354)
JSONDecodeError: Expecting ',' delimiter: line 3 column 203 (char 233)
evaluations-qa6.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:38<00:00,  2.56s/it]


JSONDecodeError: Expecting ',' delimiter: line 3 column 284 (char 312)
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Failed to fix JSON string: Expecting value: line 1 column 1 (char 0)
JSONDecodeError: Expecting ',' delimiter: line 3 column 348 (char 376)
JSONDecodeError: Expecting ',' delimiter: line 3 column 282 (char 308)
JSONDecodeError: Expecting ',' delimiter: line 3 column 174 (char 202)
JSONDecodeError: Expecting ',' delimiter: line 3 column 318 (char 346)
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Failed to fix JSON string: Expecting value: line 1 column 1 (char 0)
evaluations-qa7.csv saved


100%|███████████████████████████████████████████████████| 15/15 [00:38<00:00,  2.54s/it]


JSONDecodeError: Expecting ',' delimiter: line 3 column 321 (char 356)
JSONDecodeError: Expecting ',' delimiter: line 3 column 356 (char 389)
JSONDecodeError: Expecting ',' delimiter: line 3 column 365 (char 400)
JSONDecodeError: Expecting ',' delimiter: line 3 column 255 (char 281)
JSONDecodeError: Expecting ',' delimiter: line 3 column 345 (char 373)
JSONDecodeError: Expecting ',' delimiter: line 3 column 199 (char 227)
JSONDecodeError: Expecting ',' delimiter: line 3 column 282 (char 314)
JSONDecodeError: Expecting ',' delimiter: line 3 column 299 (char 332)
JSONDecodeError: Expecting ',' delimiter: line 3 column 365 (char 400)
evaluations-qa8.csv saved


  0%|                                                            | 0/15 [00:00<?, ?it/s]