In [60]:
import pandas as pd
import json
import ast
import re

In [61]:
# Load your JSON content into a dictionary
with open('response_dict.json', 'r', encoding='utf-8') as file:
    json_data = json.load(file)

In [62]:
splits = {'validation': 'all/validation-00000-of-00001.parquet'}
dataset_df = pd.read_parquet("hf://datasets/cais/mmlu/" + splits["validation"])

In [63]:
# Extract the relevant data from JSON
instructions = []
choices = []
responses_json = []
good_answer = []
for key in json_data:
    for entry in json_data[key]:
        # Extraire les instructions, choix et réponses
        instruction_start = entry.find('### Instruction:') + len('### Instruction:')
        instruction_end = entry.find('### Choices:')
        instruction = entry[instruction_start:instruction_end].strip()
        
        choices_start = entry.find('### Choices:') + len('### Choices:')
        choices_end = entry.find('### Response:')
        choice = entry[choices_start:choices_end].strip()
        
        response_start = entry.find('### Response:') + len('### Response:')
        response = entry[response_start:].strip()
        
        instructions.append(instruction)
        choices.append(choice)
        responses_json.append(response)

In [64]:
# Combine the data
data = {
    'Instruction': instructions,
    'Choices': choices,
    'Subject' : dataset_df['subject'],
    'Answer Dataset': dataset_df['answer'],  # This assumes the order matches
    'Answer Json': responses_json
}

In [65]:
# Create the DataFrame
df = pd.DataFrame(data)

def remove_non_printable(text):
    return re.sub(r'[^\x20-\x7E]', '', text)

def remove_eos_token(text):
    clean_text = re.sub(r'<.*?>', '', text)
    return clean_text


In [66]:
def compare_answer(row):
    """Compares the model's response to the expected answer, taking context into account.

    Args:
        row (pandas.Series): A row from the dataframe.

    Returns:
        bool: True if the model's response matches the expected answer, False otherwise.
    """
    expected_answer = ast.literal_eval(row['Choices'])[row['Answer Dataset']]
    model_response = row['Answer Json']
    # Check if the expected answer is present in the text
    if re.search(r'\b' + re.escape(expected_answer) + r'\b', model_response) or expected_answer == model_response:
        return True
    else:
        return False

In [67]:
df['Answer Json'] = df['Answer Json'].apply(remove_eos_token)
df['Good Answer'] = df.apply(compare_answer, axis=1)

In [68]:
df_print = df.applymap(lambda x: remove_non_printable(str(x)))
#  Save to Excel
df_print.to_excel('result_inference_mmlu.xlsx', index=False)

  df_print = df.applymap(lambda x: remove_non_printable(str(x)))


In [69]:
result = df.groupby('Subject')['Good Answer'].mean().reset_index()
result.columns = ['Subject', 'Average Good Answer']

print(result)

                                Subject  Average Good Answer
0                      abstract_algebra             0.181818
1                               anatomy             0.000000
2                             astronomy             0.000000
3                       business_ethics             0.000000
4                    clinical_knowledge             0.000000
5                       college_biology             0.062500
6                     college_chemistry             0.125000
7              college_computer_science             0.000000
8                   college_mathematics             0.000000
9                      college_medicine             0.045455
10                      college_physics             0.000000
11                    computer_security             0.000000
12                   conceptual_physics             0.115385
13                         econometrics             0.083333
14               electrical_engineering             0.125000
15               element