In [1]:
import json
import yaml
from easydict import EasyDict
import os
import pandas as pd
from tqdm import tqdm
from langchain_openai import AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema import SystemMessage 
from langchain_core.output_parsers import StrOutputParser
from openai import OpenAI
from multiprocessing.pool import ThreadPool

In [None]:
# Location of the JSON file holding the raw MCQ strings.
path_to_mcqs = 'file_path_here'
# Decode explicitly as UTF-8: the MCQs carry Arabic text and the platform's
# default encoding is not guaranteed to be UTF-8.
with open(path_to_mcqs, 'r', encoding='utf-8') as f:
    mcqs = json.load(f)

len(mcqs)

In [7]:
# Empty frame that receives one row per parsed MCQ.
mcq_columns = [
    'Engine', 'Context', 'Question', 'Correct Answer',
    'Option 1', 'Option 2', 'Option 3', 'Option 4',
    'Answer Key',
]
mcqs_df = pd.DataFrame(columns=mcq_columns)

In [8]:
def append_question_to_df(question, mcqs_df):
    """Parse one raw MCQ string and append it to ``mcqs_df`` as a new row.

    The string is read line by line; a line is recognised by its prefix
    (``Engine:``, ``Context:``, ``السؤال``, ``الإجابة الصحيحة:``, ``1.`` .. ``4.``)
    and the text after the first occurrence of the marker is stored. Lines
    with any other prefix are ignored.

    Args:
        question: The raw MCQ text, one field per line.
        mcqs_df: The DataFrame to extend. It is modified in place, so callers
            that ignore the return value still see the new row.
            Assumes a default 0..n-1 index (the new row is labelled
            ``len(mcqs_df)``).

    Returns:
        The same ``mcqs_df`` object, with one more row.

    Raises:
        IndexError: If a question line (``السؤال``) contains no ``:``.
    """
    # (line prefix, marker to split on, destination column); first match wins.
    field_rules = (
        ('Engine:', 'Engine:', 'Engine'),
        ('Context:', 'Context:', 'Context'),
        ('السؤال', ':', 'Question'),
        ('الإجابة الصحيحة:', 'الإجابة الصحيحة:', 'Correct Answer'),
        ('1.', '1.', 'Option 1'),
        ('2.', '2.', 'Option 2'),
        ('3.', '3.', 'Option 3'),
        ('4.', '4.', 'Option 4'),
    )

    question_dict = {}
    for line in question.split('\n'):
        for prefix, marker, column in field_rules:
            if line.startswith(prefix):
                # maxsplit=1 keeps text that contains the marker again
                # (e.g. a question with a second ':' or an option with "1.").
                question_dict[column] = line.split(marker, 1)[1].strip()
                break
    question_dict['Answer Key'] = '1'  # Default answer key is Option 1

    # Make sure every parsed field has a column to land in.
    for column in question_dict:
        if column not in mcqs_df.columns:
            mcqs_df[column] = None

    # Append in place; rebinding the local name would leave the caller's
    # frame untouched. Fields absent from the text are stored as missing.
    mcqs_df.loc[len(mcqs_df)] = [question_dict.get(column) for column in mcqs_df.columns]
    return mcqs_df

In [9]:
# Feed every raw MCQ string through the parser.
for raw_mcq in mcqs:
    append_question_to_df(raw_mcq, mcqs_df)

In [11]:
# Write the parsed questions to disk as UTF-8 with a BOM, without the index column.
mcqs_df.to_csv(
    'MCQs.csv',
    index=False,
    encoding='utf-8-sig',
)

In [12]:
def reconstruct_question(row):
    """Render one MCQ row as the text block sent to the evaluator.

    Args:
        row: Mapping with the keys 'Context', 'Question', 'Option 1' ..
            'Option 4' and 'Correct Answer'.

    Returns:
        A string with the context, the question, the four numbered options
        and the correct answer, with sections separated by blank lines.
    """
    context_part = f"سياق النص: {row['Context']}"
    question_part = f"السؤال: {row['Question']}"
    numbered_options = [
        f"{number}.{row['Option ' + str(number)]}" for number in range(1, 5)
    ]
    answer_part = f"الإجابة الصحيحة: {row['Correct Answer']}"
    sections = [context_part, question_part, "\n".join(numbered_options), answer_part]
    return "\n\n".join(sections)

In [160]:
# Load the notebook settings. The context manager guarantees the file handle
# is closed, and the explicit encoding keeps decoding platform-independent.
with open("defaults.yaml", encoding="utf-8") as config_file:
    config = EasyDict(yaml.safe_load(config_file))
print(config.FILTER_MCQ_PROMPT)

You are a law Professor.
You have a bank of multiple choice questions.
Your task is to filter out the questions that are not relevant to the context information provided.
The questions are in Arabic.
For the following context and question:
Question: {question}

Answer the following:
- Is the question complete, meaning it has a context, question, correct answer, and distractors? 0 for NO, 1 for YES
- Is the question relevant to the context information? 0 for NO, 1 for YES
- Are the ditractors for the question all incorrect? 0 for NO, 1 for YES
- Is the correct answer the first option? 0 for NO, 1 for YES
- Does the question need the provided context to be answered? 0 for NO, 1 for YES
- Are all the distractors unique? 0 for NO, 1 for YES

Provide the answers in the following format, Do not output anything else:
complete_mcq: 0 or 1
question_relevance: 0 or 1
distractors_correctness: 0 or 1
correct_answer_first: 0 or 1
context_needed: 0 or 1
unique_distractors: 0 or 1
total_score: betwee

In [162]:
class MCQEvaluator:
    """Sends a multiple-choice question to an OpenAI chat model for evaluation."""

    def __init__(self, evaluation_prompt, model="gpt-4o", client=None):
        """
        Args:
            evaluation_prompt: Prompt template containing a ``{question}``
                placeholder; it is filled in with the MCQ text on each call.
            model: Name of the chat model to query.
            client: Object exposing ``chat.completions.create``. When omitted,
                a default ``OpenAI()`` client is created.
        """
        self.client = client if client is not None else OpenAI()
        self.evaluation_prompt = evaluation_prompt
        self.model = model

    def Evaluate(self, mcq):
        """Return the model's raw text reply for one MCQ.

        Args:
            mcq: The MCQ text substituted into the evaluation prompt.

        Returns:
            The message content of the first completion choice.
        """
        # The whole instruction, including the MCQ, goes in one system message.
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.evaluation_prompt.format(question=mcq)},
            ],
        )

        # Named `evaluation` rather than `eval` to avoid shadowing the builtin.
        evaluation = completion.choices[0].message.content
        return evaluation

In [163]:
# Evaluator configured with the filtering prompt from the loaded config.
evaluator = MCQEvaluator(evaluation_prompt=config.FILTER_MCQ_PROMPT)

In [164]:
def parse_evaluation(evaluation, expected_fields=7):
    """Parse the model's ``key: value`` reply into a dict of integer scores.

    Blank lines and surrounding whitespace are ignored, and each line is
    split on its first ``:`` so ``key:1`` and ``key: 1`` are both accepted.

    Args:
        evaluation: The raw reply text, one ``key: value`` pair per line.
        expected_fields: Number of distinct keys the reply must contain.

    Returns:
        Dict mapping each key to its integer value.

    Raises:
        ValueError: If a line has no ``:``, a value is not an integer, or the
            number of parsed keys differs from ``expected_fields``.
    """
    evaluation_dict = {}
    for line in evaluation.splitlines():
        line = line.strip()
        if not line:
            # Tolerate blank lines and a trailing newline in the reply.
            continue
        key, separator, value = line.partition(":")
        if not separator:
            raise ValueError(f"Malformed evaluation line: {line!r}")
        evaluation_dict[key.strip()] = int(value.strip())
    # Explicit check instead of `assert`, which is skipped under `python -O`.
    if len(evaluation_dict) != expected_fields:
        raise ValueError(
            f"Expected {expected_fields} evaluation fields, got {len(evaluation_dict)}"
        )
    return evaluation_dict

In [165]:
def get_eval(index, row):
    """Evaluate one MCQ row and return its scores as a dict.

    Args:
        index: Row label, used only in the error message.
        row: MCQ row passed to ``reconstruct_question``.

    Returns:
        The parsed evaluation dict, or an all-zero dict when the model's
        reply cannot be parsed. Errors raised by the evaluator call itself
        are not caught here.
    """
    question = reconstruct_question(row)
    evaluation = evaluator.Evaluate(question)
    try:
        evaluation_dict = parse_evaluation(evaluation)
    except Exception as e:
        print(evaluation)
        print(f"Error evaluating question {index}: {e}")
        # The fallback must use the same keys as a successful parse
        # ('complete_mcq', not 'contains_mcq'); a mismatched key would create
        # an extra column that is empty for every successfully parsed row.
        evaluation_dict = {
            'complete_mcq': 0,
            'question_relevance': 0,
            'distractors_correctness': 0,
            'correct_answer_first': 0,
            'context_needed': 0,
            'unique_distractors': 0,
            'total_score': 0,
        }
    return evaluation_dict

In [None]:
# Read the questions back from the CSV file and preview the first rows.
mcqs_df = pd.read_csv(filepath_or_buffer='MCQs.csv')
mcqs_df.head(n=5)

In [168]:
# Accumulates one evaluation dict per MCQ, in row order.
evaluations = list()

In [None]:
batch_size = 10
# One pool serves every batch and is shut down when the `with` block exits.
with ThreadPool() as pool:
    # Iterate over mcqs_df itself: the batches below are sliced from it.
    for i in tqdm(range(0, len(mcqs_df), batch_size), "Evaluating New Prompt MCQs"):
        batch = mcqs_df.iloc[i:i+batch_size]
        # imap yields results in input order; list() collects the whole batch
        # before anything is appended.
        batch_evaluations = list(pool.imap(lambda x: get_eval(*x), batch.iterrows()))
        evaluations.extend(batch_evaluations)
        print(evaluations[-1])
        # Rewrite the results file after every batch.
        with open('mcqs_newprompt_eval.json', 'w') as f:
            json.dump(evaluations, f)
    


In [170]:
# Build the frame in one step from the list of evaluation dicts instead of
# concatenating row by row. `columns` fixes the schema: keys outside this
# list are dropped and keys missing from a dict become NaN.
eval_columns = [
    'complete_mcq', 'question_relevance', 'distractors_correctness',
    'correct_answer_first', 'context_needed', 'unique_distractors',
    'total_score',
]
eval_df = pd.DataFrame(evaluations, columns=eval_columns)


In [None]:
# Put each question's scores beside it; concat along columns aligns rows by index label.
frames_side_by_side = [mcqs_df, eval_df]
append_df = pd.concat(frames_side_by_side, axis='columns')
append_df.head()

In [174]:
# Write questions plus scores to disk as UTF-8 with a BOM, without the index column.
append_df.to_csv(
    'MCQs_evaluated.csv',
    index=False,
    encoding='utf-8-sig',
)

In [None]:
# Keep only rows whose total score equals 6, then drop rows with any missing value.
has_full_score = append_df['total_score'] == 6
filtered_df = append_df.loc[has_full_score].dropna()
filtered_df.info()

In [None]:
import pandas as pd
import numpy as np


def shuffle_options(row):
    """Shuffle the four options of one MCQ row and record where the answer landed.

    The value in 'Option 1' is treated as the correct answer. After the
    shuffle, 'Answer Key' holds the 1-based number of the 'Option N' column
    that now contains it, matching the numbering of the option columns.

    Args:
        row: A pandas Series with 'Option 1' .. 'Option 4' (and 'Answer Key').

    Returns:
        A copy of ``row`` with shuffled options and the updated 'Answer Key';
        the input row is left unchanged.
    """
    option_columns = ['Option 1', 'Option 2', 'Option 3', 'Option 4']

    # Work on a copy so the caller's row is not modified.
    row = row.copy()
    correct_answer = row['Option 1']
    options = [row[column] for column in option_columns]

    # Shuffles the list in place using NumPy's global random state.
    np.random.shuffle(options)

    # list.index is 0-based; the option columns are numbered from 1.
    new_answer_key = options.index(correct_answer) + 1

    for column, option in zip(option_columns, options):
        row[column] = option
    row['Answer Key'] = new_answer_key

    return row

# Shuffle the options of every filtered question, one row at a time.
shuffled_filtered_df = filtered_df.apply(func=shuffle_options, axis=1)