In [1]:
from datasets import load_dataset

import pandas as pd
import numpy as np
from tqdm.auto import tqdm

tqdm.pandas()

In [2]:
def subject_filter(x, subjects):
    """Tell whether an example belongs to one of the requested subjects.

    Args:
        x: Mapping that exposes a 'subject' key (one dataset example).
        subjects: Container of subject names to keep.

    Returns:
        bool: True if ``x['subject']`` is found in ``subjects``, else False.
    """
    # The membership test already evaluates to a bool, so it is returned
    # directly instead of being wrapped in an if/else.
    return x['subject'] in subjects

# Subjects that make up the "hard" MMLU subset.
subjects = [
    'machine_learning',
    'econometrics',
    'abstract_algebra',
    'professional_accounting',
    'professional_medicine',
]

# Load the "all" configuration of the cais/mmlu dataset.
dataset = load_dataset("cais/mmlu", "all")

# First keep only the chosen subjects, then keep questions shorter than
# 128 characters.
subject_matched = dataset.filter(lambda x: subject_filter(x, subjects))
filtered_dataset = subject_matched.filter(
    lambda example: len(example["question"]) < 128
)

# Shuffle with a fixed seed and take the first 100 rows of the test split.
shuffled_dataset = filtered_dataset.shuffle(seed=42)
sampled_dataset = shuffled_dataset['test'][:100]

In [20]:
# Pull the three sampled columns out as NumPy arrays; `choices` is indexed
# below as a 2-D array with one column per answer option.
questions = np.array(sampled_dataset["question"])
choices = np.array(sampled_dataset["choices"])
answers = np.array(sampled_dataset["answer"])

# Assemble the table: the question text, the four options, then the answer.
data = {"Original": questions}
for option_idx in range(4):
    data[f"Choice{option_idx + 1}"] = choices[:, option_idx]
data['Answer'] = answers
df = pd.DataFrame(data)

# Write the columns in the export order (options first, question text last),
# without the DataFrame index.
export_columns = ['Choice1', 'Choice2', 'Choice3', 'Choice4', 'Answer', 'Original']
df[export_columns].to_csv("mmlu_hard.csv", index=False)

## Conversions

### Questions

In [7]:
from openai import OpenAI
from datasets import load_dataset

import pandas as pd
import numpy as np
from tqdm.auto import tqdm

tqdm.pandas()

# The API key is no longer written into the notebook: with no `api_key`
# argument the OpenAI client reads it from the OPENAI_API_KEY environment
# variable, so set that variable before running this cell.
client = OpenAI()

In [8]:
# The stored CSV carries an "Unnamed: 0" column (it looks like a DataFrame
# index that was saved with the file). Drop it when present so it is not
# copied into the converted CSV written later; errors='ignore' keeps the cell
# working if the file is ever regenerated without that column.
mmlu_hard = pd.read_csv('data/mmlu_hard.csv').drop(columns=['Unnamed: 0'], errors='ignore')
mmlu_hard.head()

Unnamed: 0,Answer,Original,Interrogative,Declarative,Choice1,Choice2,Choice3,Choice4
0,2,Find the number of elements in the indicated c...,What's the number of elements in the cyclic su...,The number of elements in the cyclic subgroup ...,25,5,6,30
1,0,Find the order of the factor group (Z_11 x Z_1...,Which is the order of the factor group (Z_11 x...,The order of the factor group (Z_11 x Z_15)/(<...,1,2,5,11
2,2,A project should be accepted if the present va...,How should the present value of cash flows of ...,"To be accepted, the present value of cash flow...",Equal to the initial investment.,Less than the initial investment.,Greater than the initial investment.,Equal to zero.
3,0,Which of the following is correct concerning l...,Which of the following is correct concerning l...,Which of the following is correct concerning l...,They use a different method of transforming th...,The logit model can result in too many observa...,"For the logit model, the marginal effect of a ...",The probit model is based on a cumulative logi...
4,2,What is the price of a five-year bond (face va...,What is the price of a five-year bond (face va...,What is the price of a five-year bond (face va...,$100,$85,$82,$75


In [9]:
def convert_mmlu(row):
    """Ask the chat model to rewrite one question as a declarative stem.

    Args:
        row: Text interpolated after "SENTENCE: " in the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    # Few-shot instructions: three question -> sentence-stem examples followed
    # by a reminder not to answer the question.
    system_message = {
        "role": "system",
        "content": "Do the following conversions from question to indicatives.\n\nSENTENCE: What's the number of elements in the cyclic subgroup of Z_30 generated by 25?\nCONVERTED: The number of elements in the cyclic subgroup of Z_30 generated by 25 is\n\nSENTENCE: Which is the order of the factor group (Z_11 x Z_15)/(<1, 1>)?\nCONVERTED: The order of the factor group (Z_11 x Z_15)/(<1, 1>) is\n\nSENTENCE: How should the present value of cash flows of a project be to be accepted?\nCONVERTED: To be accepted, the present value of cash flows of a project should be\n\nDon't answer, just do the conversion!\n\n\n\n\n\n\n",
    }
    user_message = {
        "role": "user",
        "content": f"SENTENCE: {row}",
    }

    # Sampling settings forwarded to the API as keyword arguments.
    sampling_options = dict(
        temperature=0,
        max_tokens=64,
        top_p=0.3,
        frequency_penalty=0,
        presence_penalty=0,
    )

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[system_message, user_message],
        **sampling_options,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

# Run the converter on every entry of the 'Interrogative' column (with a tqdm
# progress bar) and store the results as the 'Declarative' column.
declarative_texts = mmlu_hard['Interrogative'].progress_apply(convert_mmlu)
mmlu_hard['Declarative'] = declarative_texts

  0%|          | 0/100 [00:00<?, ?it/s]

In [14]:
def convert_mmlu(row):
    """Ask the chat model to rewrite one question as an imperative sentence.

    NOTE: this definition reuses the name ``convert_mmlu``; once this cell has
    run it replaces the declarative converter defined earlier in the notebook.

    Args:
        row: Text interpolated after "SENTENCE: " in the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """

    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {
            "role": "system",
            # NOTE(review): the instruction still says "indicatives" although
            # the examples are imperatives; the prompt is left unchanged so the
            # generated data stays comparable - confirm whether to reword it.
            "content": "Do the following conversions from question to indicatives.\n\nSENTENCE: What's the number of elements in the cyclic subgroup of Z_30 generated by 25?\nCONVERTED: Identify the number of elements in the cyclic subgroup of Z_30 generated by 25.\n\nSENTENCE:  What's the order of the factor group (Z_11 x Z_15)/(<1, 1>)?\nCONVERTED: Pick the order of the factor group (Z_11 x Z_15)/(<1, 1>).\n\nSENTENCE: How should the present value of cash flows of a project be to be accepted?\nCONVERTED: Choose how the present value of cash flows of a project should be to be accepted.\n\nDon't answer, just do the conversion!\n\n\n\n\n\n\n"
            },
            {
            "role": "user",
            "content": f"SENTENCE: {row}"
            }
        ],
        temperature=0,
        # Raised from 32 to 64 to match the declarative converter: the
        # imperative rewrite is about as long as the question itself, so a
        # 32-token cap can cut off conversions of longer questions.
        max_tokens=64,
        top_p=0.3,
        frequency_penalty=0,
        presence_penalty=0
        )

    return response.choices[0].message.content

# Run the converter on every entry of the 'Interrogative' column (with a tqdm
# progress bar) and store the results as the 'Imperative' column.
imperative_texts = mmlu_hard['Interrogative'].progress_apply(convert_mmlu)
mmlu_hard['Imperative'] = imperative_texts

  0%|          | 0/100 [00:00<?, ?it/s]

In [15]:
def _text_after_marker(text):
    """Return the part of ``text`` after the last "CONVERTED: " marker.

    Text that does not contain the marker is returned unchanged.
    """
    pieces = text.split("CONVERTED: ")
    return pieces[-1]


# Apply the same clean-up to both generated columns.
for column_name in ('Declarative', 'Imperative'):
    mmlu_hard[column_name] = mmlu_hard[column_name].apply(_text_after_marker)

In [16]:
# Save the table with the generated columns, without the DataFrame index.
converted_csv_path = "data/mmlu_hard_conv.csv"
mmlu_hard.to_csv(converted_csv_path, index=False)