In [None]:
%pip install git+https://github.com/bigscience-workshop/petals

In [1]:
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

# model_name = "petals-team/StableBeluga2"
model_name = "meta-llama/Meta-Llama-3-8B"
# You can also use any other supported model from 🤗 Model Hub

# tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False, use_auth_token="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")
# model = AutoDistributedModelForCausalLM.from_pretrained(model_name, use_auth_token="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")
# model = model.cuda()

In [8]:
import argparse
import json
import os
import time
import json
import os
import time 
from tqdm import tqdm
import argparse
from pathlib import Path
from typing import Tuple
import pandas as pd
import torch
import transformers
from datasets import load_dataset
from transformers import LlamaForCausalLM, LlamaTokenizer, AutoTokenizer, AutoModel, AutoModelForCausalLM
import accelerate

import pandas as pd
import torch
from tqdm import tqdm

TASKS = [
        'abstract_algebra',
        'astronomy',
        'business_ethics',
        'clinical_knowledge',
        'college_biology',
        'college_chemistry',
        'college_computer_science',
        'college_mathematics',
        'college_medicine',
        'college_physics',
        'computer_security',
        'conceptual_physics',
        'econometrics',
        'electrical_engineering',
        'elementary_mathematics',
        'formal_logic',
        'global_facts',
        'high_school_biology',
        'high_school_chemistry',
        'high_school_computer_science',
        'high_school_geography',
        'high_school_macroeconomics',
        'high_school_mathematics',
        'high_school_microeconomics',
        'high_school_physics',
        'high_school_psychology',
        'high_school_statistics',
        'human_aging',
        'human_sexuality',
        'international_law',
        'jurisprudence',
        'logical_fallacies',
        'machine_learning',
        'management',
        'marketing',
        'medical_genetics',
        'miscellaneous',
        'moral_disputes',
        'moral_scenarios',
        'nutrition',
        'philosophy',
        'professional_accounting',
        'professional_law',
        'professional_medicine',
        'professional_psychology',
        'public_relations',
        'world_religions']

choices = ["A", "B", "C", "D"]

def compute_metric(output_filename):
    with open(output_filename, 'r') as f:
        run_results = json.load(f)
    total_acc = 0
    total_num = 0
    for task in run_results:
        acc = 0
        pred_answers = run_results[task]['pred_answers']
        gold_answers = run_results[task]['gold_answers']
        for pred, gold in zip(pred_answers, gold_answers):
            if pred == gold: acc += 1
        print("ACC-%s: %.4f" % (task, acc/len(gold_answers)))
        total_acc += acc
        total_num += len(gold_answers)
    print("ACC-all: %.4f" % (total_acc/total_num))


def format_subject(subject):
    l = subject.split("_")
    s = ""
    for entry in l:
        s += " " + entry
    return s

def format_example(df, idx, include_answer=True):
    prompt = df.iloc[idx, 0]
    k = df.shape[1] - 2
    for j in range(k):
        prompt += "\n{}. {}".format(choices[j], df.iloc[idx, j+1])
    prompt += "\nAnswer:"
    if include_answer:
        prompt += " {}\n\n".format(df.iloc[idx, k + 1])
    return prompt

def gen_prompt(train_df, subject, k=-1):
    prompt = "The following are multiple choice questions (with answers) about {}.\n\n".format(format_subject(subject))
    if k == -1:
        k = train_df.shape[0]
    for i in range(k):
        prompt += format_example(train_df, i)
    return prompt


# def custom_stopping_criteria(input_ids, score, **kwargs):
#     stop_ids = [29871, 13, 13] # \n\n 
#     return input_ids[-len(stop_ids)]

def prepare_input(tokenizer, prompts):
    input_tokens = tokenizer.batch_encode_plus(prompts, return_tensors="pt", padding=True)
    for t in input_tokens:
        if torch.is_tensor(input_tokens[t]):
            input_tokens[t] = input_tokens[t].to('cuda')

    return input_tokens

def load(model_type):
    n_gpus = torch.cuda.device_count()
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        use_fast=False,
        padding_side="left", token = "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",
    )
    tokenizer.pad_token_id = 0 if tokenizer.pad_token_id is None else tokenizer.pad_token_id
    tokenizer.bos_token_id = 1
    
    if model_type == 'llama':
        # we use tensor parallel for loading llama
        model = AutoModelForCausalLM.from_pretrained(model_name, token= "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", low_cpu_mem_usage = True, torch_dtype=torch.float16)
        model.cuda()
    else:
        model = AutoModelForCausalLM.from_pretrained(model_name, token = "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", device_map = 'balanced_low_0', torch_dtype=torch.float16, trust_remote_code=True)
    model.eval()

    return model, tokenizer

def batch_split(prompts, batch_num):
    batch_prompts = []
    mini_batch = []
    for prompt in prompts:
        mini_batch.append(prompt)
        if len(mini_batch) == batch_num:
            batch_prompts.append(mini_batch)
            mini_batch = []
    if len(mini_batch) != 0:
        batch_prompts.append(mini_batch)
    return batch_prompts

def batch_infer(model, tokenizer, prompts):
    batch_size = 4
    answers = []
    for batch_input in tqdm(batch_split(prompts, batch_size)):
        encode_inputs = prepare_input(tokenizer, batch_input)
        outputs = model.generate(**encode_inputs, max_new_tokens=1)
        answers.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))
    answers = [answer[-1] for answer in answers]
    return answers

In [9]:
def main(data_dir: str, param_size: str, model_type: str):
    
    run_results = {}
    output_filename = 'run_results_%s_%sb.json' % (model_type, param_size)
    
    model, tokenizer = load(model_type)
    start_time = time.time()
    for task in TASKS:
        print('Testing %s ...' % task)
        records = []
        dev_df = pd.read_csv(os.path.join(data_dir, "dev", task + "_dev.csv"), header=None)[:5]
        test_df = pd.read_csv(os.path.join(data_dir, "test", task + "_test.csv"), header=None)
        for i in range(test_df.shape[0]):
            # get prompt and make sure it fits
            k = 5
            prompt_end = format_example(test_df, i, include_answer=False)
            train_prompt = gen_prompt(dev_df, task, k)
            prompt = train_prompt + prompt_end
            while len(tokenizer.tokenize(prompt)) + 1> 2048: # bos token
                prompt_split = prompt.split("\n\n")
                prompt_split.pop(1)
                prompt = '\n\n'.join(prompt_split)
            label = test_df.iloc[i, test_df.shape[1]-1]
            records.append({'prompt':prompt, 'answer':label})

        pred_answers = batch_infer(model, tokenizer, [record['prompt'] for record in records])
        gold_answers = [record['answer'] for record in records]
        run_results[task] = {'pred_answers':pred_answers, 'gold_answers':gold_answers}
    with open(output_filename, 'w') as f:
        json.dump(run_results, f, ensure_ascii=False, indent=2)
    
    compute_metric(output_filename)
    end_time = time.time()
    print("total run time %.2f" % (end_time - start_time))

In [10]:
main('data', '8B', 'llama')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Testing abstract_algebra ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:08,  2.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:00<00:08,  2.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:08,  2.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:01<00:07,  2.65it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:01<00:07,  2.67it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:02<00:07,  2.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:02<00:06,  2.70it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:02<00:06,  2.72it/s]Setting `p

Testing astronomy ...


  0%|          | 0/38 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/38 [00:00<00:20,  1.77it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 2/38 [00:01<00:20,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 3/38 [00:01<00:21,  1.66it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 4/38 [00:02<00:19,  1.73it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 13%|█▎        | 5/38 [00:02<00:18,  1.74it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 6/38 [00:03<00:18,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 18%|█▊        | 7/38 [00:03<00:17,  1.78it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 21%|██        | 8/38 [00:04<00:17,  1.76it/s]Setting `p

Testing business_ethics ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:13,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:01<00:12,  1.79it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:12,  1.81it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:02<00:11,  1.82it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:02<00:10,  1.82it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:03<00:10,  1.79it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:03<00:09,  1.81it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:04<00:09,  1.79it/s]Setting `p

Testing clinical_knowledge ...


  0%|          | 0/67 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/67 [00:00<00:24,  2.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/67 [00:00<00:26,  2.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/67 [00:01<00:26,  2.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 4/67 [00:01<00:25,  2.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 5/67 [00:02<00:25,  2.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 6/67 [00:02<00:24,  2.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 7/67 [00:02<00:23,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 8/67 [00:03<00:23,  2.51it/s]Setting `p

Testing college_biology ...


  0%|          | 0/36 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/36 [00:00<00:15,  2.25it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 2/36 [00:00<00:14,  2.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 3/36 [00:01<00:14,  2.28it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 4/36 [00:01<00:15,  2.09it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 5/36 [00:02<00:14,  2.14it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 17%|█▋        | 6/36 [00:02<00:13,  2.16it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 7/36 [00:03<00:14,  2.05it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 22%|██▏       | 8/36 [00:03<00:13,  2.11it/s]Setting `p

Testing college_chemistry ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:13,  1.84it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:01<00:12,  1.90it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:12,  1.83it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:02<00:11,  1.83it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:02<00:10,  1.84it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:03<00:10,  1.84it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:03<00:09,  1.84it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:04<00:09,  1.81it/s]Setting `p

Testing college_computer_science ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:21,  1.12it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:01<00:19,  1.16it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:02<00:18,  1.17it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:03<00:17,  1.19it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:04<00:16,  1.21it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:04<00:15,  1.25it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:05<00:14,  1.24it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:06<00:14,  1.20it/s]Setting `p

Testing college_mathematics ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:13,  1.73it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:01<00:13,  1.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:12,  1.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:02<00:12,  1.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:02<00:11,  1.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:03<00:11,  1.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:04<00:10,  1.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:04<00:09,  1.71it/s]Setting `p

Testing college_medicine ...


  0%|          | 0/44 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/44 [00:00<00:22,  1.88it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▍         | 2/44 [00:01<00:41,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 3/44 [00:02<00:31,  1.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 4/44 [00:02<00:26,  1.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█▏        | 5/44 [00:03<00:22,  1.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▎        | 6/44 [00:03<00:20,  1.86it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 7/44 [00:04<00:19,  1.91it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 18%|█▊        | 8/44 [00:04<00:19,  1.86it/s]Setting `p

Testing college_physics ...


  0%|          | 0/26 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/26 [00:00<00:12,  1.99it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/26 [00:00<00:11,  2.01it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/26 [00:01<00:11,  2.02it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▌        | 4/26 [00:01<00:11,  2.00it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 5/26 [00:02<00:10,  1.98it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 23%|██▎       | 6/26 [00:03<00:10,  2.00it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 27%|██▋       | 7/26 [00:03<00:09,  2.02it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 31%|███       | 8/26 [00:04<00:09,  2.00it/s]Setting `p

Testing computer_security ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:11,  2.08it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:00<00:11,  2.08it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:10,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:01<00:09,  2.21it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:02<00:08,  2.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:02<00:08,  2.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:03<00:07,  2.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:03<00:06,  2.43it/s]Setting `p

Testing conceptual_physics ...


  0%|          | 0/59 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/59 [00:00<00:16,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/59 [00:00<00:16,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 3/59 [00:00<00:16,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 4/59 [00:01<00:16,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 5/59 [00:01<00:15,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 6/59 [00:01<00:15,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 7/59 [00:02<00:15,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▎        | 8/59 [00:02<00:15,  3.26it/s]Setting `p

Testing econometrics ...


  0%|          | 0/29 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/29 [00:00<00:16,  1.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 2/29 [00:01<00:15,  1.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 3/29 [00:01<00:15,  1.64it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 4/29 [00:02<00:15,  1.61it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 17%|█▋        | 5/29 [00:03<00:15,  1.60it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 21%|██        | 6/29 [00:03<00:14,  1.63it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 7/29 [00:04<00:12,  1.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 8/29 [00:04<00:12,  1.62it/s]Setting `p

Testing electrical_engineering ...


  0%|          | 0/37 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/37 [00:00<00:15,  2.27it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 2/37 [00:00<00:14,  2.33it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 3/37 [00:01<00:14,  2.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 4/37 [00:01<00:13,  2.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▎        | 5/37 [00:02<00:13,  2.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 6/37 [00:02<00:13,  2.34it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 7/37 [00:02<00:12,  2.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 22%|██▏       | 8/37 [00:03<00:12,  2.36it/s]Setting `p

Testing elementary_mathematics ...


  0%|          | 0/95 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/95 [00:00<00:45,  2.05it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 2/95 [00:01<00:46,  1.99it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 3/95 [00:01<00:47,  1.93it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 4/95 [00:02<00:46,  1.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 5/95 [00:02<00:46,  1.92it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▋         | 6/95 [00:03<00:45,  1.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 7/95 [00:03<00:44,  1.96it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 8/95 [00:04<00:44,  1.96it/s]Setting `p

Testing formal_logic ...


  0%|          | 0/32 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/32 [00:00<00:20,  1.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▋         | 2/32 [00:01<00:18,  1.62it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 3/32 [00:01<00:17,  1.67it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▎        | 4/32 [00:02<00:16,  1.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 5/32 [00:02<00:15,  1.74it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 6/32 [00:03<00:14,  1.74it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 22%|██▏       | 7/32 [00:04<00:14,  1.74it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 25%|██▌       | 8/32 [00:04<00:13,  1.74it/s]Setting `p

Testing global_facts ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:09,  2.58it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:00<00:09,  2.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:08,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:01<00:08,  2.59it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:01<00:07,  2.57it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:02<00:07,  2.55it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:02<00:07,  2.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:03<00:06,  2.50it/s]Setting `p

Testing high_school_biology ...


  0%|          | 0/78 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/78 [00:00<00:37,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/78 [00:00<00:37,  2.03it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/78 [00:01<00:38,  1.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 4/78 [00:02<00:37,  1.97it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▋         | 5/78 [00:02<00:37,  1.93it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 6/78 [00:03<00:37,  1.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 7/78 [00:03<00:36,  1.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 8/78 [00:04<00:35,  1.95it/s]Setting `p

Testing high_school_chemistry ...


  0%|          | 0/51 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/51 [00:00<00:24,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 2/51 [00:00<00:23,  2.05it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 3/51 [00:01<00:23,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 4/51 [00:01<00:23,  2.00it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|▉         | 5/51 [00:02<00:22,  2.09it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 6/51 [00:02<00:21,  2.14it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▎        | 7/51 [00:03<00:19,  2.21it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 8/51 [00:03<00:20,  2.15it/s]Setting `p

Testing high_school_computer_science ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:22,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:01<00:21,  1.09it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:02<00:19,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:03<00:18,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:04<00:17,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:05<00:16,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:06<00:15,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:07<00:16,  1.05it/s]Setting `p

Testing high_school_geography ...


  0%|          | 0/50 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/50 [00:00<00:17,  2.74it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 2/50 [00:00<00:17,  2.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 3/50 [00:01<00:17,  2.67it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 4/50 [00:01<00:17,  2.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 5/50 [00:01<00:16,  2.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 6/50 [00:02<00:16,  2.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 7/50 [00:02<00:16,  2.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 8/50 [00:02<00:15,  2.75it/s]Setting `p

Testing high_school_macroeconomics ...


  0%|          | 0/98 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/98 [00:00<00:38,  2.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 2/98 [00:00<00:36,  2.63it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 3/98 [00:01<00:35,  2.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 4/98 [00:01<00:34,  2.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 5/98 [00:01<00:35,  2.63it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 6/98 [00:02<00:35,  2.59it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 7/98 [00:02<00:34,  2.63it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 8/98 [00:03<00:33,  2.66it/s]Setting `p

Testing high_school_mathematics ...


  0%|          | 0/68 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/68 [00:00<00:35,  1.87it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/68 [00:01<00:36,  1.79it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/68 [00:01<00:34,  1.87it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 4/68 [00:02<00:34,  1.87it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 5/68 [00:02<00:33,  1.90it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 6/68 [00:03<00:31,  1.95it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 7/68 [00:03<00:31,  1.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 8/68 [00:04<00:30,  1.95it/s]Setting `p

Testing high_school_microeconomics ...


  0%|          | 0/60 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/60 [00:00<00:25,  2.29it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/60 [00:00<00:23,  2.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 3/60 [00:01<00:23,  2.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 4/60 [00:01<00:22,  2.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 5/60 [00:02<00:23,  2.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 6/60 [00:02<00:22,  2.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 7/60 [00:02<00:21,  2.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 13%|█▎        | 8/60 [00:03<00:21,  2.43it/s]Setting `p

Testing high_school_physics ...


  0%|          | 0/38 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/38 [00:00<00:21,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 2/38 [00:01<00:23,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 3/38 [00:01<00:21,  1.65it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 4/38 [00:02<00:20,  1.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 13%|█▎        | 5/38 [00:02<00:19,  1.70it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 6/38 [00:03<00:18,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 18%|█▊        | 7/38 [00:04<00:17,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 21%|██        | 8/38 [00:04<00:17,  1.75it/s]Setting `p

Testing high_school_psychology ...


  0%|          | 0/137 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/137 [00:00<01:00,  2.26it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 2/137 [00:00<01:05,  2.07it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 3/137 [00:01<01:05,  2.06it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 4/137 [00:01<01:05,  2.05it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 5/137 [00:02<01:04,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 6/137 [00:02<01:03,  2.05it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 7/137 [00:03<01:01,  2.11it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 8/137 [00:03<01:03,  2.02it/s]S

Testing high_school_statistics ...


  0%|          | 0/54 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/54 [00:00<00:41,  1.27it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 2/54 [00:01<00:42,  1.22it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 3/54 [00:02<00:40,  1.27it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 4/54 [00:03<00:38,  1.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 5/54 [00:03<00:37,  1.31it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 6/54 [00:04<00:36,  1.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 13%|█▎        | 7/54 [00:05<00:36,  1.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▍        | 8/54 [00:06<00:36,  1.26it/s]Setting `p

Testing human_aging ...


  0%|          | 0/56 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/56 [00:00<00:16,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 2/56 [00:00<00:17,  3.13it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 3/56 [00:00<00:16,  3.23it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 4/56 [00:01<00:16,  3.10it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 5/56 [00:01<00:15,  3.19it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 6/56 [00:01<00:16,  3.11it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▎        | 7/56 [00:02<00:15,  3.21it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 8/56 [00:02<00:15,  3.13it/s]Setting `p

Testing human_sexuality ...


  0%|          | 0/33 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/33 [00:00<00:11,  2.78it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 2/33 [00:00<00:10,  2.87it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 3/33 [00:01<00:10,  2.81it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 4/33 [00:01<00:10,  2.86it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▌        | 5/33 [00:01<00:09,  2.89it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 18%|█▊        | 6/33 [00:02<00:09,  2.90it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 21%|██        | 7/33 [00:02<00:09,  2.84it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 8/33 [00:02<00:09,  2.74it/s]Setting `p

Testing international_law ...


  0%|          | 0/31 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 1/31 [00:00<00:19,  1.57it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▋         | 2/31 [00:01<00:18,  1.57it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|▉         | 3/31 [00:01<00:17,  1.63it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 13%|█▎        | 4/31 [00:02<00:16,  1.60it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 5/31 [00:03<00:16,  1.57it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 6/31 [00:03<00:15,  1.57it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 23%|██▎       | 7/31 [00:04<00:15,  1.55it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 26%|██▌       | 8/31 [00:05<00:14,  1.55it/s]Setting `p

Testing jurisprudence ...


  0%|          | 0/27 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 1/27 [00:00<00:10,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 2/27 [00:00<00:09,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 3/27 [00:01<00:09,  2.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▍        | 4/27 [00:01<00:09,  2.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▊        | 5/27 [00:02<00:09,  2.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 22%|██▏       | 6/27 [00:02<00:08,  2.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 26%|██▌       | 7/27 [00:02<00:08,  2.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 30%|██▉       | 8/27 [00:03<00:07,  2.53it/s]Setting `p

Testing logical_fallacies ...


  0%|          | 0/41 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/41 [00:00<00:17,  2.27it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▍         | 2/41 [00:00<00:17,  2.26it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 3/41 [00:01<00:16,  2.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|▉         | 4/41 [00:01<00:15,  2.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 5/41 [00:02<00:15,  2.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▍        | 6/41 [00:02<00:15,  2.28it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 17%|█▋        | 7/41 [00:03<00:14,  2.31it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|█▉        | 8/41 [00:03<00:14,  2.22it/s]Setting `p

Testing machine_learning ...


  0%|          | 0/28 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 1/28 [00:00<00:17,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 2/28 [00:01<00:16,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 3/28 [00:01<00:16,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 4/28 [00:02<00:15,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 18%|█▊        | 5/28 [00:03<00:14,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 21%|██▏       | 6/28 [00:03<00:14,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 25%|██▌       | 7/28 [00:04<00:13,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 29%|██▊       | 8/28 [00:05<00:13,  1.52it/s]Setting `p

Testing management ...


  0%|          | 0/26 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/26 [00:00<00:07,  3.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/26 [00:00<00:06,  3.54it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/26 [00:00<00:06,  3.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▌        | 4/26 [00:01<00:06,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 5/26 [00:01<00:05,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 23%|██▎       | 6/26 [00:01<00:05,  3.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 27%|██▋       | 7/26 [00:01<00:05,  3.53it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 31%|███       | 8/26 [00:02<00:05,  3.54it/s]Setting `p

Testing marketing ...


  0%|          | 0/59 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/59 [00:00<00:22,  2.53it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/59 [00:00<00:23,  2.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 3/59 [00:01<00:22,  2.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 4/59 [00:01<00:22,  2.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 5/59 [00:02<00:21,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 6/59 [00:02<00:21,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 7/59 [00:02<00:20,  2.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▎        | 8/59 [00:03<00:20,  2.52it/s]Setting `p

Testing medical_genetics ...


  0%|          | 0/25 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 1/25 [00:00<00:08,  2.99it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 2/25 [00:00<00:07,  2.95it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 3/25 [00:01<00:07,  2.95it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▌        | 4/25 [00:01<00:07,  2.94it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 20%|██        | 5/25 [00:01<00:06,  2.93it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 24%|██▍       | 6/25 [00:02<00:06,  2.92it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 7/25 [00:02<00:06,  2.92it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 32%|███▏      | 8/25 [00:02<00:05,  2.92it/s]Setting `p

Testing miscellaneous ...


  0%|          | 0/196 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/196 [00:00<00:57,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 2/196 [00:00<00:57,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 3/196 [00:00<00:56,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 4/196 [00:01<00:59,  3.23it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 5/196 [00:01<00:57,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 6/196 [00:01<00:56,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 7/196 [00:02<00:55,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 8/196 [00:02<00:55,  3.41it/s]S

Testing moral_disputes ...


  0%|          | 0/87 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/87 [00:00<00:42,  2.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 2/87 [00:00<00:39,  2.17it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 3/87 [00:01<00:39,  2.11it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▍         | 4/87 [00:01<00:38,  2.16it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 5/87 [00:02<00:38,  2.11it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 6/87 [00:02<00:37,  2.15it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 7/87 [00:03<00:37,  2.12it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 8/87 [00:03<00:37,  2.09it/s]Setting `p

Testing moral_scenarios ...


  0%|          | 0/224 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 1/224 [00:00<02:10,  1.70it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 2/224 [00:01<02:17,  1.61it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 3/224 [00:01<02:18,  1.59it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 4/224 [00:02<02:19,  1.58it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 5/224 [00:03<02:15,  1.62it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 6/224 [00:03<02:16,  1.59it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 7/224 [00:04<02:16,  1.58it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 8/224 [00:05<02:16,  1.58it/s]S

Testing nutrition ...


  0%|          | 0/77 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/77 [00:00<00:43,  1.76it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/77 [00:01<00:41,  1.81it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/77 [00:01<00:41,  1.78it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 4/77 [00:02<00:43,  1.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▋         | 5/77 [00:02<00:42,  1.70it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 6/77 [00:03<00:41,  1.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 7/77 [00:04<00:40,  1.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 8/77 [00:04<00:39,  1.73it/s]Setting `p

Testing philosophy ...


  0%|          | 0/78 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/78 [00:00<00:22,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/78 [00:00<00:24,  3.16it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/78 [00:00<00:24,  3.08it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 4/78 [00:01<00:23,  3.20it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▋         | 5/78 [00:01<00:24,  3.01it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 6/78 [00:01<00:24,  2.91it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 7/78 [00:02<00:26,  2.73it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 8/78 [00:02<00:25,  2.73it/s]Setting `p

Testing professional_accounting ...


  0%|          | 0/71 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/71 [00:00<00:44,  1.58it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/71 [00:01<00:43,  1.57it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/71 [00:01<00:43,  1.56it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 4/71 [00:02<00:43,  1.54it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 5/71 [00:03<00:44,  1.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  8%|▊         | 6/71 [00:03<00:43,  1.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|▉         | 7/71 [00:04<00:43,  1.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█▏        | 8/71 [00:05<00:42,  1.50it/s]Setting `p

Testing professional_law ...


  0%|          | 0/384 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 1/384 [00:01<10:53,  1.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 2/384 [00:03<10:51,  1.70s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 3/384 [00:05<11:18,  1.78s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 4/384 [00:06<11:05,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 5/384 [00:08<10:52,  1.72s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 6/384 [00:10<10:55,  1.73s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 7/384 [00:12<10:57,  1.74s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 8/384 [00:13<10:51,  1.73s/it]S

Testing professional_medicine ...


  0%|          | 0/68 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 1/68 [00:01<01:14,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 2/68 [00:02<01:14,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 3/68 [00:03<01:11,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  6%|▌         | 4/68 [00:04<01:09,  1.08s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 5/68 [00:05<01:12,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 6/68 [00:06<01:13,  1.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|█         | 7/68 [00:07<01:09,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 8/68 [00:09<01:08,  1.14s/it]Setting `p

Testing professional_psychology ...


  0%|          | 0/153 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/153 [00:00<01:26,  1.75it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|▏         | 2/153 [00:01<01:23,  1.81it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 3/153 [00:01<01:28,  1.69it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 4/153 [00:02<01:27,  1.71it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  3%|▎         | 5/153 [00:02<01:29,  1.66it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▍         | 6/153 [00:03<01:27,  1.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▍         | 7/153 [00:04<01:25,  1.70it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▌         | 8/153 [00:04<01:24,  1.71it/s]S

Testing public_relations ...


  0%|          | 0/28 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  4%|▎         | 1/28 [00:00<00:13,  1.98it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 2/28 [00:00<00:11,  2.25it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 3/28 [00:01<00:10,  2.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 4/28 [00:01<00:10,  2.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 18%|█▊        | 5/28 [00:02<00:10,  2.22it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 21%|██▏       | 6/28 [00:02<00:09,  2.23it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 25%|██▌       | 7/28 [00:03<00:09,  2.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 29%|██▊       | 8/28 [00:03<00:08,  2.37it/s]Setting `p

Testing world_religions ...


  0%|          | 0/43 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  2%|▏         | 1/43 [00:00<00:10,  3.97it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  5%|▍         | 2/43 [00:00<00:11,  3.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  7%|▋         | 3/43 [00:00<00:10,  3.65it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  9%|▉         | 4/43 [00:01<00:10,  3.77it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 12%|█▏        | 5/43 [00:01<00:09,  3.82it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 14%|█▍        | 6/43 [00:01<00:09,  3.72it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 16%|█▋        | 7/43 [00:01<00:09,  3.68it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▊        | 8/43 [00:02<00:09,  3.62it/s]Setting `p

ACC-abstract_algebra: 0.3500
ACC-astronomy: 0.7039
ACC-business_ethics: 0.5900
ACC-clinical_knowledge: 0.7057
ACC-college_biology: 0.7292
ACC-college_chemistry: 0.4200
ACC-college_computer_science: 0.4700
ACC-college_mathematics: 0.2900
ACC-college_medicine: 0.6127
ACC-college_physics: 0.4118
ACC-computer_security: 0.8200
ACC-conceptual_physics: 0.5404
ACC-econometrics: 0.4649
ACC-electrical_engineering: 0.5517
ACC-elementary_mathematics: 0.3704
ACC-formal_logic: 0.3889
ACC-global_facts: 0.3800
ACC-high_school_biology: 0.7516
ACC-high_school_chemistry: 0.5813
ACC-high_school_computer_science: 0.6900
ACC-high_school_geography: 0.8283
ACC-high_school_macroeconomics: 0.6333
ACC-high_school_mathematics: 0.3889
ACC-high_school_microeconomics: 0.6891
ACC-high_school_physics: 0.3311
ACC-high_school_psychology: 0.8312
ACC-high_school_statistics: 0.4491
ACC-human_aging: 0.6547
ACC-human_sexuality: 0.7328
ACC-international_law: 0.8182
ACC-jurisprudence: 0.7500
ACC-logical_fallacies: 0.7423
ACC-m


