# Prototype implementation without AutoCog's Automaton

In [8]:
import os, sys, csv, copy, json
import glob, tqdm, itertools
import pathlib, shutil

# Answer labels; position i labels the i-th choice of a question.
letters = list('ABCD')

def list_topics(datapath, mode, pattern='*'):
    """List the topics that have a CSV file for the given mode.

    Files are looked up as ``<datapath>/auxiliary_train/<pattern>.csv`` when
    ``mode == 'aux'`` and as ``<datapath>/<mode>/<pattern>_<mode>.csv``
    otherwise.

    Args:
        datapath: Root directory of the dataset.
        mode: ``'aux'`` or the name of a split sub-directory (e.g. ``'dev'``).
        pattern: Glob pattern matched against the topic part of the file name.

    Returns:
        A list of ``{'topic': ..., 'mode': ...}`` dicts, sorted by file path so
        that the order is reproducible between runs.
    """
    datapath = os.path.realpath(datapath)
    # Escape the root so glob metacharacters in the directory name are literal.
    root = glob.escape(datapath)
    if mode == 'aux':
        suffix = ''
        found = glob.glob(os.path.join(root, 'auxiliary_train', f"{pattern}.csv"))
    else:
        suffix = f"_{mode}"
        found = glob.glob(os.path.join(root, mode, f"{pattern}{suffix}.csv"))

    topics = []
    # glob returns matches in arbitrary order; sort for deterministic output.
    for path in sorted(found):
        stem = os.path.splitext(os.path.basename(path))[0]
        # Strip the known "_<mode>" suffix instead of re-splitting on '_',
        # which would misparse a mode that itself contains an underscore.
        topic = stem[:len(stem) - len(suffix)]
        topics.append({'topic': topic, 'mode': mode})
    return topics

def open_topic(topic, mode, datapath):
    """Read every question of one topic CSV file.

    The file is ``<datapath>/auxiliary_train/<topic>.csv`` when
    ``mode == 'aux'`` and ``<datapath>/<mode>/<topic>_<mode>.csv`` otherwise.
    Each non-empty row is interpreted as: question, choices..., answer.

    Args:
        topic: Topic name (the file name without the mode suffix/extension).
        mode: ``'aux'`` or the name of a split sub-directory.
        datapath: Root directory of the dataset.

    Returns:
        A list of dicts with keys ``idx``, ``mode``, ``topic``, ``question``,
        ``choices`` and ``answer``; ``idx`` counts the non-empty rows from 0.

    Raises:
        OSError: If the file cannot be opened.
    """
    datapath = os.path.realpath(datapath)
    if mode == 'aux':
        filepath = os.path.join(datapath, 'auxiliary_train', f"{topic}.csv")
    else:
        filepath = os.path.join(datapath, mode, f"{topic}_{mode}.csv")

    data = []
    # newline='' lets the csv module handle line endings itself (needed for
    # quoted fields with embedded newlines); utf-8 avoids locale-dependent
    # decoding.
    with open(filepath, newline='', encoding='utf-8') as F:
        # Blank lines come back as empty rows and carry no question.
        rows = (row for row in csv.reader(F) if row)
        for (i, row) in enumerate(rows):
            data.append({
                'idx': i,
                'mode': mode,
                'topic': topic,
                'question': row[0],
                'choices': row[1:-1],
                'answer': row[-1],
            })
    return data

def guess_answer(llm, topic, question, choices, **kwargs):
    """Answer without showing the choices in the prompt.

    The prompt holds only the topic and the question; ``llm.choose`` is then
    asked to pick among the raw choice texts.

    Returns:
        ``(letter, prompt)`` where ``letter`` is the entry of ``letters`` at
        the index returned by ``llm.choose``. Extra keyword arguments are
        accepted and ignored.
    """
    prompt = "".join([
        "You are answering a question for the MMLU exam.\n",
        f"Topic: {topic.replace('_', ' ')}\n",
        f"Question: {question}\n",
        "Answer: ",
    ])
    picked = llm.choose(prompt=prompt, choices=choices)
    return letters[picked], prompt

def choose_answer(llm, topic, question, choices, **kwargs):
    """Answer by selecting a letter after listing the lettered choices.

    The prompt lists each choice as ``Choice <letter>: <text>`` and
    ``llm.choose`` is asked to pick among the letters themselves.

    Returns:
        ``(letter, prompt)`` for the index returned by ``llm.choose``. Extra
        keyword arguments are accepted and ignored.
    """
    pieces = [
        "You are answering a question for the MMLU exam.\n",
        f"Topic: {topic.replace('_', ' ')}\n",
        f"Question: {question}\n",
    ]
    # zip stops at the shorter sequence, so surplus choices are not listed.
    pieces.extend(f"Choice {letter}: {option}\n" for letter, option in zip(letters, choices))
    pieces.append("Answer: ")
    prompt = "".join(pieces)
    picked = llm.choose(prompt=prompt, choices=letters)
    return letters[picked], prompt

def repeat_answer(llm, topic, question, choices, **kwargs):
    """Answer by repeating a choice's text after listing the lettered choices.

    The prompt lists each choice as ``Choice <letter>: <text>``, but
    ``llm.choose`` is asked to pick among the choice texts rather than the
    letters.

    Returns:
        ``(letter, prompt)`` for the index returned by ``llm.choose``. Extra
        keyword arguments are accepted and ignored.
    """
    pieces = [
        "You are answering a question for the MMLU exam.\n",
        f"Topic: {topic.replace('_', ' ')}\n",
        f"Question: {question}\n",
    ]
    # zip stops at the shorter sequence, so surplus choices are not listed.
    pieces.extend(f"Choice {letter}: {option}\n" for letter, option in zip(letters, choices))
    pieces.append("Answer: ")
    prompt = "".join(pieces)
    picked = llm.choose(prompt=prompt, choices=choices)
    return letters[picked], prompt

def think_then_choose_answer(llm, topic, question, choices, max_thoughts=5, **kwargs):
    """Write up to ``max_thoughts`` short drafts, then select a letter.

    After each ``Draft:`` line completed by ``llm.complete``, ``llm.choose``
    decides between starting another draft (index 0) and answering. The final
    answer is picked among the letters.

    Returns:
        ``(letter, prompt)`` where ``prompt`` includes every draft written.
        Extra keyword arguments are accepted and ignored.
    """
    pieces = [
        "You are answering a question for the MMLU exam.\n",
        f"Topic: {topic.replace('_', ' ')}\n",
        f"Question: {question}\n",
    ]
    pieces.extend(f"Choice {letter}: {option}\n" for letter, option in zip(letters, choices))
    prompt = "".join(pieces)

    drafts = 0
    while drafts < max_thoughts:
        prompt += "Draft: "
        thought = llm.complete(prompt=prompt, stop='\n', max_tokens=10)
        prompt += thought.strip() + "\n"
        drafts += 1
        # Index 0 is "Draft: ": keep drafting only while that is chosen.
        keep_drafting = llm.choose(prompt=prompt, choices=["Draft: ", "Answer: "]) == 0
        if not keep_drafting:
            break

    prompt += "Answer: "
    verdict = llm.choose(prompt=prompt, choices=letters)
    return letters[verdict], prompt

def think_then_repeat_answer(llm, topic, question, choices, max_thoughts=5, **kwargs):
    """Write up to ``max_thoughts`` short drafts, then repeat a choice's text.

    After each ``Draft:`` line completed by ``llm.complete``, ``llm.choose``
    decides between starting another draft (index 0) and answering. The final
    answer is picked among the choice texts rather than the letters.

    Returns:
        ``(letter, prompt)`` where ``prompt`` includes every draft written.
        Extra keyword arguments are accepted and ignored.
    """
    pieces = [
        "You are answering a question for the MMLU exam.\n",
        f"Topic: {topic.replace('_', ' ')}\n",
        f"Question: {question}\n",
    ]
    pieces.extend(f"Choice {letter}: {option}\n" for letter, option in zip(letters, choices))
    prompt = "".join(pieces)

    drafts = 0
    while drafts < max_thoughts:
        prompt += "Draft: "
        thought = llm.complete(prompt=prompt, stop='\n', max_tokens=10)
        prompt += thought.strip() + "\n"
        drafts += 1
        # Index 0 is "Draft: ": keep drafting only while that is chosen.
        keep_drafting = llm.choose(prompt=prompt, choices=["Draft: ", "Answer: "]) == 0
        if not keep_drafting:
            break

    prompt += "Answer: "
    verdict = llm.choose(prompt=prompt, choices=choices)
    return letters[verdict], prompt

def run_mmlu(llm, data, methods, respath):
    """Run every method on every sample and write one JSON file per pair.

    Args:
        llm: Model object forwarded as the first argument of each method.
        data: Sequence of sample dicts; each is expanded as keyword arguments
            to the method and must contain the 'topic' and 'idx' keys.
        methods: Mapping from method name to a callable ``fn(llm, **sample)``.
        respath: Output directory, created if missing. Each result goes to
            ``<respath>/<topic>_<idx>_<method>.json``.

    Returns:
        None. Results are only persisted on disk, not accumulated in memory.
    """
    pathlib.Path(respath).mkdir(parents=True, exist_ok=True)
    # Samples vary slowest, methods fastest; the list gives tqdm a total.
    jobs = [(position, name) for position in range(len(data)) for name in methods]
    for (position, name) in tqdm.tqdm(jobs):
        sample = data[position]
        tag = f"{sample['topic']}_{sample['idx']}"
        outcome = methods[name](llm, **sample)
        # The method's output goes first, then the sample fields are merged in.
        record = {name: outcome}
        record.update(sample)
        with open(f'{respath}/{tag}_{name}.json', 'w') as F:
            json.dump(record, F, indent=4)

# Registry of answering strategies, keyed by the name used in result files.
# Each value is called as fn(llm, **sample) and returns (letter, prompt).
methods = {
    # Choices are not shown in the prompt; the model picks among choice texts.
    'guess' : guess_answer,
    # Choices are listed with letters; the model picks a letter.
    'choose' : choose_answer,
    # Choices are listed with letters; the model picks among choice texts.
    'repeat' : repeat_answer,
    # Same as 'choose', preceded by up to max_thoughts "Draft:" lines.
    'think-choose' : think_then_choose_answer,
    # Same as 'repeat', preceded by up to max_thoughts "Draft:" lines.
    'think-repeat' : think_then_repeat_answer
}

In [9]:
from autocog.lm import Llama

# Load the model used by every answering method.
llm = Llama(model_path="/workspace/models/7B/ggml-model-q4_0.bin", n_ctx=2048)

# Gather the samples of every split, in the order dev, aux, val, test.
datapath = '/workspace/datasets/MMLU'
data = []
for mode in ('dev', 'aux', 'val', 'test'):
    for topic in list_topics(datapath=datapath, mode=mode, pattern='*'):
        data += open_topic(datapath=datapath, **topic)

# Keep a snapshot of the exact samples used; `with` closes the file.
with open('data.json', 'w') as F:
    json.dump(data, F, indent=4)

# Start from a clean output directory; it does not exist yet on a first run.
try:
    shutil.rmtree('results')
except FileNotFoundError:
    pass

# NOTE: run_mmlu has no return statement, so `results` is None; the
# per-sample outputs are the JSON files written under 'results/'.
results = run_mmlu(llm, data, methods, 'results')

llama.cpp: loading model from /workspace/models/7B/ggml-model-q4_0.bin
llama_model_load_internal: format     = ggjt v1 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =  68.20 KB
llama_model_load_internal: mem required  = 5809.33 MB (+ 1026.00 MB per state)
llama_init_from_file: kv self size  = 1024.00 MB
  0%|                                                                                                                                   | 169/578500 [1:26:39<4942:15:01, 30.76s/it]


KeyboardInterrupt: 

In [None]:
# NOTE(review): run_mmlu's `return results` is commented out, so `results` is
# None here and this prints "null"; the actual outputs are the JSON files
# written to the results directory.
print(json.dumps(results, indent=4))