In [1]:
import os
from xml.etree import ElementTree
import numpy as np
import torch
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
from io import StringIO
from contextlib import redirect_stdout
from termcolor import colored

# Move the working directory one level up so that the relative "data/..." paths
# used further down resolve.  Guarded so that re-running this cell in the same
# kernel does not climb one more directory on every execution.
if "path_parent" not in globals():
    path_parent = os.path.dirname(os.getcwd())
    os.chdir(path_parent)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
cur_dir = os.getcwd()
os.chdir(os.path.join(cur_dir, 'data'))
!git clone https://gitlab.cs.washington.edu/ALGES/TACL2015.git
!git clone https://github.com/chaochun/nlu-asdiv-dataset.git
!git clone https://github.com/openai/grade-school-math.git
os.chdir(cur_dir)

fatal: destination path 'TACL2015' already exists and is not an empty directory.
fatal: destination path 'nlu-asdiv-dataset' already exists and is not an empty directory.
fatal: destination path 'grade-school-math' already exists and is not an empty directory.


In [2]:
def read_string_from_file(path):
    """Return the whole content of a text file as one string.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    default encoding, so text containing non-ASCII characters (e.g. typographic
    apostrophes) is read the same way on every machine.

    :param path: path of the file to read
    :return: the file content as ``str``
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def sample_asdiv(dataset_path, nr_samples):
    """Draw random problems from an ASDiv-style XML file.

    Indices are drawn with ``np.random.randint`` (i.e. with replacement, from
    the global NumPy RNG), so the same problem may appear more than once.

    :param dataset_path: path of the XML file; problems are looked up under
        ``ProblemSet/Problem`` relative to the document root
    :param nr_samples: number of problems to draw
    :return: ``(questions, answers)`` - two lists of length ``nr_samples``;
        each question is the problem body followed by
        ``" Write a program that prints <question text>"``, each answer is the
        raw text of the matching ``Answer`` element
    :raises ValueError: if the Body/Question/Answer elements cannot be paired
        one-to-one (some problem lacks one of them)
    """
    dom = ElementTree.parse(dataset_path)

    # XML parsing: three parallel lists, one entry per <Problem>.
    body_list = dom.findall("ProblemSet/Problem/Body")
    answer_list = dom.findall("ProblemSet/Problem/Answer")
    question_list = dom.findall("ProblemSet/Problem/Question")

    # The lists are indexed in lockstep below; a missing child element would
    # silently pair a question with another problem's answer.
    if not (len(body_list) == len(answer_list) == len(question_list)):
        raise ValueError(
            "Body/Question/Answer counts differ "
            f"({len(body_list)}/{len(question_list)}/{len(answer_list)}); "
            "cannot pair questions with answers"
        )

    # Randomly choose the problems
    rand_indexes = np.random.randint(0, len(body_list), nr_samples)

    sample_q_list = [
        f"{body_list[i].text} Write a program that prints {question_list[i].text}"
        for i in rand_indexes
    ]
    sample_a_list = [answer_list[i].text for i in rand_indexes]

    return sample_q_list, sample_a_list


def sample_gsm8k(dataset_path):
    """Pick one random GSM8K problem and return its question and final answer.

    The final answer is the number that follows the ``#### `` marker in the
    record's ``"answer"`` field.  An optional minus sign, decimal point and
    thousands separators are accepted; separators are removed from the result
    (``"#### 1,250"`` -> ``"1250"``).

    :param dataset_path: path of a JSON-lines file whose records have
        ``"question"`` and ``"answer"`` keys
    :return: ``(question, final_answer)``, both strings
    :raises ValueError: if the chosen record has no ``#### <number>`` marker
    """
    with open(dataset_path, encoding="utf-8") as fh:
        # Lines keep their trailing newline, so a plain truthiness test never
        # filters blank lines; strip before testing.
        data = [json.loads(line) for line in fh if line.strip()]

    # Randomly choose a problem
    rand_index = np.random.randint(0, len(data))
    problem = data[rand_index]

    match = re.search(r"#### (-?[0-9.,]+)", problem["answer"])
    if match is None:
        raise ValueError(f"no '#### <number>' final answer in record {rand_index}")
    return problem["question"], match.group(1).replace(",", "")

def sample_gsm8k_with_full_answer(dataset_path):
    """Pick one random GSM8K problem and return its question and worked solution.

    The worked solution is everything in the record's ``"answer"`` field that
    precedes the first ``####`` marker, with newlines replaced by spaces.

    :param dataset_path: path of a JSON-lines file whose records have
        ``"question"`` and ``"answer"`` keys
    :return: ``(question, worked_solution)``, both strings
    """
    with open(dataset_path, encoding="utf-8") as fh:
        # Lines keep their trailing newline, so a plain truthiness test never
        # filters blank lines; strip before testing.
        data = [json.loads(line) for line in fh if line.strip()]

    # Randomly choose a problem
    rand_index = np.random.randint(0, len(data))
    problem = data[rand_index]

    proc_answer = problem["answer"].split("####")[0]
    proc_answer = proc_answer.replace("\n", " ")
    return problem["question"], proc_answer


def sample_singleEq(dataset_path):
    """Pick one random problem from a SingleEq-style JSON file.

    :param dataset_path: path of a JSON file holding a list of problems, each
        with ``"sQuestion"`` and ``"lSolutions"`` entries
    :return: ``(question, solutions)`` of the chosen problem, exactly as stored
    """
    with open(dataset_path, "r") as handle:
        problems = json.loads(handle.read())

    # One uniformly drawn index from the global NumPy RNG.
    chosen = problems[np.random.randint(0, len(problems))]
    return chosen["sQuestion"], chosen["lSolutions"]


def preproc_sol_as_div(raw_sol):
    """Convert a raw answer string into a float.

    Only the text before the first space is used (``"12 (apples)"`` -> 12.0).
    A ratio written as ``a:b`` is converted to ``a / b``.  Anything that cannot
    be converted - non-numeric text, malformed ratios, a zero denominator -
    yields the sentinel ``22222222.0`` instead of raising.

    :param raw_sol: answer text
    :return: the numeric value as ``float``, or ``22222222.0`` if unparseable
    """
    sol = raw_sol.split(" ")[0]
    try:
        if ":" in sol:
            # Ratio answer: only the first two terms are used.
            numerator, denominator = sol.split(":")[:2]
            return float(numerator) / float(denominator)
        return float(sol)
    except (ValueError, ZeroDivisionError):
        # Sentinel for "no usable numeric answer".
        return 22222222.0


def preproc_gen_toks(gen_toks, input_len):
    """Decode generated sequences and keep only the first generated block.

    For every sequence the first ``input_len`` tokens are dropped, the rest is
    decoded with the module-level ``tokenizer``, and the text is cut at the
    first blank line (``"\\n\\n"``).

    :param gen_toks: iterable of token sequences
    :param input_len: number of leading tokens to skip in each sequence
    :return: list with one decoded, truncated string per sequence
    """
    return [
        tokenizer.decode(sequence[input_len:]).split("\n\n")[0]
        for sequence in gen_toks
    ]


def print_pred_from_output(output, sample_q, sample_a):
    """Run a generated program and check what it prints against the answer.

    The program counts as correct when it runs without raising, its complete
    standard output parses as a single float, and that float equals
    ``preproc_sol_as_div(sample_a)``.

    :param output: source code of the generated program
    :param sample_q: the question text (not used for the check)
    :param sample_a: the raw reference answer
    :return: ``True`` if the printed value equals the reference, else ``False``
    """
    captured = StringIO()
    with redirect_stdout(captured):
        try:
            # SECURITY: this executes model-generated code in-process with no
            # sandbox and no timeout - only run it on outputs you trust.
            # A fresh dict serves as both globals and locals, so functions the
            # program defines can see its own top-level variables and the
            # program cannot read or overwrite notebook state.
            exec(output, {})
        except (Exception, SystemExit):
            # A crashing program (or one calling exit()) is simply wrong.
            return False

    try:
        prediction = float(captured.getvalue())
    except ValueError:
        # Nothing printed, or the output is not one number.
        return False

    return prediction == preproc_sol_as_div(sample_a)


def pass_at_k(n, c, k):
    """
    Estimate pass@k from n samples of which c are correct.

    :param n: total number of samples
    :param c: number of correct samples
    :param k: k in pass@$k$
    :return: 1.0 when fewer than k samples are incorrect, otherwise
        1 - prod(1 - k / i) for i in n - c + 1 .. n
    """
    incorrect = n - c
    if incorrect >= k:
        terms = 1.0 - k / np.arange(incorrect + 1, n + 1)
        return 1.0 - np.prod(terms)
    # Every size-k subset must contain at least one correct sample.
    return 1.0


In [10]:
# Seed both RNGs before anything is sampled in this cell.
torch.manual_seed(0)
np.random.seed(7)

# Model identifiers.
# genji_model = ""
gptj_model = "EleutherAI/gpt-j-6B"
codeparrot_model = "lvwerra/codeparrot"

# Dataset files, relative to the current working directory.
gsm8k_path = "data/grade-school-math/grade_school_math/data/train.jsonl"
singleEq_path = "data/TACL2015/questions.json"
asdiv_path = "data/nlu-asdiv-dataset/dataset/ASDiv.xml"

# Choose the dataset you want to test (leave exactly one line active).
dataset_path = gsm8k_path
# dataset_path = singleEq_path
# dataset_path = asdiv_path

# Load the priming text to add to the prompt (pick the one matching the dataset).
priming_text = read_string_from_file("data/priming_texts/gsm8k_fewerfullanswer.txt")
# priming_text = read_string_from_file("data/priming_texts/singleEq.txt")
# priming_text = read_string_from_file("data/priming_texts/asdiv.txt")

# Sample a question with the sampler matching the dataset.
sample_q, sample_a = sample_gsm8k_with_full_answer(dataset_path)
# sample_q, sample_a = sample_singleEq(dataset_path)
# sample_q_list, sample_a_list = sample_asdiv(dataset_path, 25)

# The generation cell iterates over lists, so wrap the single sample.
sample_q_list, sample_a_list = [sample_q], [sample_a]

print(sample_a)


If it takes 15 minutes for Dante to go to Hidden Lake and 7 minutes to walk back to the Park Office, he spends a total of 15+7=<<15+7=22>>22 minutes on the trip. When Dante arrives at the Park Office, he will have been gone from the Park Office 32 minutes altogether, and since he passed by the Lake Park restaurant from The Park Office, his walk from Park Office to the Lake Park restaurant took 32-22=<<32-22=10>>10 minutes. 


In [4]:
"""GPT-J and codeparrot models run in HFTest venv"""
tokenizer = AutoTokenizer.from_pretrained(gptj_model)
model = AutoModelForCausalLM.from_pretrained(gptj_model).eval().cuda()

# Genji model run in HFTest_genji venv.
# (Kept as a comment: a bare string literal here would be the cell's last
# expression and get echoed as its output.)
#model = AutoModelForCausalLM.from_pretrained("NovelAI/genji-python-6B").eval().cuda()
#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")

'Genji model run in HFTest_genji venv'

In [13]:
torch.manual_seed(42)
np.random.seed(42)

n = 1  # completions sampled per problem (num_return_sequences)
k = 1  # k reported as Pass@k

pass_k_list = []

for sample_q, sample_a in zip(sample_q_list, sample_a_list):
    # Build the calculator-annotation hint from this sample's worked solution
    # instead of hard-coding the annotations of one particular problem, so the
    # prompt stays correct when a different problem is sampled.
    calc_hint = " ".join(re.findall(r"<<.*?>>", sample_a))

    # prompt = f"{priming_text}\n\n#{sample_q}"
    prompt = f"{priming_text}\n\n#{sample_q}\n\"\"\"{calc_hint}\"\"\""
    #prompt = f"{priming_text}\n\n# {sample_q}\n\"\"\"{sample_a}\"\"\""
    print(prompt)

    tokens = tokenizer(prompt, return_tensors="pt").input_ids
    generated_tokens = model.generate(
        tokens.long().cuda(),
        use_cache=True,
        do_sample=True,
        top_k=50,
        temperature=0.4,
        top_p=0.9,
        min_length=1,
        # Prompt length plus a budget of 150 generated tokens.
        max_length=len(tokens[0]) + 150,
        num_return_sequences=n,
        pad_token_id=tokenizer.eos_token_id,
    )
    print(len(generated_tokens))

    # Strip the prompt tokens and keep only the first generated block.
    list_outputs = preproc_gen_toks(generated_tokens, len(tokens[0]))

    # Scoring is disabled here; NOTE(review): sample_a currently holds the
    # worked solution text rather than a numeric answer - confirm before
    # re-enabling.
    # is_correct_list = [print_pred_from_output(output, sample_q, sample_a) for output in list_outputs]

    # c = is_correct_list.count(True)

    # pass_k = pass_at_k(n, c, k)
    # pass_k_list.append(pass_k)


# The owner of a Turkish restaurant wanted to prepare traditional dishes for an upcoming celebration. She ordered ground beef, in four-pound packages, from three different butchers. The following morning, the first butcher delivered 10 packages. A couple of hours later, 7 packages arrived from the second butcher. Finally, the third butcher’s delivery arrived at dusk. If all the ground beef delivered by the three butchers weighed 100 pounds, write a program that prints how many packages did the third butcher deliver?
"""<<10*4=40>> <<7*4=28>> <<40+28=68>> <<100-68=32>> <<32/4=8>>"""
pounds_beef_per_package = 4
nr_first_butcher = 10
nr_second_butcher = 7
pounds = 100
total_packages = pounds / pounds_beef_per_package
nr_third_buthcer = total_packages - nr_first_butcher - nr_second_butcher
print(nr_third_buthcer)

# When Jason plays the video game, Duty for Ashes, his mission to slay the dragon hoard requires that he fire his weapon on average every 15 seconds.   And each time he fires his

In [14]:
print(list_outputs[0])
# SECURITY: this executes model-generated code without a sandbox - inspect the
# printed program above before trusting it.  It runs in a fresh namespace so it
# cannot overwrite notebook variables such as n, k, model or tokenizer.
exec(list_outputs[0], {})


time_to_lake_park = 15
time_to_park_office = 7
time_to_restaurant = 32 - time_to_park_office
print(time_to_restaurant)
25


In [7]:
# Report the mean pass@k over all evaluated problems.  With no evaluated
# problems np.mean would return nan and emit a RuntimeWarning, so say so instead.
if pass_k_list:
    print(f"Pass@{k} = {np.mean(np.array(pass_k_list))}")
else:
    print(f"Pass@{k} = n/a (no problems were evaluated)")

Pass@3 = 0.4
