In [1]:
import os
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from termcolor import colored
import wandb
import importlib
import re

import dataset_handler as dh
import loading_utils as lu
import testing_utils as tu

# Model identifier passed to `from_pretrained` when `model_name == "gpt-j"`.
gptj_model = "EleutherAI/gpt-j-6B"
# NOTE(review): not referenced by any cell visible in this notebook.
codeparrot_model = "lvwerra/codeparrot"

# Selects which model-loading branch runs: "gpt-j" or "codegen".
#model_name = "gpt-j"
model_name = "codegen"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Re-execute dataset_handler so this kernel uses the module's current source
# (presumably after editing dataset_handler.py, avoiding a kernel restart).
importlib.reload(dh)

<module 'dataset_handler' from '/home/PracticalWork2021/dataset_handler.py'>

In [12]:
# Seed the RNGs through transformers before the model is created.
transformers.set_seed(5)

if model_name == "gpt-j":
    # GPT-J and codeparrot models run in the HFTest venv.
    tokenizer = AutoTokenizer.from_pretrained(gptj_model)
    # fp16 weights, inference mode, moved to the GPU.
    model = AutoModelForCausalLM.from_pretrained(gptj_model).half().eval().cuda()
elif model_name == "codegen":
    # CodeGen runs in the venv named "venv".
    model_args = lu.model_args()
    #model_args.model = "codegen-350M-mono"
    model, tokenizer = lu.load_CodeGen(model_args)
else:
    # Fail here rather than with a NameError on `model`/`tokenizer` in a
    # later cell.
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected 'gpt-j' or 'codegen'"
    )

loading parameters
loading parameters took 473.67s
loading tokenizer
loading tokenizer took 4.02s


In [8]:
# Experiment implementation: provides the sampling and prompt-building
# callbacks handed to the dataset handler below.
#import exp_impl.eq_legacy.func_def_eq_short as exp_impl
#import exp_impl.func_def_general as exp_impl
import exp_impl.func_def_general_socratic as exp_impl


#priming_text_path = "data/priming_texts/gsm8k/clustering_prompt/3_clusters/cluster_2.txt"  # for codegen
#priming_text_path = "data/priming_texts/gsm8k/codegen/func_eq_short.txt"
#priming_text_path = "data/priming_texts/gsm8k/codegen/func_short.txt"
priming_text_path = "data/priming_texts/gsm8k/socratic/func_def_socratic_short.txt"
#priming_text_path = "data/priming_texts/gsm8k/concepts_prompt/part-whole_3.txt"
#wandb_run_name = "@100-codegen-0"
importlib.reload(exp_impl)

# Load gsm8k (socratic variant).
current_dataset = dh.init_dataset_from_name(
    "gsm8k-socratic",
    primingtext_path=priming_text_path,
    sample_func=exp_impl.sample_n_for_prompting,
    generate_prompt_func=exp_impl.generate_prompt,
)

# Seed before sampling so the drawn problems are repeatable.
tu.set_all_seeds()
#tu.set_all_seeds_alt()

# Number of problems drawn for prompting / evaluation.
n_prompt_samples = 100
sample_q_list, sample_steps_list, sample_a_list = current_dataset.sample_n_for_prompting(
    n_prompt_samples, inc_eq=False
)

# Dump the first full prompt for manual inspection. The encoding is explicit
# so the write does not depend on the platform's default locale.
with open("test_prompt_gen.txt", "w", encoding="utf-8") as f:
    f.write(current_dataset.generate_prompt(sample_q_list[0]))

# Show the first sample: question (blue), answer (green), sub-questions (yellow).
print(colored(sample_q_list[0], "blue"))
print(colored(sample_a_list[0], "green"))
print(colored(sample_steps_list[0], "yellow"))

[34mdef exercise4():
    """
    Carol sells tickets for an exhibition. During three days she sold tickets worth $960. One ticket costs $4.
    """[0m
[32m80[0m
[33m['# How many tickets did Carol sell during three days?', '# How many tickets did Carol sell on average in one day?'][0m


In [9]:
# Set up for CodeGen
config = lu.codegen_gen_args()
#config.num_return_sequences = 4 # 4 for gsm8k 5 for asdiv
config.num_return_sequences = 1
config.k = 3
config.max_length_after_input = 40
#config.top_p = 0.95
config.top_p = 0.9
config.top_k = 10
#config.temperature = 0.7
config.temperature = 1
config.min_length = 1

gen_args = config


In [10]:
def preproc_gen_toks(gen_toks, input_len, tokenizer):
    """Decode the newly generated part of the last sequence in ``gen_toks``.

    Only the last sequence is returned (as before), so only that one is
    decoded instead of decoding every sequence and discarding the rest.

    Args:
        gen_toks: Sized, indexable collection of token-id sequences, e.g. the
            output of ``model.generate``.
        input_len: Number of leading (prompt) tokens to drop from the sequence.
        tokenizer: Object exposing ``decode(token_ids)`` that returns a string.

    Returns:
        The decoded continuation cut off at the first blank line, or an empty
        string when ``gen_toks`` contains no sequences.
    """
    # len() rather than truthiness: multi-element tensors have no truth value.
    if len(gen_toks) == 0:
        return ""

    continuation_ids = gen_toks[-1][input_len:]
    generated_text = tokenizer.decode(continuation_ids)
    return generated_text.split("\n\n")[0]

# Import only the decorator: a star import would add arbitrary names to the
# same global namespace that `execute` runs generated code in.
from wrapt_timeout_decorator import timeout


@timeout(45)
def execute(c):
    """Run the Python source ``c`` in this notebook's global namespace.

    Definitions created by ``c`` (e.g. generated ``exerciseN`` functions)
    become notebook globals. The call is wrapped in a 45-second timeout.

    Args:
        c: Python source code as a string.
    """
    # SECURITY: `c` is model-generated text in this notebook; exec() runs it
    # with full kernel privileges. Only use in a disposable environment.
    exec(c, globals())

In [12]:
# Re-seed so the sampled continuation for this demo prompt is repeatable.
transformers.set_seed(5)

# Prompt for the first sampled question, followed by its first sub-question
# on a new line indented to function-body level.
base_prompt = current_dataset.generate_prompt(sample_q_list[0])
first_step = sample_steps_list[0][0]
prompt = base_prompt + f"\n    {first_step}"

print(colored(prompt, "blue"))

[34mdef exercise1():
    """
    To run his grocery store, Mr. Haj needs $4000 a day. This money is used to pay for orders done, delivery costs and employees' salaries. If he spends 2/5 of the total operation costs on employees' salary and 1/4 of the remaining amount on delivery costs, how much money he pays for the orders done?
    """
    # How much money does Mr. Haj spend on employees' salary?
    money_per_day = 4000
    employees_money = money_per_day * 2 / 5

    # How much money does Mr. Haj have left after paying the employees?
    money_left = money_per_day - employees_money

    # How much money does Mr. Haj spend on delivery costs?
    delivery_costs = money_left * 1 / 4

    # How much money does Mr. Haj pay for the orders done?
    orders_money = money_left - delivery_costs
    return float(orders_money)


def exercise2():
    """
    Sue works in a factory and every 30 minutes, a machine she oversees produces 30 cans of soda. How many cans of soda can one machine produc

In [13]:
# Tokenise the prompt; the generation length limits are relative to its length.
tokens = tokenizer(prompt, return_tensors="pt").input_ids
prompt_len = len(tokens[0])
length_cap = prompt_len + gen_args.max_length_after_input

sampling_kwargs = dict(
    use_cache=True,
    do_sample=True,
    top_k=gen_args.top_k,
    temperature=gen_args.temperature,
    top_p=gen_args.top_p,
    min_length=prompt_len + gen_args.min_length,
    max_length=length_cap,
    num_return_sequences=gen_args.num_return_sequences,
    pad_token_id=tokenizer.eos_token_id,
)
generated_tokens = model.generate(tokens.long().cuda(), **sampling_kwargs)

# Despite its name, `list_outputs` is a single string: the decoded
# continuation up to the first blank line.
list_outputs = preproc_gen_toks(generated_tokens, prompt_len, tokenizer)

print(prompt_len)
print(length_cap)
print(list_outputs)

NameError: name 'tokenizer' is not defined

In [33]:
# Append the model's continuation to the running prompt, then open the second
# sub-question as the next line. Note: this rebinds `prompt`, so re-running
# the cell appends again.
next_step = sample_steps_list[0][1]
prompt = prompt + list_outputs + f"\n\n    {next_step}"
print(prompt)

def exercise1():
    """
    To run his grocery store, Mr. Haj needs $4000 a day. This money is used to pay for orders done, delivery costs and employees' salaries. If he spends 2/5 of the total operation costs on employees' salary and 1/4 of the remaining amount on delivery costs, how much money he pays for the orders done?
    """
    # How much money does Mr. Haj spend on employees' salary?
    money_per_day = 4000
    employees_money = money_per_day * 2 / 5

    # How much money does Mr. Haj have left after paying the employees?
    money_left = money_per_day - employees_money

    # How much money does Mr. Haj spend on delivery costs?
    delivery_costs = money_left * 1 / 4

    # How much money does Mr. Haj pay for the orders done?
    orders_money = money_left - delivery_costs
    return float(orders_money)


def exercise2():
    """
    Sue works in a factory and every 30 minutes, a machine she oversees produces 30 cans of soda. How many cans of soda can one machine produce in 

In [22]:
transformers.set_seed(5)

n_eval_samples = 50          # number of sampled problems to score
max_generation_rounds = 15   # generation calls allowed per problem
# The problem under test is always emitted as `exercise4` in the prompt.
target_func_pattern = re.compile(r"def exercise4")


def generate_continuation(prompt):
    """Sample one continuation chunk for ``prompt`` using ``gen_args``.

    Returns the decoded new text up to the first blank line, as produced by
    ``preproc_gen_toks``.
    """
    tokens = tokenizer(prompt, return_tensors="pt").input_ids
    input_len = len(tokens[0])
    generated_tokens = model.generate(
        tokens.long().cuda(),
        use_cache=True,
        do_sample=True,
        top_k=gen_args.top_k,
        temperature=gen_args.temperature,
        top_p=gen_args.top_p,
        min_length=input_len + gen_args.min_length,
        max_length=input_len + gen_args.max_length_after_input,
        num_return_sequences=gen_args.num_return_sequences,
        pad_token_id=tokenizer.eos_token_id,
    )
    return preproc_gen_toks(generated_tokens, input_len, tokenizer)


def target_has_return(prompt):
    """Tell whether the text from ``def exercise4`` onwards contains "return "."""
    match = target_func_pattern.search(prompt)
    return match is not None and "return " in prompt[match.start():]


pass_1_list = []

for sample_q, sample_a in zip(sample_q_list[:n_eval_samples], sample_a_list[:n_eval_samples]):
    prompt = current_dataset.generate_prompt(sample_q)
    expected = float(sample_a)
    # Reset for every problem: previously a problem that never produced a
    # `return` silently reused the previous problem's result.
    s = None

    if target_func_pattern.search(prompt) is None:
        # Without the target function in the prompt there is nothing to
        # complete or call; count the problem as failed instead of crashing.
        print("prompt does not contain 'def exercise4'; skipping")
    else:
        # Grow the function body chunk by chunk until it returns or the
        # round budget is used up.
        for _ in range(max_generation_rounds):
            if target_has_return(prompt):
                break
            prompt += "\n" + generate_continuation(prompt)

        if target_has_return(prompt):
            try:
                execute(prompt)
                s = exercise4()
            except Exception as e:
                # Generated code often fails (undefined names, syntax errors,
                # timeouts); report it and leave `s` as None (a miss).
                print(e)

    is_correct = int(s == expected)
    pass_1_list.append(is_correct)
    if is_correct == 1:
        print(s)
        print(expected)

# pass@1 over the evaluated problems.
np.mean(np.array(pass_1_list))

80.0
80.0
7100.0
7100.0
name 'nr_rabbits_in_the_' is not defined
invalid syntax (<string>, line 43)
name 'suits_per_lily' is not defined
name 'minutes_per_day' is not defined
name 'tilling_length' is not defined
unsupported operand type(s) for -: 'float' and 'list'
73.0
73.0
name 'distance_between_kenn_and_' is not defined
invalid syntax (<string>, line 42)
1890
1890.0


0.08

In [25]:
# Rebuild the gsm8k-socratic dataset handler from the current priming text
# path and the experiment's sampling / prompt-building callbacks.
dataset_kwargs = dict(
    primingtext_path=priming_text_path,
    sample_func=exp_impl.sample_n_for_prompting,
    generate_prompt_func=exp_impl.generate_prompt,
)
current_dataset = dh.init_dataset_from_name("gsm8k-socratic", **dataset_kwargs)

In [27]:
# Display the "answer" field of the first dataset record.
first_record = current_dataset.data[0]
first_record["answer"]

'How many clips did Natalia sell in May? ** Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nHow many clips did Natalia sell altogether in April and May? ** Natalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'