In [1]:
import os
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from termcolor import colored
import wandb
import importlib
import re

import dataset_handler as dh
import loading_utils as lu
import testing_utils as tu

# Pretrained checkpoint identifiers; gptj_model is passed to
# AutoTokenizer/AutoModelForCausalLM.from_pretrained in the model-loading cell.
gptj_model = "EleutherAI/gpt-j-6B"
# NOTE(review): codeparrot_model is not referenced by any cell visible in this notebook.
codeparrot_model = "lvwerra/codeparrot"

# Selects which branch of the model-loading cell runs ("gpt-j" or "codegen").
#model_name = "gpt-j"
model_name = "codegen"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Re-import dataset_handler so that edits made to the module on disk are picked
# up without restarting the kernel (and without reloading the model).
importlib.reload(dh)

<module 'dataset_handler' from '/home/PracticalWork2021/dataset_handler.py'>

In [3]:
# Seed via transformers before loading / generating so runs are repeatable.
transformers.set_seed(5)

# Load `model` and `tokenizer` for the backend selected by `model_name`.
if model_name == "gpt-j":
    # GPT-J and codeparrot models run in the HFTest venv.
    tokenizer = AutoTokenizer.from_pretrained(gptj_model)
    # Half precision, eval mode, on the GPU.
    model = AutoModelForCausalLM.from_pretrained(gptj_model).half().eval().cuda()
elif model_name == "codegen":
    # CodeGen runs in the "venv" venv.
    model_args = lu.model_args()
    #model_args.model = "codegen-350M-mono"
    model, tokenizer = lu.load_CodeGen(model_args)
else:
    # Fail fast: without this branch an unknown name would leave `model` and
    # `tokenizer` undefined (or, worse, silently stale from an earlier run of
    # this cell in the same kernel).
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected 'gpt-j' or 'codegen'"
    )

loading parameters
loading parameters took 469.87s
loading tokenizer
loading tokenizer took 3.98s


In [23]:
#import exp_impl.func_def_general_socratic as exp_impl
import exp_impl.func_def_general_socratic_variable_hint as exp_impl

# Pick up on-disk edits to the experiment implementation without a kernel restart.
importlib.reload(exp_impl)

# Priming text used to initialise the dataset, and the directory whose files
# are indexed per step by change_prompt_for_next_step.
priming_text_path = "data/priming_texts/gsm8k/socratic/var_hint/func_def_socratic_step1.txt"
priming_text_dir = "data/priming_texts/gsm8k/socratic/var_hint/"

# Sorted so that list positions are stable across runs.
priming_text_paths_list = sorted(os.listdir(priming_text_dir))

# Load gsm8k
current_dataset = dh.init_dataset_from_name(
    "gsm8k-socratic",
    primingtext_path=priming_text_path,
    sample_func=exp_impl.sample_n_for_prompting,
    generate_prompt_func=exp_impl.generate_prompt,
)

# Seed before sampling so the same 100 samples are drawn each run.
tu.set_all_seeds()

sample_q_list, sample_steps_list, sample_a_list = current_dataset.sample_n_for_prompting(
    100, inc_eq=False
)

# Dump the first generated prompt to disk for manual inspection.
with open("test_prompt_gen.txt", "w") as f:
    f.write(current_dataset.generate_prompt(sample_q_list[0]))

# Show the first sample: question (blue), answer (green), step questions (yellow).
print(colored(sample_q_list[0], "blue"))
print(colored(sample_a_list[0], "green"))
print(colored(sample_steps_list[0], "yellow"))

[34mdef exercise4():
    """
    Carol sells tickets for an exhibition. During three days she sold tickets worth $960. One ticket costs $4.
    """[0m
[32m80[0m
[33m[' How many tickets did Carol sell during three days?\n    Use no more than 2 variables', ' How many tickets did Carol sell on average in one day?\n    Use no more than 4 variables'][0m


In [24]:
def remove_last_question(current_prompt):
    """Strip the trailing question from the last docstring in ``current_prompt``.

    The prompt is split on triple double-quotes and the second-to-last piece
    (the text between the final pair of delimiters) is truncated just after
    its last ``,`` or ``.`` -- whichever occurs later. The kept text is then
    terminated with a newline plus four spaces.

    Args:
        current_prompt: Prompt text containing at least two triple
            double-quote delimiters.

    Returns:
        The prompt with the last docstring truncated as described. If the
        docstring contains neither ``,`` nor ``.``, its whole content is
        replaced by the newline-plus-indent terminator.

    Raises:
        IndexError: If ``current_prompt`` contains fewer than one triple
            double-quote delimiter (there is no second-to-last piece).
    """
    segments = current_prompt.split('"""')
    docstring = segments[-2]

    # Index of the last sentence separator; -1 when neither is present.
    cut = max(docstring.rfind(","), docstring.rfind("."))

    # Slice with an absolute index. The previous ``docstring[:-len(tail)]``
    # collapsed to ``docstring[:0]`` (i.e. dropped everything) whenever the
    # docstring already ended with the separator and the tail was empty.
    segments[-2] = docstring[: cut + 1] + "\n    "

    return '"""'.join(segments)


def add_step_to_sample(current_prompt, step_q):
    """Append the step question ``step_q`` to the last docstring of the prompt.

    The prompt is split on triple double-quotes. The last five characters of
    the second-to-last piece are dropped and ``step_q`` is appended in their
    place. Unless the character just before those five is a period, the
    second character of ``step_q`` is lower-cased first.

    Args:
        current_prompt: Prompt text with at least one triple double-quote
            delimiter; the second-to-last piece must be at least six
            characters long.
        step_q: Question text to append; must have at least two characters
            when the lower-casing path is taken.

    Returns:
        The prompt with ``step_q`` spliced into the last docstring.

    Raises:
        IndexError: If the prompt or ``step_q`` is too short for the
            indexing described above.
    """
    pieces = current_prompt.split('"""')
    docstring = pieces[-2]

    # Not continuing after a full stop: lower-case the second character.
    if docstring[-6] != ".":
        step_q = step_q[:1] + step_q[1].lower() + step_q[2:]

    pieces[-2] = docstring[:-5] + step_q
    return '"""'.join(pieces)

def change_prompt_for_next_step(current_prompt, current_step):
    """Swap the priming text in front of the current exercise for the next step's.

    Reads the file at position ``current_step + 1`` of the module-level
    ``priming_text_paths_list`` (inside ``priming_text_dir``), with
    ``current_step`` capped at 3, and prepends its content to everything that
    follows the last ``def exercise4`` in ``current_prompt``.

    Args:
        current_prompt: Prompt text built so far.
        current_step: Index of the step that was just completed.

    Returns:
        The new priming text, a blank line, and the ``def exercise4`` part of
        the current prompt.
    """
    #print(colored(priming_text_paths_list[current_step+1], "blue"))
    # Steps past index 3 all reuse the same list position (3 + 1).
    next_index = min(current_step, 3) + 1
    next_priming_path = os.path.join(priming_text_dir, priming_text_paths_list[next_index])

    with open(next_priming_path, "r") as f:
        current_priming_text = f.read()

    # Everything after the last "def exercise4" (the whole prompt if absent).
    exercise_tail = current_prompt.rpartition("def exercise4")[2]
    return current_priming_text + "\n\n" + "def exercise4" + exercise_tail

def preproc_gen_toks(gen_toks, input_len, tokenizer, last=False):
    """Decode the newly generated tokens and post-process them for the prompt.

    Every sequence in ``gen_toks`` is decoded from position ``input_len``
    onward and appended to ``test_preproc_gen_tokens.txt`` for inspection.

    Args:
        gen_toks: Iterable of token sequences that support slicing.
        input_len: Number of leading prompt tokens to skip in each sequence.
        tokenizer: Object exposing ``decode(tokens)``.
        last: When true, delegate to ``tu.preproc_gen_toks`` with
            ``func_def_mod=True`` and return its result.

    Returns:
        For ``last=False``: a one-element list holding the text decoded from
        the final sequence, cut before the first ``"return "`` and with its
        last five characters removed. For ``last=True``: whatever
        ``tu.preproc_gen_toks`` returns.
    """
    for sequence in gen_toks:
        generated_text = tokenizer.decode(sequence[input_len:])
        with open("test_preproc_gen_tokens.txt", "a") as log_file:
            log_file.write(generated_text)

    if not last:
        # Only the text of the last decoded sequence is used here.
        before_return = generated_text.partition("return ")[0]
        return [before_return[:-5]]

    return tu.preproc_gen_toks(gen_toks, input_len, tokenizer, func_def_mod=True)

from wrapt_timeout_decorator import *

@timeout(45)
def execute(c):
    """Execute the source string ``c`` in this module's global namespace.

    Names defined by ``c`` (e.g. ``exercise4``) become notebook globals and
    can be called by the evaluation cells afterwards. The call is wrapped by
    ``timeout(45)`` from wrapt_timeout_decorator (presumably a 45 second
    limit -- confirm against that library's documentation).

    Args:
        c: Python source code to run.
    """
    # NOTE(security): `c` is model-generated text run with full access to the
    # notebook's globals; it can overwrite any notebook variable and perform
    # arbitrary I/O. Only run this in a disposable environment.
    notebook_namespace = globals()
    exec(c, notebook_namespace)

In [25]:
# Set up for CodeGen
config = lu.codegen_gen_args()
config.num_return_sequences = 1
config.max_length_after_input = 60
config.top_p = 0.8
config.top_k = 30
#config.temperature = 0.7
config.temperature = 1
config.min_length = 1

gen_args = config

In [26]:
# Step-wise (socratic) evaluation: for each sample, feed the step questions one
# at a time, let the model extend the function body after each, then execute the
# assembled program and compare exercise4()'s result with the reference answer.
transformers.set_seed(5)
pass_1_list = []
cnt = 0
for sample_q, sample_a, sample_steps in zip(sample_q_list, sample_a_list, sample_steps_list):
    cnt += 1
    cp = current_dataset.generate_prompt(sample_q)
    for i, step_q in enumerate(sample_steps):
        is_last = (i == len(sample_steps) - 1)
        try:
            cp = add_step_to_sample(cp, step_q)
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C still interrupts
            # the run, and report what actually went wrong.
            print(colored(f"Error in add_step_to_sample: {err!r}", "red"))
            print(colored(cnt, "red"))
            break

        # Keep the most recent prompt on disk for debugging.
        with open("test_prompt_gen.txt", "w") as f:
            f.write(cp)
        #print(colored(f"Step_{i+1}", "green"))

        tokens = tokenizer(cp, return_tensors="pt").input_ids
        input_len = len(tokens[0])
        # NOTE(review): with do_sample=False decoding is presumably greedy, in
        # which case top_k / temperature / top_p have no effect -- confirm
        # whether sampling was intended here (the line-by-line cell samples).
        generated_tokens = model.generate(
            tokens.long().cuda(),
            use_cache=True,
            do_sample=False,
            top_k=gen_args.top_k,
            temperature=gen_args.temperature,
            top_p=gen_args.top_p,
            min_length=input_len + gen_args.min_length,
            max_length=input_len + gen_args.max_length_after_input,
            num_return_sequences=gen_args.num_return_sequences,
            pad_token_id=tokenizer.eos_token_id,
        )

        list_outputs = preproc_gen_toks(
            generated_tokens, input_len, tokenizer, last=is_last
        )

        cp = cp + list_outputs[0]

        # After the final step the loop ends on its own; otherwise swap in the
        # next step's priming text and drop the question just answered.
        if not is_last:
            try:
                cp = change_prompt_for_next_step(cp, i)
                cp = remove_last_question(cp)
            except Exception as err:
                print(colored(f"Error: {err!r}", "red"))
                break

    try:
        #print(prompt[re.search(print_pattern, prompt).start():])
        execute(cp)
        s = exercise4()
    except Exception as e:
        print(e)
        # None never equals a float, so a failed run can never be mistaken for
        # a correct answer (a numeric sentinel could collide with one).
        s = None

    pass_1_list.append(int(s == float(sample_a)))
    if cnt % 10 == 0:
        print(f"Up to sample {cnt}: {np.mean(np.array(pass_1_list))}")

np.mean(np.array(pass_1_list))

invalid syntax (<string>, line 52)
name 'articles_per_da_e' is not defined
Up to sample 10: 0.2
invalid syntax (<string>, line 65)
name 'nr_maogoes' is not defined
invalid syntax (<string>, line 40)
invalid syntax (<string>, line 59)
Up to sample 20: 0.1
invalid syntax (<string>, line 44)
invalid syntax (<string>, line 55)
name 'ticket_cost_fris_wheel' is not defined
Up to sample 30: 0.06666666666666667
name 'nr_woens' is not defined
invalid syntax (<string>, line 40)
invalid syntax (<string>, line 43)
invalid syntax (<string>, line 55)
invalid syntax (<string>, line 56)
[31mError in add_step_to_sample[0m
[31m39[0m
invalid syntax (<string>, line 62)
Up to sample 40: 0.05
invalid syntax (<string>, line 59)
name 'nr_blocks_to_gallary' is not defined
Up to sample 50: 0.12
name 'cpus_sales' is not defined
name 'nr_of_ducks_per_year_afrer_5_years_and_150_ducks_and_100_ducks' is not defined
Up to sample 60: 0.1
name 'gas_statio_4' is not defined
Up to sample 70: 0.11428571428571428
[31m

0.11

In [28]:
# Single-step sanity check: build the prompt for the first sample with its
# first step question, generate once, and show the post-processed output.
prompt = add_step_to_sample(
    current_dataset.generate_prompt(sample_q_list[0]),
    sample_steps_list[0][0],
)
tokens = tokenizer(prompt, return_tensors="pt").input_ids
prompt_len = len(tokens[0])

generated_tokens = model.generate(
    tokens.long().cuda(),
    use_cache=True,
    do_sample=False,
    top_k=gen_args.top_k,
    temperature=gen_args.temperature,
    top_p=gen_args.top_p,
    min_length=prompt_len + gen_args.min_length,
    max_length=prompt_len + gen_args.max_length_after_input,
    num_return_sequences=gen_args.num_return_sequences,
    pad_token_id=tokenizer.eos_token_id,
)

list_outputs = preproc_gen_toks(generated_tokens, prompt_len, tokenizer)

# Prompt length, the max_length passed to generate, and the processed text.
print(prompt_len)
print(prompt_len + gen_args.max_length_after_input)
print(list_outputs)

351
376

    nr_tickets = 960 * 3
    


In [22]:
# Line-by-line evaluation: keep extending the prompt with sampled generations
# (at most 15 rounds) until the text from "def exercise4" onward contains
# "return ", then execute it and compare exercise4()'s result with the answer.
#
# NOTE(review): this cell has an older execution count than the helper cell.
# The notebook-level preproc_gen_toks drops everything from "return " onward
# when last=False, so with that helper the "return " condition below may never
# become true -- confirm which helper version this cell is meant to run with.
transformers.set_seed(5)
pass_1_list = []

# Compiled once instead of on every sample.
print_pattern = re.compile(r"def exercise4")

for sample_q, sample_a in zip(sample_q_list[:50], sample_a_list[:50]):
    prompt = current_dataset.generate_prompt(sample_q)

    # Reset per sample: previously a sample that never produced "return "
    # was scored with the result left over from the preceding sample.
    s = None

    line_cnt = 0
    while "return " not in prompt[print_pattern.search(prompt).start():] and line_cnt < 15:
        line_cnt += 1
        tokens = tokenizer(prompt, return_tensors="pt").input_ids
        input_len = len(tokens[0])
        generated_tokens = model.generate(
            tokens.long().cuda(),
            use_cache=True,
            do_sample=True,
            top_k=gen_args.top_k,
            temperature=gen_args.temperature,
            top_p=gen_args.top_p,
            min_length=input_len + gen_args.min_length,
            max_length=input_len + gen_args.max_length_after_input,
            num_return_sequences=gen_args.num_return_sequences,
            pad_token_id=tokenizer.eos_token_id,
        )

        list_outputs = preproc_gen_toks(
            generated_tokens, input_len, tokenizer
        )
        #print(len(tokens[0]))
        #print(len(tokens[0]) + gen_args.max_length_after_input)
        #print(list_outputs)

        # preproc_gen_toks returns a list; take its first element, as the
        # step-wise cell does (str + list would raise TypeError).
        prompt += "\n" + list_outputs[0]

        if "return " in prompt[print_pattern.search(prompt).start():]:
            try:
                #print(prompt[re.search(print_pattern, prompt).start():])
                execute(prompt)
                s = exercise4()
            except Exception as e:
                print(e)
                # None never equals a float, so failures always score 0.
                s = None
        #print(prompt)

    is_correct = int(s == float(sample_a))
    pass_1_list.append(is_correct)
    if is_correct == 1:
        print(s)
        print(float(sample_a))

np.mean(np.array(pass_1_list))

80.0
80.0
7100.0
7100.0
name 'nr_rabbits_in_the_' is not defined
invalid syntax (<string>, line 43)
name 'suits_per_lily' is not defined
name 'minutes_per_day' is not defined
name 'tilling_length' is not defined
unsupported operand type(s) for -: 'float' and 'list'
73.0
73.0
name 'distance_between_kenn_and_' is not defined
invalid syntax (<string>, line 42)
1890
1890.0


0.08