In [1]:
import os
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
from termcolor import colored
import wandb
import importlib
import re

import dataset_handler as dh
import loading_utils as lu
import testing_utils as tu

gptj_model = "EleutherAI/gpt-j-6B"
codeparrot_model = "lvwerra/codeparrot"

#model_name = "gpt-j"
model_name = "codegen"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
importlib.reload(dh)

<module 'dataset_handler' from '/home/PracticalWork2021/dataset_handler.py'>

In [3]:
transformers.set_seed(5)
if model_name == "gpt-j":
    """GPT-J and codeparrot models run in HFTest venv"""
    tokenizer = AutoTokenizer.from_pretrained(gptj_model)
    model = AutoModelForCausalLM.from_pretrained(gptj_model).half().eval().cuda()
elif model_name == "codegen":
    """CodeGen runs in the venv venv"""
    model_args = lu.model_args()
    #model_args.model = "codegen-350M-mono"
    model, tokenizer = lu.load_CodeGen(model_args)

loading parameters
loading parameters took 431.88s
loading tokenizer
loading tokenizer took 4.05s


In [7]:
import exp_impl.func_def_general_socratic as exp_impl

priming_text_path = "data/priming_texts/gsm8k/socratic/no_space_no_comment/func_def_socratic_step1.txt"
importlib.reload(exp_impl)

priming_text_dir = "data/priming_texts/gsm8k/socratic/no_space_no_comment/"

priming_text_paths_list = os.listdir(priming_text_dir)
priming_text_paths_list.sort()

"""Load gsm8k"""

current_dataset = dh.init_dataset_from_name(
    "gsm8k-socratic",
    primingtext_path=priming_text_path,
    sample_func=exp_impl.sample_n_for_prompting,
    generate_prompt_func=exp_impl.generate_prompt,
)

tu.set_all_seeds()

sample_q_list, sample_steps_list, sample_a_list = current_dataset.sample_n_for_prompting(100, inc_eq=False)

with open("test_prompt_gen.txt", "w") as f:
    f.write(current_dataset.generate_prompt(sample_q_list[0]))

print(colored(sample_q_list[0], "blue"))
print(colored(sample_a_list[0], "green"))
print(colored(sample_steps_list[0], "yellow"))

[34mdef exercise4():
    """
    Carol sells tickets for an exhibition. During three days she sold tickets worth $960. One ticket costs $4.
    """[0m
[32m80[0m
[33m[' How many tickets did Carol sell during three days?', ' How many tickets did Carol sell on average in one day?'][0m


In [8]:
def remove_last_question(current_prompt):
    temp_list = current_prompt.split('"""')

    sample_q = temp_list[-2]

    comma_list_senteces = sample_q.split(",")
    period_list_senteces = sample_q.split(".")

    if len(comma_list_senteces[-1]) > len(period_list_senteces[-1]):
        sample_q = sample_q[: -len(period_list_senteces[-1])] + "\n    "
    else:
        sample_q = sample_q[: -len(comma_list_senteces[-1])] + "\n    "

    temp_list[-2] = sample_q

    return '"""'.join(temp_list)


def add_step_to_sample(current_prompt, step_q):
    temp_list = current_prompt.split('"""')

    if temp_list[-2][-6] == ".":
        temp_list[-2] = temp_list[-2][:-5] + step_q + "\n    "
    else:
        str_list = list(step_q)
        str_list[1] = step_q[1].lower()
        step_q = "".join(str_list)
        temp_list[-2] = temp_list[-2][:-5] + step_q + "\n    "

    return '"""'.join(temp_list)

def change_prompt_for_next_step(current_prompt, current_step):
    print(colored(priming_text_paths_list[current_step+1], "blue"))
    with open(os.path.join(priming_text_dir, priming_text_paths_list[current_step+1]), "r") as f:
        current_priming_text = f.read()

    return current_priming_text + "\n\n" + "def exercise4" + current_prompt.split("def exercise4")[-1]

def preproc_gen_toks(gen_toks, input_len, tokenizer, last=False):
    for gen_tok in gen_toks:
        last_tokens = gen_tok[input_len:]
        generated_text = tokenizer.decode(last_tokens)
        with open("test_preproc_gen_tokens.txt", "a") as f:
            f.write(generated_text)

    if last:
        return tu.preproc_gen_toks(gen_toks, input_len, tokenizer, func_def_mod=True)
    return generated_text.split("return ")[0]

from wrapt_timeout_decorator import *

@timeout(45)
def execute(c):
    exec(c, globals())

In [12]:
# Set up for CodeGen
config = lu.codegen_gen_args()
config.num_return_sequences = 1
config.max_length_after_input = 20
config.top_p = 0.8
config.top_k = 30
#config.temperature = 0.7
config.temperature = 1
config.min_length = 1

gen_args = config

In [13]:
transformers.set_seed(5)
id_sample = 3
cp = current_dataset.generate_prompt(sample_q_list[id_sample])
for i, step_q in enumerate(sample_steps_list[id_sample]):
    is_last = (i == len(sample_steps_list[0]) - 1)
    cp = add_step_to_sample(cp, step_q)
    with open("test_prompt_gen.txt", "w") as f:
        f.write(cp)
    print(colored(f"Step_{i+1}", "green"))
    tokens = tokenizer(cp, return_tensors="pt").input_ids
    generated_tokens = model.generate(
        tokens.long().cuda(),
        use_cache=True,
        do_sample=False,
        top_k=gen_args.top_k,
        temperature=gen_args.temperature,
        top_p=gen_args.top_p,
        min_length=len(tokens[0]) + gen_args.min_length,
        max_length=len(tokens[0]) + gen_args.max_length_after_input,
        num_return_sequences=gen_args.num_return_sequences,
        pad_token_id=tokenizer.eos_token_id,
    )

    list_outputs = preproc_gen_toks(
        generated_tokens, len(tokens[0]), tokenizer, last=is_last
    )

    cp = cp + list_outputs[0]

    print(len(list_outputs))
    print(colored(cp[-350:], "blue"))
    if not is_last:
        cp = change_prompt_for_next_step(cp, i)
        cp = remove_last_question(cp)
    else:
        break

print(cp)
        

[32mStep_1[0m
40
[34moats_cows_ratio = 4 / 1
    nr_goats = 9 * goats_cows_ratio
    return float(nr_goats)


def exercise4():
    """
    Vivian plays 10 Spotify songs every day. Her best friend Clara plays 2 fewer songs each day. If in June they didn't play any song during the weekends only, and there were 8 weekend days in June, how many days did they play?
    """
[0m
[34mfunc_def_socratic_step2.txt[0m
[32mStep_2[0m
1
[34mickens = nr_goats / goats_chickens_ratio
    return float(nr_chickens)

def exercise4():
    """
    Vivian plays 10 Spotify songs every day. Her best friend Clara plays 2 fewer songs each day. If in June they didn't play any song during the weekends only, and there were 8 weekend days in June, how many songs did Vivian play?
    """
INVALID OUTPUT[0m
def exercise1():
    """
    To run his grocery store, Mr. Haj needs $4000 a day. This money is used to pay for orders done, delivery costs and employees' salaries. If he spends 2/5 of the total operation c

In [20]:
transformers.set_seed(5)
prompt = current_dataset.generate_prompt(sample_q_list[0])
prompt += f"\n    {sample_steps_list[0][0]}"
print(colored(prompt, "blue"))

[34mdef exercise1():
    """
    To run his grocery store, Mr. Haj needs $4000 a day. This money is used to pay for orders done, delivery costs and employees' salaries. If he spends 2/5 of the total operation costs on employees' salary and 1/4 of the remaining amount on delivery costs
    """
    # How much money does Mr. Haj spend on employees' salary?
    money_per_day = 4000
    employees_money = money_per_day * 2 / 5
    return float(employees_money)


def exercise2():
    """
    Sue works in a factory and every 30 minutes, a machine she oversees produces 30 cans of soda.
    """
    # How many sets of 30 minutes are there in 8 hours?
    sets_in_hour = 2
    sets_in_8_hours = sets_in_hour * 8
    return flaot(sets_in_8_hours)


def exercise3():
    """
    Mr. Rainwater has some goats, 9 cows and some chickens. He has 4 times as many goats as cows and 2 times as many goats as chickens.
    """
    # How many goats does Mr. Rainwater have?
    goats_cows_ratio = 4 / 1
    nr_goat

In [28]:
prompt = current_dataset.generate_prompt(sample_q_list[0])
prompt = add_step_to_sample(prompt, sample_steps_list[0][0])
tokens = tokenizer(prompt, return_tensors="pt").input_ids
generated_tokens = model.generate(
    tokens.long().cuda(),
    use_cache=True,
    do_sample=False,
    top_k=gen_args.top_k,
    temperature=gen_args.temperature,
    top_p=gen_args.top_p,
    min_length=len(tokens[0]) + gen_args.min_length,
    max_length=len(tokens[0]) + gen_args.max_length_after_input,
    num_return_sequences=gen_args.num_return_sequences,
    pad_token_id=tokenizer.eos_token_id,
)

list_outputs = preproc_gen_toks(
    generated_tokens, len(tokens[0]), tokenizer
)
print(len(tokens[0]))
print(len(tokens[0]) + gen_args.max_length_after_input)
print(list_outputs)

351
376

    nr_tickets = 960 * 3
    


In [22]:
prompt = remove_last_comment(prompt)
step_nr = 2
prompt += list_outputs + f"\n    {sample_steps_list[0][step_nr-1]}"
priming_text_path = f"data/priming_texts/gsm8k/socratic/func_def_socratic_step{step_nr}.txt"
with open(priming_text_path, "r") as f:
    priming_text = f.read()
prompt = priming_text + prompt[len(prompt.split("def exercise4")[0])-3:]
print(prompt)

def exercise1():
    """
    To run his grocery store, Mr. Haj needs $4000 a day. This money is used to pay for orders done, delivery costs and employees' salaries. If he spends 2/5 of the total operation costs on employees' salary and 1/4 of the remaining amount on delivery costs
    """
    money_per_day = 4000
    employees_money = money_per_day * 2 / 5

    # How much money does Mr. Haj have left after paying the employees?
    money_left = money_per_day - employees_money
    return float(money_left)


def exercise2():
    """
    Sue works in a factory and every 30 minutes, a machine she oversees produces 30 cans of soda.
    """
    sets_in_hour = 2
    sets_in_8_hours = sets_in_hour * 8

    # How many cans of soda can one machine produce in 8 hours?
    cans_per_set = 30
    cans_per_machine = cans_per_set * sets_in_8_hours
    return float(cans_per_machine)


def exercise3():
    """
    Mr. Rainwater has some goats, 9 cows and some chickens. He has 4 times as many goats as co

In [22]:
transformers.set_seed(5)
pass_1_list = []

for sample_q, sample_a in zip(sample_q_list[:50], sample_a_list[:50]):
    prompt = current_dataset.generate_prompt(sample_q)

    line_cnt = 0
    print_pattern = re.compile(r"def exercise4")
    while "return " not in prompt[re.search(print_pattern, prompt).start():] and line_cnt < 15:
        line_cnt += 1
        tokens = tokenizer(prompt, return_tensors="pt").input_ids
        generated_tokens = model.generate(
            tokens.long().cuda(),
            use_cache=True,
            do_sample=True,
            top_k=gen_args.top_k,
            temperature=gen_args.temperature,
            top_p=gen_args.top_p,
            min_length=len(tokens[0]) + gen_args.min_length,
            max_length=len(tokens[0]) + gen_args.max_length_after_input,
            num_return_sequences=gen_args.num_return_sequences,
            pad_token_id=tokenizer.eos_token_id,
        )

        list_outputs = preproc_gen_toks(
            generated_tokens, len(tokens[0]), tokenizer
        )
        #print(len(tokens[0]))
        #print(len(tokens[0]) + gen_args.max_length_after_input)
        #print(list_outputs)

        prompt += "\n" + list_outputs


        if "return " in prompt[re.search(print_pattern, prompt).start():]:
            try:
                #print(prompt[re.search(print_pattern, prompt).start():])
                execute(prompt)
                s = exercise4()
            except Exception as e:
                print(e)
                s = 1111111
        #print(prompt)

    pass_1_list.append(int(s == float(sample_a)))
    if int(s == float(sample_a)) == 1:
        print(s)
        print(float(sample_a))

np.mean(np.array(pass_1_list))

80.0
80.0
7100.0
7100.0
name 'nr_rabbits_in_the_' is not defined
invalid syntax (<string>, line 43)
name 'suits_per_lily' is not defined
name 'minutes_per_day' is not defined
name 'tilling_length' is not defined
unsupported operand type(s) for -: 'float' and 'list'
73.0
73.0
name 'distance_between_kenn_and_' is not defined
invalid syntax (<string>, line 42)
1890
1890.0


0.08

In [25]:
current_dataset = dh.init_dataset_from_name(
        "gsm8k-socratic",
        primingtext_path=priming_text_path,
        sample_func=exp_impl.sample_n_for_prompting,
        generate_prompt_func=exp_impl.generate_prompt,
    )

In [27]:
current_dataset.data[0]["answer"]

'How many clips did Natalia sell in May? ** Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nHow many clips did Natalia sell altogether in April and May? ** Natalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'