In [1]:
import datetime
import os
import random
import pandas as pd
import numpy as np
import operator
import random
from tqdm import tqdm

try:
    import binutil  # required to import from dreamcoder modules
except ModuleNotFoundError:
    import bin.binutil  # alt import if called as module

from dreamcoder.task import Task
from dreamcoder.dreamcoder import *
from dreamcoder.domains.minigrid.primitives import basePrimitives, tmap, taction, idx_to_action, tdirection
from dreamcoder.grammar import Grammar
from dreamcoder.utilities import testTrainSplit, eprint, numberOfCPUs
from dreamcoder.type import arrow
from dreamcoder.domains.minigrid.nn_model_maze import *
from dreamcoder.dreamcoder import commandlineArguments
from dreamcoder.utilities import numberOfCPUs

import transformers
from transformers import RobertaTokenizer, T5ForConditionalGeneration, AutoTokenizer, TrainingArguments, Seq2SeqTrainer

In [2]:
len(Grammar.uniform(basePrimitives()).primitives)

22

In [11]:
import dill
from dreamcoder.primitiveGraph import graphPrimitivesFromGrammar

path = '/home/ma/e/eberhardinger/workspaces/T5-experimens/lookup-table-prompt/results.pkl'
with open(path, 'rb') as handle:
    result = dill.load(handle)
    
print(result.keys())

for k, v in result.items():
    print(k, len(v['grammar'].primitives) - len(Grammar.uniform(basePrimitives()).primitives))

dict_keys([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
3 48
4 54
5 62
6 70
7 75
8 82
9 85
10 86
11 89
12 89
13 89
14 89
15 89


In [4]:
grammars = [g['grammar'] for g in result.values()]
g = grammars[-1]
#g.productions

In [6]:
grammars = [g['grammar'] for g in result.values()]
graphPrimitivesFromGrammar(grammars, "%s_primitives_%d_" % ('/home/ma/e/eberhardinger/workspaces/T5-experimens/lookup-table-prompt/T5', list(result.keys())[-1]))

 #(lambda (get $0 2 1))

 #(lambda (lambda (if (eq-obj? wall-obj (get $1 4 $0)) left-action forward-action)))

 #(lambda (if (eq-obj? wall-obj (get $0 2 1)) left-action forward-action))

 #(lambda (if (eq-obj? wall-obj (get $0 2 3)) left-action forward-action))

 #(lambda (get $0 2))

 #(lambda (if (eq-obj? wall-obj $0) left-action forward-action))

 #(lambda (lambda (if (eq-obj? wall-obj (get $1 $0 3)) left-action forward-action)))

 #(lambda (get $0 2 3))

 #(lambda (if $0 left-action forward-action))

 #(lambda (lambda (if (eq-obj? wall-obj (get $1 2 $0)) left-action forward-action)))

 #(lambda (if $0 forward-action left-action))

 #(lambda (if (eq-obj? (get $0 2 3) wall-obj) left-action forward-action))

 #(lambda (#(lambda (if $0 left-action forward-action)) (eq-obj? (#(lambda (get $0 2 3)) $0) wall-obj)))

 #(lambda (lambda (if (eq-obj? (#(lambda (get $0 2 3)) $1) (get $1 1 $0)) left-action forward-action)))

 #(lambda (#(lambda (if $0 left-action forward-action)) (eq-obj? wall-

In [49]:

def get_out_string(out):
    if out == 0:
        return 'a'
    elif out == 1:
        return 'b'
    elif out == 2:
        return 'c'
    elif out == 3:
        return 'd'
    elif out == 4:
        return 'e'
    elif out == 5:
        return 'f'
    elif out == 6:
        return 'g'
    elif out == 7:
        return 'h'
    elif out == 8:
        return 'i'
    elif out == 9:
        return 'j'
        
def all_equal(lst):
    return not lst or lst.count(lst[0]) == len(lst)


def parseData(taskData, groupby='run_id', verbose=False):
    columns = ['process', 'obs', 'obs direction', 
               'action', 'reward', 'done', 'run_id']

    df = pd.DataFrame(taskData, columns=columns)
    df = df.drop(['process'], axis=1)
    df.action = df.action.apply(lambda x: x[0])
    df.insert(0, 'run_id', df.pop('run_id'))

    group = df.groupby('run_id')

    groups_to_consider = []

    for key in group.groups.keys():
        g = group.get_group(key)
        if verbose:
            print(f'group {key}')
        if not g[g.reward > 0.0].count().all():
            if verbose:
                print(f'no reward..')
        else:
            reward = g[g.reward > 0.0].reward.iloc[0]
            if reward < 0.9:
                if verbose:
                    print(f'skip {key} because reward is to small')
                continue
            if verbose:
                print(f'needed {g.shape[0]} steps. Reward: {reward}')
            groups_to_consider.append(key)
    group = group.filter(lambda x: x.run_id.mean() in groups_to_consider)
    print(group.shape)
    return group.groupby(groupby)


def makeTasks(data, randomChunkSize=True, fixedChunkSize=None):
    assert randomChunkSize or (not randomChunkSize and fixedChunkSize)
    keys = data.groups.keys()
    print('keys:', len(keys))
    tasks = []
    for key in keys:
        to_imitate = data.get_group(key)
        if randomChunkSize:
            chunkSize = random.randint(5, 25)
        else:
            chunkSize = fixedChunkSize
        examples = []
        part = 0
        for _, row in to_imitate.iterrows():
            input_ex = ((row.obs[0] * 10).astype(int).tolist(), int(row['obs direction'],))
            output_ex = int(row.action)
            examples.append((input_ex, output_ex))

            if chunkSize > 0 and chunkSize <= len(examples):
                # we check that the chosen actions are not all the same
                # otherwise it is too easy to find a program if all actions/output examples are the same
                # this results in programs such as (lambda (lambda forward-action))
                all_chosen_actions = list(zip(*examples))[1]
                if not all_equal(all_chosen_actions):
                    tasks.append(Task(f'perfect maze {key} size {chunkSize} part {part}', arrow(tmap, tdirection, taction), examples))
                    part += 1
                    # we reset examples and add new chunkSize taskss
                    examples = []
                    if randomChunkSize:
                        chunkSize = random.randint(5, 25)
                    else:
                        chunkSize = fixedChunkSize
                    
        if len(examples) > 3:
            all_chosen_actions = list(zip(*examples))[1]
            if not all_equal(all_chosen_actions):
                tasks.append(Task(f'perfect maze {key} size {chunkSize} part {part}', arrow(tmap, tdirection, taction), examples))

    # select random obs and actions to test
    for key in keys:
        if randomChunkSize:
            chunkSize = random.randint(5, 25)
        else:
            chunkSize = fixedChunkSize
        to_imitate = data.get_group(key)
        examples = []
        part = 0
        already_sampled = []
        while len(to_imitate.index) - len(already_sampled) > chunkSize:
            curr_sample = random.sample([x for x in to_imitate.index if x not in already_sampled], chunkSize)
            for i in curr_sample:
                row = to_imitate.loc[i]
                input_ex = ((row.obs[0] * 10).astype(int).tolist(), int(row['obs direction'],))
                output_ex = int(row.action)
                examples.append((input_ex, output_ex))

                if chunkSize > 0 and chunkSize == len(examples):
                    # we check that the chosen actions are not all the same
                    # otherwise it is too easy to find a program if all actions/output examples are the same
                    # this results in programs such as (lambda (lambda forward-action))
                    all_chosen_actions = list(zip(*examples))[1]
                    if not all_equal(all_chosen_actions):
                        tasks.append(Task(f'perfect maze {key} size {chunkSize} random {part}', arrow(tmap, tdirection, taction), examples))
                        part += 1
                        # we reset examples and add new chunkSize taskss
                        examples = []

            already_sampled += curr_sample

    print(f'Created {len(tasks)} tasks with chunkSize {fixedChunkSize}')
    return tasks


def run_dreamcoder(arguments, taskData, outputDirectory, chunkSize, resumeIteration=None, iterations=None):
    random.seed(42)
    tasks = makeTasks(taskData, chunkSize=chunkSize)
    # return tasks
    eprint("Got %d tasks..." % len(tasks))

    if len(tasks) == 0:
        return None

    arguments.pop('primitives', None)
    arguments.pop('resume', None)
    arguments.pop('iterations', None)
    # Create grammar
    grammar = Grammar.uniform(basePrimitives())

    # EC iterate
    generator = ecIteratorT5(grammar,
                           tasks,
                           outputPrefix="%s/maze" % outputDirectory,
                           resume=resumeIteration,
                           iterations=iterations,
                           **arguments)
    for i, _ in enumerate(generator):
        pass
        #print('ecIterator count {}'.format(i))

    if resumeIteration is None:
        return 1

    return int(resumeIteration) + 1


In [53]:

class DatasetCreator():
    def __init__(self, featureExtractor, grammar):
        super().__init__()
        self.id = id
        self.featureExtractor = featureExtractor
        self.generativeModel = grammar

    def taskEmbeddings(self, tasks):
        return {task: self.featureExtractor.featuresOfTask(task).data.cpu().numpy()
                for task in tasks}

    def replaceProgramsWithLikelihoodSummaries(self, frontier):
        return Frontier(
            [FrontierEntry(
                program=self.grammar.closedLikelihoodSummary(frontier.task.request, e.program),
                logLikelihood=e.logLikelihood,
                logPrior=e.logPrior) for e in frontier],
            task=frontier.task)

    def sampleHelmholtz(self, requests, statusUpdate=None, seed=None):
        if seed is not None:
            random.seed(seed)
        request = random.choice(requests)

        program = self.generativeModel.sample(request, maximumDepth=8, maxAttempts=100)
        if program is None:
            return None
        task = self.featureExtractor.taskOfProgram(program, request)

        if statusUpdate is not None:
            flushEverything()
        if task is None:
            return None

        if hasattr(self.featureExtractor, 'lexicon'):
            if self.featureExtractor.tokenize(task.examples) is None:
                return None

        ll = self.generativeModel.logLikelihood(request, program)
        frontier = Frontier([FrontierEntry(program=program,
                                           logLikelihood=0., logPrior=ll)],
                            task=task)
        return frontier

    def sampleProgramWithTask(self, requests, min_len, max_len, statusUpdate=None, seed=None):
        if seed is not None:
            random.seed(seed)
        request = random.choice(requests)

        program = self.generativeModel.sample(request, maximumDepth=6, maxAttempts=100)
        if program is None:
            return None
        task = self.featureExtractor.taskOfProgram(program, request, min_len=min_len, max_len=max_len)

        if statusUpdate is not None:
            flushEverything()
        if task is None:
            return None, None

        return task, program

    def sampleManyProgramsWithTasks(self, tasks, N, min_len, max_len, verbose=False):
        if verbose:
            eprint("Sampling %d programs from the prior..." % N)
        flushEverything()
        requests = list({t.request for t in tasks})

        frequency = N / 50
        startingSeed = random.random()

        if verbose:
            looper = tqdm(range(N))
        else:
            looper = range(N)

        # Sequentially for ensemble training.
        data = [self.sampleProgramWithTask(requests, min_len, max_len,
                                           statusUpdate='.' if n % frequency == 0 else None,
                                           seed=startingSeed + n) for n in looper]

        flushEverything()
        data = [z for z in data if not any(x is None for x in z)]
        if verbose:
            eprint()
            eprint("Got %d/%d valid datapoints." % (len(data), N))
        flushEverything()

        return data

    def sampleManyFrontiers(self, tasks, N):
        eprint("Sampling %d programs from the prior..." % N)
        flushEverything()
        requests = list({t.request for t in tasks})

        frequency = N / 50
        startingSeed = random.random()

        # Sequentially for ensemble training.
        frontiers = [self.sampleHelmholtz(requests,
                                          statusUpdate='.' if n % frequency == 0 else None,
                                          seed=startingSeed + n) for n in range(N)]

        eprint()
        flushEverything()
        frontiers = [z for z in frontiers if z is not None]
        eprint()
        eprint("Got %d/%d valid frontiers." % (len(frontiers), N))
        flushEverything()

        return frontiers

    def createDataset(self, tasks, N, with_tasks=False, no_spaces=False, min_len=5, max_len=20, verbose=False):
        dataset = []
        data = self.sampleManyProgramsWithTasks(tasks, N, min_len, max_len, verbose=verbose)
        for task, program in data:
            inp_prompt = ''
            for examples in task.examples:
                inp = str(examples[0])
                out = examples[1]
                inp_string = get_inp_string_for_task(inp, no_spaces=no_spaces)

                out_string = get_out_string(out)
                inp_prompt += f'{inp_string} {out_string} '

            if with_tasks:
                dataset.append((inp_prompt.strip(), str(program), task))
            else:
                dataset.append((inp_prompt.strip(), str(program)))

        return dataset
          

In [54]:
from torch.utils.data import Dataset, DataLoader

class FactoringDataset(Dataset):
    def __init__(self, dataset_itself):
        self.data = dataset_itself
        
    def __getitem__(self, idx):
        return self.data[idx]
    
    def __len__(self):
        return len(self.data)


In [52]:
output_dir = '/home/ma/e/eberhardinger/workspaces/T5-experimens/no-spaces-in-prompt_maxlen-35/'
checkpoint_dirs = list(filter(lambda x: os.path.isdir(os.path.join(output_dir, x)) and 'checkpoint-' in x, os.listdir(output_dir)))
checkpoint_dir = sorted(checkpoint_dirs, key=lambda x: int(x.replace('checkpoint-', '')))[-1]
checkpoint_dir = os.path.join(output_dir, checkpoint_dir)
model = T5ForConditionalGeneration.from_pretrained(checkpoint_dir).to('cuda')
tokenizer = RobertaTokenizer.from_pretrained(checkpoint_dir)
c = Collator(tokenizer)

TypeError: __init__() missing 1 required positional argument: 'tokenizer'

In [125]:
import re
data_file = "/home/ma/e/eberhardinger/workspaces/ec/dreamcoder/domains/perfect-maze-minigrid/collected_data/2022-12-10T15:32:12.354349.npy"
data = np.load(data_file, allow_pickle=True)
parsed_data = parseData(data)
tasks = makeTasks(parsed_data, randomChunkSize=False, fixedChunkSize=10)
maze_feature_extractor = MinigridMazeFeatureExtractor(tasks)

def find_sequences(string):
    return re.findall(r"2+", string)

def get_inp_string_for_task(inp_string, compress=False, no_spaces=False):
    inp_string = inp_string.replace(',', '').replace('[', '').replace(']', '').replace('(', '').replace(')', '')
    if no_spaces:
        inp_string = inp_string.replace(' ', '')
    
    if compress:
        inp_string = inp_string.replace('2222', '4')

    return inp_string.strip()


dataset_creator = DatasetCreator(maze_feature_extractor, grammars[0])
dataset = dataset_creator.createDataset(tasks, 1, min_len=60, max_len=60, no_spaces=True)
dataset

(775, 6)
keys: 11
Created 146 tasks with chunkSize 10
x input dim 5 y input dim 5 channel input dim 1


[('22222222222222122221222210 a 22221222212222122222222223 a 12111122211211122222222222 c 11211212221121122222222222 c 11121221221112122222222222 a 12111221211112112222111111 a 22222222222211122122221110 c 22222222222221122212222110 c 22222222222222122221222210 a 22221222212222122222222223 a 12111122211211122222222222 c 11211212221121122222222222 c 11121221221112122222222222 a 12111221211112112222111111 a 22222222222211122122221110 c 22222222222221122212222110 c 22222222222222122221222210 a 22221222212222122222222223 a 12111122211211122222222222 c 11211212221121122222222222 c 11121221221112122222222222 a 12111221211112112222111111 a 22222222222211122122221110 c 22222222222221122212222110 c 22222222222222122221222210 a 22221222212222122222222223 a 12111122211211122222222222 c 11211212221121122222222222 c 11121221221112122222222222 a 12111221211112112222111111 a 22222222222211122122221110 c 22222222222221122212222110 c 22222222222222122221222210 a 22221222212222122222222223 a 12111122211

In [142]:
import re

class LookupTableCollator:
    def __init__(self, tokenizer):
        self.lookup_table = self.generate_lookup_table()
        self.tokenizer = tokenizer

    def __call__(self, batch):
        # entry[1] is the spec, need to call repr to turn it into a string. entry[0] is the prog_str already
        ret = {"input_ids": self.encode_obs([entry[0] for entry in batch], padding=True, truncation=True), 
            "labels": self.tokenizer([entry[1] for entry in batch], padding=True, return_tensors='pt').input_ids}
        return ret         

    def generate_lookup_table(self):
        tokens = []
        # encode from 1 - 1 * 25
        tokens += ['1' * i for i in range(1, 26)]

        # encode from 2 - 2 * 25
        tokens += ['2' * i for i in range(1, 26)]

        # encode also 0, 3 and zero, one, two, three, four, five, six
        tokens += ['0', '3', 'a', 'b', 'c', 'd', 'e', 'f', 'g']    
        return {k: v+1 for v, k in enumerate(tokens)}
    
    
    def encode_obs(self, observations, padding=True, truncation=True):
        tokens = []

        for obs in observations:
            seq = []
            for item in re.finditer(r"(.)\1*", obs):
                word = item.group(0).strip()
                if not word:
                    continue  
                
                token = self.lookup_table[word]
                seq.append(token)
            if padding:
                while len(seq) < 512:
                    seq.append(0)
            if truncation:
                seq = seq[:512]
        
            tokens.append(seq)
        return torch.tensor(tokens)
    
class CollatorOld:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __call__(self, batch):
        # entry[1] is the spec, need to call repr to turn it into a string. entry[0] is the prog_str already
        ret = {"input_ids": self.tokenizer([entry[0] for entry in batch], padding=True,  truncation=False, return_tensors='pt').input_ids,
               "labels": self.tokenizer([entry[1] for entry in batch], padding=True,  return_tensors='pt').input_ids}
        return ret
    
c = Collator(tokenizer)

In [143]:
len(CollatorOld(tokenizer)(dataset)['input_ids'][0])

767

In [144]:
c(dataset)['input_ids'][0]

truncate seq len of 569


tensor([39,  1, 29,  1, 29,  1, 51, 53, 29,  1, 29,  1, 29,  1, 35, 52, 53,  1,
        26,  4, 28,  2, 26,  3, 36, 55,  2, 26,  2, 26,  1, 28,  2, 26,  2, 36,
        55,  3, 26,  1, 27,  1, 27,  3, 26,  1, 36, 53,  1, 26,  3, 27,  1, 26,
         4, 26,  2, 29,  6, 53, 37,  3, 27,  1, 29,  3, 51, 55, 38,  2, 28,  1,
        29,  2, 51, 55, 39,  1, 29,  1, 29,  1, 51, 53, 29,  1, 29,  1, 29,  1,
        35, 52, 53,  1, 26,  4, 28,  2, 26,  3, 36, 55,  2, 26,  2, 26,  1, 28,
         2, 26,  2, 36, 55,  3, 26,  1, 27,  1, 27,  3, 26,  1, 36, 53,  1, 26,
         3, 27,  1, 26,  4, 26,  2, 29,  6, 53, 37,  3, 27,  1, 29,  3, 51, 55,
        38,  2, 28,  1, 29,  2, 51, 55, 39,  1, 29,  1, 29,  1, 51, 53, 29,  1,
        29,  1, 29,  1, 35, 52, 53,  1, 26,  4, 28,  2, 26,  3, 36, 55,  2, 26,
         2, 26,  1, 28,  2, 26,  2, 36, 55,  3, 26,  1, 27,  1, 27,  3, 26,  1,
        36, 53,  1, 26,  3, 27,  1, 26,  4, 26,  2, 29,  6, 53, 37,  3, 27,  1,
        29,  3, 51, 55, 38,  2, 28,  1, 

In [138]:
c(dataset)['input_ids'][0]

truncate seq len of 569


tensor([    1,  5520, 24886, 24886,  3600,  3600,  1578,    29, 24886, 24886,
         3437,  2539, 30304,  2138,  1105,  6030,  3787,    26,  3707,    26,
         2539,  3787,    26,  3787,    26, 10392,  3787,    26, 29941,  2539,
        27284, 14260, 14260, 27284, 26959,  8643, 25452,  1578,    27,  2138,
         1105,  5558,  3787,    29,  9222,  3707,    27,  1578,    27, 24886,
         4763, 23643,  7414,  3787,    28, 24886,  2947, 23643,  5520, 24886,
        24886,  3600,  3600,  1578,    29, 24886, 24886,  3437,  2539, 30304,
         2138,  1105,  6030,  3787,    26,  3707,    26,  2539,  3787,    26,
         3787,    26, 10392,  3787,    26, 29941,  2539, 27284, 14260, 14260,
        27284, 26959,  8643, 25452,  1578,    27,  2138,  1105,  5558,  3787,
           29,  9222,  3707,    27,  1578,    27, 24886,  4763, 23643,  7414,
         3787,    28, 24886,  2947, 23643,  5520, 24886, 24886,  3600,  3600,
         1578,    29, 24886, 24886,  3437,  2539, 30304,  2138, 

In [115]:
def createTestDataFromTasks(tasks, with_tasks=False, no_spaces=False):
    dataset = []
    for task in tasks:
        inp_prompt = ''
        for examples in task.examples:
            inp = str(examples[0])
            out = examples[1]
            inp_string = get_inp_string_for_task(inp, no_spaces=no_spaces)
            out_string = get_out_string(out)
            inp_prompt += f'{inp_string} {out_string} '

        if with_tasks:
            dataset.append((inp_prompt.strip(), task))
        else:
            dataset.append((inp_prompt.strip()))

    return dataset

# generate n samples with t temperature
def generate_samples_with_temp(txt, n_samples, temp):
    to_tokenizer = [txt for i in range(n_samples)]
    outputs = model.generate(tokenizer(to_tokenizer, return_tensors='pt', padding=True).input_ids.to('cuda'), do_sample=True, max_length=128, temperature = temp)
    results = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return results

In [116]:
from multiprocessing import Pool
from dreamcoder.program import Program

def run_on_input_examples(task, program, grammar, verbose=False):
    try:
        print(type(program))
        print(program)
        program = Program.parse(program)
        print('after parse')
        for inp, out in task.examples:
            # output ex is the action
            pred = runWithTimeout(lambda: program.runWithArguments(inp), None)
            if verbose:
                print('Input:', inp, 'Out:', out, 'Pred:', pred)
            if out != pred:
                return None
            
    except Exception as e: 
        if verbose:
            print('exception')
            print(e)
        return None
    try:
        logPrior = grammar.logLikelihood(task.request, program)
    except  Exception as e:
        #print(f'run_on_input_examples > logLikelihood failed for: {program}')
        # not a correctly typed program
        print('logprior exeption', e)
        return None
        
    return logPrior
        
def test_programs_on_task(task, grammar, n=5, temp=1.0, verbose=False):
    progs = generate_samples_with_temp(task[0], n, temp)
    found_progs = []
    for i, prog in enumerate(progs):
        if verbose:
            print(prog)
        log_prior = run_on_input_examples(task[1], prog, grammar, verbose=verbose)
        if log_prior is not None:
            found_progs.append((Program.parse(prog), log_prior))

    if len(found_progs) == 0:
        return None, -1  
    
    found_progs.sort(key=lambda x: x[1], reverse=True)
    
    best = found_progs[0]
    return best[0], best[1]         

def test_programs_on_task_multithread(task, n=5, temp=1.0, verbose=False):
    progs = generate_samples_with_temp(task[0], n, temp)
    indices = range(n)
    inputs = zip([task[1]]*n , progs, indices, [verbose]*n)

    with Pool(n) as p:
        results = p.map(run_on_input_examples, inputs)
    
    results = [x for x in results if x[0] is not None]
    results.sort(key=lambda x: x[1])
    if len(results) == 0:
        return None, -1
    
    best = results[0]
    return best[0], results[-1]

In [13]:
from dreamcoder.frontier import *

def generate_frontiers(testTasks, stats):
    frontiers = []
    for tt, stat in zip(testTasks, stats):
        _, task = tt
        program, prior = stat
        if program is None:
            continue
        frontier = Frontier([FrontierEntry(program=program,
                            logLikelihood=0., logPrior=prior)],
                            task=task)
        frontiers.append(frontier)
    return frontiers
    
def check_test_tasks_old(testTasks, grammar, n_sampling=100, verbose=False):
    stats = []
    for tt in tqdm(testTasks):       
        p, n = test_programs_on_task(tt, grammar, n=n_sampling, verbose=verbose)
        stats.append((p,n))
    return stats


def check_test_tasks(testTasks, grammar, n_sampling=100, verbose=False):
    stats = []

    looper = testTasks
    if verbose:
        looper = tqdm(testTasks)

    for tt in looper:
        p, n = test_programs_on_task(tt, grammar, n=n_sampling, verbose=verbose)
        stats.append((p, n))
    return stats


def get_solved_tasks(stats, verbose=False):
    solved = [x for x in stats if x[0] is not None]
    return len(solved),len(stats)

def ec_iterator_T5(parsed_data, training_args, n_sampling=100, random_programs=100000, save_dataset=True, verbose=False):
    results = {}
    grammar = Grammar.uniform(basePrimitives())
    global result
    for i in range(5, 20):
        print(f'Start iteration {i}')
        transformers.logging.set_verbosity_info()
        tasks = makeTasks(parsed_data, randomChunkSize=False, fixedChunkSize=i)
        maze_feature_extractor = MinigridMazeFeatureExtractor(tasks)
        dataset_creator = DatasetCreator(maze_feature_extractor, grammar)
        dataset_file_name = f'iter_{i}-ec_iterator_T5-gen_data_{random_programs}.npy'
        if save_dataset and os.path.exists(dataset_file_name):
            dataset = np.load(dataset_file_name)
            print(f'loaded dataset {dataset_file_name}')
        else:
            dataset = dataset_creator.createDataset(tasks, random_programs)
           
        if save_dataset:
            np.save(dataset_file_name, np.array(dataset))
            print(f'saved dataset to {dataset_file_name}')
            
        dataset = FactoringDataset(dataset)
        trainer = Seq2SeqTrainer(model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer, compute_metrics=None, data_collator=Collator(tokenizer))
        trainer.train()

        transformers.logging.set_verbosity_error()
        testTasks = createTestDataFromTasks(tasks, True)
        stats = check_test_tasks(testTasks, grammar, n_sampling=n_sampling, verbose=False)
        solved = get_solved_tasks(stats, verbose=True)
        
        # compress data of generated frontiers
        frontiers = generate_frontiers(testTasks, stats, grammar)
        result = ECResult(parameters={},
                          grammars=[grammar],
                          taskSolutions={
                              f.task: f for f in frontiers},
                          recognitionModel=None, numTestingTasks=len(testTasks),
                          allFrontiers={
                              f.task: f for f in frontiers})
        grammar = consolidate(result, grammar, iteration=0, arity=3, aic=1.0, pseudoCounts=30.0, structurePenalty=1.5, compressor='ocaml', topK=1, CPUs=numberOfCPUs())
        results[i] = {'stats': stats, 'grammar': grammar}


In [8]:
output_dir = '/home/ma/e/eberhardinger/workspaces/T5-experimens/no-spaces-in-prompt_maxlen-35/'
checkpoint_dirs = list(filter(lambda x: os.path.isdir(os.path.join(output_dir, x)) and 'checkpoint-' in x, os.listdir(output_dir)))
checkpoint_dir = sorted(checkpoint_dirs, key=lambda x: int(x.replace('checkpoint-', '')))[-1]
checkpoint_dir = os.path.join(output_dir, checkpoint_dir)
model = T5ForConditionalGeneration.from_pretrained(checkpoint_dir).to('cuda')
tokenizer = RobertaTokenizer.from_pretrained(checkpoint_dir)
data_file = "/home/ma/e/eberhardinger/workspaces/ec/dreamcoder/domains/perfect-maze-minigrid/collected_data/2022-12-10T15:32:12.354349.npy"
data = np.load(data_file, allow_pickle=True)
parsed_data = parseData(data)
grammar_file = os.path.join(output_dir, 'results.pkl')
with open(grammar_file, 'rb') as handle:
    result = dill.load(handle)
grammar = [g['grammar'] for g in result.values()][-1]
key = list(result.keys())[-1]

(775, 6)


In [13]:
results = {}
for i in range(5, 20):
    tasks = makeTasks(parsed_data, randomChunkSize=False, fixedChunkSize=i)
    testTasks = createTestDataFromTasks(tasks, True, no_spaces=True)
    stats = check_test_tasks(testTasks[:1], grammar, n_sampling=10, verbose=True)
    solved = get_solved_tasks(stats, verbose=True)
    print(solved)
    break
    #results[i] = solved
results

keys: 11
Created 210 tasks with chunkSize 5


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.39s/it]

(lambda (lambda (#(lambda (lambda (lambda (#(lambda (if $0 left-action forward-action)) (eq-obj? (#(lambda (get $0 2 3)) $2) (get $2 $1 $0)))))) (if (#(lambda (eq-obj? $0 empty-obj)) empty-obj) $1 (if (#(lambda (eq-obj? (#(lambda (get $0 2 3)) $0) empty-obj)) $1) $1 $1)) 4 4)))
<class 'str'>
(lambda (lambda (#(lambda (lambda (lambda (#(lambda (if $0 left-action forward-action)) (eq-obj? (#(lambda (get $0 2 3)) $2) (get $2 $1 $0)))))) (if (#(lambda (eq-obj? $0 empty-obj)) empty-obj) $1 (if (#(lambda (eq-obj? (#(lambda (get $0 2 3)) $0) empty-obj)) $1) $1 $1)) 4 4)))
after parse
Input: ([[2, 2, 2, 2, 2], [2, 2, 2, 2, 2], [1, 1, 1, 1, 1], [2, 2, 1, 2, 2], [1, 2, 1, 1, 1]], 0) Out: 2 Pred: 0
(lambda (lambda (#(lambda (lambda (lambda (#(lambda (lambda (#(lambda (if $0 left-action forward-action)) (eq-obj? $1 $0)))) (if $0 $2 (#(lambda (get $0 2 3)) $1)) wall-obj)))) (get $1 0 3) $1 (#(lambda (eq-obj? (#(lambda (get $0 2 3)) $0) empty-obj)) $1))))
<class 'str'>
(lambda (lambda (#(lambda (lam




{}

In [12]:
Program.parse('(lambda forward-action)')


(lambda forward-action)

In [18]:
path = os.path.join(f'/home/ma/e/eberhardinger/workspaces/T5-experimens/', 'results.pkl')
with open(path, "wb") as handle:
    try:
        dill.dump(results, handle)
    except TypeError as e:
        eprint(results)

In [20]:
[s/a for s,a in results.values()]

[0.8579545454545454,
 0.765,
 0.6686746987951807,
 0.5617977528089888,
 0.44966442953020136,
 0.36764705882352944,
 0.368,
 0.319672131147541,
 0.30357142857142855,
 0.32075471698113206,
 0.23655913978494625,
 0.1590909090909091,
 0.11627906976744186,
 0.1927710843373494,
 0.1518987341772152]

In [None]:
training_args = TrainingArguments(per_device_train_batch_size=112, 
                                  gradient_accumulation_steps=2,
                                  save_steps = 10000,
                                  num_train_epochs=5,
                                  output_dir = './t5-ec-checkpoints/')
data_file = "/home/ma/e/eberhardinger/workspaces/ec/dreamcoder/domains/perfect-maze-minigrid/collected_data/2022-12-10T15:32:12.354349.npy"
data = np.load(data_file, allow_pickle=True)
parsed_data = parseData(data)
tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')
model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small')
ec_iterator_T5(parsed_data, training_args, random_programs=100000, save_dataset=True)

(775, 6)
Start iteration 5
keys: 11
Created 210 tasks with random chunkSize
x input dim 5 y input dim 5 channel input dim 1
loaded dataset iter_5-ec_iterator_T5-gen_data_100000.npy
saved dataset to iter_5-ec_iterator_T5-gen_data_100000.npy


***** Running training *****
  Num examples = 100000
  Num Epochs = 5
  Instantaneous batch size per device = 112
  Total train batch size (w. parallel, distributed & accumulation) = 224
  Gradient Accumulation steps = 2
  Total optimization steps = 2230
  Number of trainable parameters = 60492288


Step,Training Loss
500,0.2033
1000,0.1153
1500,0.1114
2000,0.1095




Training completed. Do not forget to share your model on huggingface.co/models =)


  7%|██████████████                                                                                                                                                                                                     | 14/210 [00:45<10:49,  3.31s/it]PANIC: not enough arguments for the type
request bool
tp bool -> bool -> bool
expression (and (eq-obj? (get $1 (if (eq-obj? empty-obj empty-obj) (get $1 3 3) 2) (get $1 2 0))))
xs [(eq-obj? (get $1 (if (eq-obj? empty-obj empty-obj) (get $1 3 3) 2) (get $1 2 0)))]
argumentTypes [bool, bool]
PANIC: Grammar failure, exporting to  failures/grammarFailure1678642667.309182.pickle
  7%|███████████████                                                                                                                                                                                                    | 15/210 [00:48<10:30,  3.23s/it]

run_on_input_examples > logLikelihood failed for: (lambda (lambda (if (and (and (eq-obj? (get $1 (if (eq-obj? empty-obj empty-obj) (get $1 3 3) 2) (get $1 2 0)))) (eq-direction? direction-1 $0)) forward-action left-action)))


 58%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                         | 121/210 [06:50<04:51,  3.27s/it]$1 Not in candidates
Candidates is {empty-obj: (0.0, tobj, Context(next = 3, {t2 ||> tobj, t1 ||> tobj, t0 ||> taction})), wall-obj: (0.0, tobj, Context(next = 3, {t2 ||> tobj, t1 ||> tobj, t0 ||> taction})), if: (0.0, bool -> tobj -> tobj -> tobj, Context(next = 4, {t3 ||> tobj, t2 ||> tobj, t1 ||> tobj, t0 ||> taction})), get: (0.0, array(array(tobj)) -> int -> int -> tobj, Context(next = 3, {t2 ||> tobj, t1 ||> tobj, t0 ||> taction}))}
request is tobj
xs []
environment [tdirection, array(array(tobj))]
 58%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                        | 122/210 [06:53<04:59,  3

run_on_input_examples > logLikelihood failed for: (lambda (lambda (if (eq-obj? (get $1 1 4) (if (eq-direction? direction-0 direction-2) (if (eq-direction? $0 direction-2) $1 $1) empty-obj)) left-action forward-action)))


 68%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                   | 143/210 [08:08<04:01,  3.61s/it]direction-2 Not in candidates
Candidates is {0: (0.0, int, Context(next = 2, {t1 ||> int, t0 ||> taction})), 1: (0.0, int, Context(next = 2, {t1 ||> int, t0 ||> taction})), 2: (0.0, int, Context(next = 2, {t1 ||> int, t0 ||> taction})), 3: (0.0, int, Context(next = 2, {t1 ||> int, t0 ||> taction})), 4: (0.0, int, Context(next = 2, {t1 ||> int, t0 ||> taction})), if: (0.0, bool -> int -> int -> int, Context(next = 3, {t2 ||> int, t1 ||> int, t0 ||> taction}))}
request is int
xs []
environment [tdirection, array(array(tobj))]
 69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                  | 144/

run_on_input_examples > logLikelihood failed for: (lambda (lambda (if (eq-obj? empty-obj (get $1 2 (if (eq-direction? direction-1 direction-3) direction-2 $0))) forward-action left-action)))


 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 194/210 [11:05<00:57,  3.57s/it]direction-0 Not in candidates
Candidates is {if: (0.0, bool -> array(array(tobj)) -> array(array(tobj)) -> array(array(tobj)), Context(next = 3, {t2 ||> array(array(tobj)), t1 ||> array(array(tobj)), t0 ||> taction})), $1: (0.0, array(array(tobj)), Context(next = 2, {t1 ||> array(array(tobj)), t0 ||> taction}))}
request is array(array(tobj))
xs []
environment [tdirection, array(array(tobj))]
 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████               | 195/210 [11:09<00:54,  3.65s/it]

run_on_input_examples > logLikelihood failed for: (lambda (lambda (if (eq-obj? (get (if (eq-obj? wall-obj wall-obj) $1 direction-0) 1 2) wall-obj) forward-action left-action)))


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 210/210 [12:00<00:00,  3.43s/it]

104/210 -> 49.523809523809526%



Showing the top 5 programs in each frontier being sent to the compressor:
perfect maze 10 size 5 part 0
0.00	(lambda (lambda (if (eq-obj? (get $1 2 3) empty-obj) forward-action left-action)))

perfect maze 10 size 5 part 3
0.00	(lambda (lambda (if (eq-obj? empty-obj (get (if (eq-direction? $0 direction-2) $1 $1) 0 1)) forward-action left-action)))

perfect maze 10 size 5 part 4
0.00	(lambda (lambda (if (eq-direction? direction-1 $0) forward-action left-action)))

perfect maze 10 size 5 part 7
0.00	(lambda (lambda (if (eq-obj? empty-obj (get (if (eq-direction? direction-3 direction-3) $1 $1) 2 3)) forward-action left-action)))

perfect maze 10 size 5 part 10
0.00	(lambda (lambda (if (eq-obj? wall-obj (get $1 0 4)) forward-action left-action)))

perfect maze 10 size 5 part 11
0.00	(lambda (lambda (if (or (eq-obj? (get $1 2 3) wall-obj) (eq-direction? $0 direction-1)) left-action forward-action)))

perfect maze 10 size 5 part 13
0.00	(lambda (lambda (if (not (eq-obj? (get $1 2 3) wall-ob

In [3]:
import wandb

In [9]:
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /container/.netrc


True