In [1]:
import glob
import logging
import random
import traceback
from logging import getLogger
from operator import itemgetter

from ordered_set import OrderedSet
from tqdm.notebook import tqdm

from fl.LLM import ListInterpreter
from formhe.asp.instance import Instance
from formhe.asp.synthesis.ASPSpecGenerator import ASPSpecGenerator
from formhe.asp.synthesis.AspVisitor import AspVisitor
from formhe.exceptions.parser_exceptions import InstanceGroundingException
from formhe.trinity.ng_enumerator import NextGenEnumerator, PresetStatement
from formhe.utils.perm import PermutationGeneratorHelper

In [2]:
# Sorted so the work list is deterministic: glob.glob() returns matches in a
# filesystem-dependent order.
# NOTE(review): without recursive=True the "**" component behaves like "*",
# i.e. it matches exactly one directory level -- confirm that is intended.
instances = sorted(glob.glob("../correct_instances/mooshak/**/*.lp"))

In [3]:
from utils import config

# Generation parameters; the functions defined below read these as globals.
depth = 2  # depth argument handed to NextGenEnumerator
cycles = 3  # number of times each instance is loaded and processed
mutations_per_program = 30  # accepted mutations wanted per entry of number_mutations, per cycle
deletions_per_program = 60  # line-deletion attempts per cycle
number_mutations = [1, 2, 3, 4, 5]  # mutation counts handed to PermutationGeneratorHelper
deletion_probabilities = ([1, 2], [0.8, 0.2])  # (lines to delete, matching weights) for random.choices
max_percentage_incorrect_lines_threshold = 0.75  # records with at least this fraction of faulty lines are dropped
max_errors_until_quit = 1000  # failed candidates tolerated per mutation count

conf = config.get()
# object.__setattr__ is used rather than plain assignment -- presumably the
# config object rejects normal attribute writes; TODO confirm.
for _flag_name in (
    "allow_unsafe_vars",
    "allow_not_generated_predicates",
    "disable_head_empty_or_non_constant_constraint",
    "disable_no_dont_care_in_head_constraint",
):
    object.__setattr__(conf, _flag_name, True)

In [4]:
def create_instance_dict(short_name, incorrect_program, correct_program, n_mutations):
    """Build one dataset record for a faulty program and its fault-localisation labels.

    Empty strings in ``incorrect_program`` stand for removed lines. They are
    dropped to form the "reduced" program, and every reported line id refers to
    a position in that reduced program.

    Args:
        short_name: identifier stored under the ``'instance'`` key.
        incorrect_program: lines of the faulty program; ``""`` marks a removed line.
        correct_program: lines of the reference program.
        n_mutations: value stored under ``'n_mutations'``.

    Returns:
        The record as a dict, or ``None`` when the reduced program is empty or
        when the fraction of faulty lines is at least
        ``max_percentage_incorrect_lines_threshold``.

    Raises:
        AssertionError: if a computed line id falls outside the reduced program.
    """
    # Original line index -> index in the reduced program (None for removed lines).
    reduce_mapping = {}
    incorrect_program_reduced = []
    for i, line in enumerate(incorrect_program):
        if line == "":
            reduce_mapping[i] = None
        else:
            reduce_mapping[i] = len(incorrect_program_reduced)
            incorrect_program_reduced.append(line)

    if not incorrect_program_reduced:
        return None

    # Positions (in original numbering) where the two programs disagree.
    # zip() stops at the shorter program, so only the common prefix is compared.
    differing_ids = [i for i, (good, bad) in enumerate(zip(correct_program, incorrect_program)) if good != bad]

    # Changed lines that are still present in the reduced program.
    modified_ids = [reduce_mapping[i] for i in differing_ids if incorrect_program[i] != ""]

    # Lines appended beyond the end of the correct program. They are translated
    # through reduce_mapping (instead of subtracting the final offset) so that
    # empty appended lines are skipped and never shift the label onto another line.
    added_ids = [
        reduce_mapping[i]
        for i in range(len(correct_program), len(incorrect_program))
        if reduce_mapping[i] is not None
    ]

    fl = modified_ids + added_ids
    correction = [correct_program[i] for i in differing_ids]

    if len(fl) / len(incorrect_program_reduced) >= max_percentage_incorrect_lines_threshold:
        return None

    # Sanity check; callers catch AssertionError.
    for line_id in fl:
        assert line_id < len(incorrect_program_reduced)

    n_missing = len(incorrect_program) - len(incorrect_program_reduced)
    faulty = set(fl)
    return {'instance': short_name,
            'correct_program_lines': correct_program,
            'correct_program': "\n".join(correct_program),
            'incorrect_program_lines': incorrect_program_reduced,
            'incorrect_program': "\n".join(incorrect_program_reduced),
            'correction': correction,
            'n_mutations': n_mutations,
            'missing_lines': n_missing > 0,
            'n_mising_lines': n_missing,
            'fl': fl,
            'line_scores': [1.0 if i in faulty else 0.0 for i in range(len(incorrect_program_reduced))]
            }


def process_instance(instance_file):
    """Generate faulty variants of one correct instance file.

    For each of ``cycles`` rounds the instance is loaded (with ``shuffle`` enabled
    on every round but the first) and two kinds of records are produced through
    ``create_instance_dict``:

    * deletions: ``deletions_per_program`` attempts, each blanking a random
      selection of lines of the correct program;
    * mutations: for every value in ``number_mutations``, candidate programs
      from ``PermutationGeneratorHelper`` are evaluated and those for which
      ``asp_interpreter.test`` returns a falsy result are recorded, until
      ``mutations_per_program`` records are collected, the helper runs out of
      candidates, or more than ``max_errors_until_quit`` candidates failed.

    Args:
        instance_file: path of the instance to load; the
            ``"../correct_instances/"`` part is stripped to form the short name.

    Returns:
        list of record dicts (possibly empty).
    """
    short_name = instance_file.replace("../correct_instances/", "")
    print("Generating mutations for", short_name)
    mutations_results = []
    t = tqdm(total=((mutations_per_program * len(number_mutations)) + deletions_per_program) * cycles)
    t.set_description(short_name)
    for cycle_i in range(cycles):
        # A distinct seed string is stored on the shared config for every (instance, cycle) pair.
        object.__setattr__(conf, "seed", "yas" + short_name + str(cycle_i))

        try:
            instance = Instance(instance_file, shuffle=cycle_i != 0)
        except Exception as ex:
            print("Skipping", short_name)
            print(ex)
            continue

        predicates = instance.constantCollector.predicates
        spec_generator = ASPSpecGenerator(instance, instance.config.extra_vars, predicates.items())
        trinity_spec = spec_generator.trinity_spec
        asp_visitor = AspVisitor(trinity_spec, spec_generator.free_vars)
        asp_interpreter = ListInterpreter(instance, predicates.keys())

        try:
            correct_program = instance.get_program_lines()
            semi_bound_statements = []
            for rule in instance.constantCollector.not_skipped:
                node = asp_visitor.visit(rule)
                semi_bound_statements.append(PresetStatement(node.children[0], node.children[1].children))
        except Exception as ex:
            print("Skipping", short_name)
            print(ex)
            continue

        # --- Deletion records -------------------------------------------------
        line_ids = list(range(len(correct_program)))
        for _ in range(deletions_per_program):
            to_delete_n = random.choices(deletion_probabilities[0], deletion_probabilities[1])[0]
            # random.sample raises ValueError when asked for more items than the
            # population holds, so clamp for very short programs.
            lines_to_delete = random.sample(line_ids, min(to_delete_n, len(line_ids)))
            incorrect_program = [line if line_id not in lines_to_delete else "" for line_id, line in enumerate(correct_program)]
            d = create_instance_dict(short_name, incorrect_program, correct_program, -1)
            if d is None:
                continue
            t.update()
            mutations_results.append(d)

        # --- Mutation records -------------------------------------------------
        empty_statements = [PresetStatement(None, [])]

        enumerator = NextGenEnumerator(trinity_spec, depth,
                                       semi_bound_statements=semi_bound_statements + empty_statements, free_predicates=OrderedSet(predicates.keys()) - instance.constantCollector.predicates_generated,
                                       force_generate_predicates=instance.constantCollector.predicates_used - instance.constantCollector.predicates_generated,
                                       additional_body_roots=1)

        # NOTE(review): presumably a solver timeout in milliseconds -- confirm.
        enumerator.solver.set("timeout", 10000)

        for n_mutations in number_mutations:
            # Pop every scope still open on the solver before starting this round.
            while enumerator.solver.num_scopes() >= 1:
                enumerator.solver.pop()
            perm_helper = PermutationGeneratorHelper("yas" + short_name + str(cycle_i), len(enumerator.relaxation_vars()), n_mutations, enumerator, return_perms=True)
            mut_count = 0  # records accepted for this mutation count
            j = 0  # candidates that failed or were unusable
            while mut_count < mutations_per_program and j <= max_errors_until_quit:
                prog, perm = perm_helper.next()

                if prog is None:
                    break

                try:
                    asp_prog = asp_interpreter.eval(prog)
                    evaluation_result = asp_interpreter.test("\n".join(asp_prog))

                    if evaluation_result:
                        # test() returned a truthy result, so the candidate is not recorded.
                        j += 1
                    else:
                        d = create_instance_dict(short_name, asp_prog, correct_program, n_mutations)
                        if d is None:
                            # Rejected candidates count neither as accepted nor as failed.
                            continue
                        mut_count += 1
                        t.update()
                        mutations_results.append(d)
                except InstanceGroundingException:
                    j += 1
                except AssertionError as e:
                    # Raised e.g. by the sanity check in create_instance_dict; worth showing.
                    j += 1
                    traceback.print_exception(e)
                except Exception:
                    # Best effort: any other failure only counts towards the error budget.
                    j += 1

    return mutations_results

In [None]:
import os
from multiprocessing import Pool
from tqdm.contrib.telegram import tqdm as tqdmt

getLogger("formhe").setLevel(logging.ERROR)
getLogger("urllib3").setLevel(logging.ERROR)

# Telegram credentials must never be stored in the notebook. They are read from
# the environment variables that tqdm.contrib.telegram itself documents.
# NOTE: the bot token that used to be hard-coded here has to be treated as
# leaked and should be revoked.
telegram_token = os.environ.get("TQDM_TELEGRAM_TOKEN")
telegram_chat_id = os.environ.get("TQDM_TELEGRAM_CHAT_ID")

results = []
with Pool(8) as pool:
    # chunksize=1 hands out one instance at a time; records are collected in completion order.
    work = pool.imap_unordered(process_instance, instances, chunksize=1)
    if telegram_token and telegram_chat_id:
        t = tqdmt(work, total=len(instances), token=telegram_token, chat_id=telegram_chat_id)
    else:
        # No credentials configured: fall back to the local notebook progress bar.
        t = tqdm(work, total=len(instances))
    for r in t:
        results += r

# Debugging alternatives (single instance / sequential run):
# process_instance("../correct_instances/mooshak/01/A_4_8.lp")
#
# results = []
# for instance in instances:
#     results += process_instance(instance)

Generating mutations forGenerating mutations forGenerating mutations forGenerating mutations for  Generating mutations forGenerating mutations for  Generating mutations forGenerating mutations formooshak/01/A_0_1.lp mooshak/01/A_1_2.lp   
mooshak/01/A_3_3.lpmooshak/01/A_5_1.lpmooshak/01/A_6_1.lpmooshak/01/A_2_1.lp
mooshak/01/A_4_8.lp
mooshak/01/A_7_2.lp






  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/630 [00:00<?, ?it/s]

  0%|          | 0/68 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_0_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_1_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_2_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_3_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_4_3.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_5_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_6_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for
 mooshak/01/B_7_1.lp

  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_8_2.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_8_3.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/B_8_4.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations formooshak/01/C_0_1.lp 


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_1_6.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_2_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_3_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_4_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_5_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_7_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/C_8_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/D_0_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/D_4_6.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/D_6_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/D_8_3.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/D_8_4.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for
 mooshak/01/D_8_5.lp

  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/D_8_6.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/E_0_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for
 mooshak/01/E_1_2.lp

  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/E_2_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/E_4_4.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations formooshak/01/E_5_1.lp 


  0%|          | 0/630 [00:00<?, ?it/s]


Generating mutations for mooshak/01/E_6_2.lp

  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/01/E_7_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations formooshak/01/E_8_1.lp 


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_13_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_15_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_16_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_19_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_19_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_20_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/A_9_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/B_13_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations formooshak/02/B_15_0.lp 


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/B_16_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/B_19_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Skipping mooshak/02/B_19_0.lp

Skipping mooshak/02/B_19_0.lp

Generating mutations formooshak/02/B_20_1.lp 


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/B_9_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/C_15_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/C_16_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/C_19_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/C_19_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/C_20_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/D_15_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/D_19_1.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/D_9_2.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations formooshak/02/E_15_1.lp 


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for
 mooshak/02/E_16_0.lp

  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/E_19_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/E_20_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

Generating mutations for mooshak/02/E_9_0.lp


  0%|          | 0/630 [00:00<?, ?it/s]

In [None]:
def tuplify(e):
    """Return the items of the sized collection ``e`` as a tuple."""
    return tuple(e) if len(e) != 0 else tuple()


# Replace every list-valued field by a tuple and leave other values untouched --
# presumably so the values are hashable for the pandas grouping and
# de-duplication done afterwards.
results = [
    {key: tuplify(value) if isinstance(value, list) else value for key, value in record.items()}
    for record in results
]

In [None]:
from pandas import DataFrame

# One row per generated record.
df = DataFrame(results)
# Group on every column so that "size" counts how often each identical row occurs.
df_ = df.groupby(list(df.columns), as_index=False).size()
df

In [None]:
# Show only the rows that occur more than once.
df_.loc[df_["size"] > 1]

In [None]:
# Display the frame without repeated rows.
# NOTE(review): the result is only displayed, `df` itself is not modified, so the
# file written afterwards still contains the duplicates -- confirm this is intended.
df.drop_duplicates(keep="first")

In [None]:
# Persist the full frame `df` (not a de-duplicated copy) in Feather format.
df.to_feather("dataset_3.feather")