In [1]:
import enum
import importlib
import inspect
import os
import shutil

from typing import List
from IPython.display import HTML
from sflkit.analysis.spectra import Line

from sflkit.analysis import analysis_type, factory
from sflkit import *
from sflkit.color import ColorCode
from sflkit import instrument_config, analyze_config
from sflkit.config import Config

from avicenna import *

## A faulty Program

First, we need a faulty program. We chose an implementation of the `middle(x, y, z)` function that returns the *middle* number of its three arguments. For example, `middle(1, 3, 2)` should return 2 because `1 < 2` and `2 < 3`. We introduced a fault in this implementation of `middle` that occurs in line 7 `m = y`. 

In [2]:
from typing import List

class Term:
    def evaluate(self) -> float | int:
        pass


class Binary(Term):
    def __init__(self, left: Term, right: Term):
        self.left = left
        self.right = right


class Add(Binary):
    def evaluate(self):
        l, r = self.left.evaluate(), self.right.evaluate()
        return l + r


class Sub(Binary):
    def evaluate(self):
        l, r = self.left.evaluate(), self.right.evaluate()
        return l - r


class Mul(Binary):
    def evaluate(self):
        l, r = self.left.evaluate(), self.right.evaluate()
        return l * r


class Div(Binary):
    def evaluate(self):
        l, r = self.left.evaluate(), self.right.evaluate()
        return l / r


class Neg(Term):
    def __init__(self, term: Term):
        self.term = term

    def evaluate(self) -> float | int:
        t = self.term.evaluate()
        return -t


class Constant(Term):
    def __init__(self, value: int):
        self.value = value

    def evaluate(self) -> float | int:
        return self.value
    
    
    from typing import List



def parse(s: str):
    s = s.replace("(", " ( ")
    s = s.replace(")", " ) ")
    while "  " in s:
        s = s.replace("  ", " ")
    s = s.strip()
    tokens = list(reversed(s.split(" ")))
    assert tokens
    term = parse_add_sub(tokens)
    assert not tokens
    return term


def parse_terminal(tokens) -> Term:
    token = tokens.pop(0)
    if token.isnumeric():
        return Constant(int(token))
    elif token == ")":
        term = parse_add_sub(tokens)
        token = tokens.pop(0)
        assert token == "("
        return term
    else:
        assert False


def parse_neg(tokens) -> Term:
    term = parse_terminal(tokens)
    if tokens and tokens[0] in "~":
        tokens.pop(0)
        return Neg(term)
    else:
        return term


def parse_mul_div(tokens: List[str]) -> Term:
    term = parse_neg(tokens)
    if tokens and tokens[0] in "*/":
        token = tokens.pop(0)
        if token == "*":
            return Mul(parse_mul_div(tokens), term)
        else:
            return Div(parse_mul_div(tokens), term)
    else:
        return term


def parse_add_sub(tokens: List[str]) -> Term:
    term = parse_mul_div(tokens)
    if tokens and tokens[0] in "+-":
        token = tokens.pop(0)
        if token == "+":
            return Add(parse_add_sub(tokens), term)
        else:
            return Sub(parse_add_sub(tokens), term)
    else:
        return term

In [3]:
result = parse('(3 - 5) * ~ 5') # bug


In [4]:
result1 = parse('152 + 730')

In [5]:
result1.evaluate()

882

In [6]:
import string
from typing import List

grammar_expression = {
            "<start>": ["<arith_expr>"],
            "<arith_expr>": [
                "<arith_expr><operator><arith_expr>",
                "<number>",
                "(<arith_expr>)",
            ],
            "<operator>": [" + ", " - ", " * ", " / "],
            "<number>": ["<maybe_minus><non_zero_digit><maybe_digits>", "0"],
            "<maybe_minus>": ["", "~ "],
            "<non_zero_digit>": [
                str(num) for num in range(1, 10)
            ],  # Exclude 0 from starting digits
            "<digit>": list(string.digits),
            "<maybe_digits>": ["", "<digits>"],
            "<digits>": ["<digit>", "<digit><digits>"],
}

In [7]:
# from avicenna.avix import *

# AviX.create_event_file(inp='sqrt(9)',
#                        instrumented_function='main',
#                        conversion_func=None,
#                        event_path='rsc/event_file'
#                        )



In [8]:
put = 'expression.py'
tmp_py = 'rsc/instrumented.py'
language='python'
predicates='line'
metrics='Tarantula'
#passing='event-files/0,event-files/1'
failing='event-files/ev_file'
def get_config():
    return Config.create(path=put,
                         working=tmp_py,
                         language=language,
                         predicates=predicates,
                         metrics=metrics,
                         failing=failing
                         )
    
def instrument(out=True):
    instrument_config(get_config())
    if out:
        with open(tmp_py, 'r') as fp:
            print(fp.read())

In [9]:
instrument()

sflkit :: INFO     :: I found 56 events in expression.py.
sflkit :: INFO     :: I found 56 events in expression.py.


import sflkitlib.lib
sflkitlib.lib.add_line_event(0)
from typing import List

class Term:

    def evaluate(self) -> float | int:
        sflkitlib.lib.add_line_event(1)
        pass

class Binary(Term):

    def __init__(self, left: Term, right: Term):
        sflkitlib.lib.add_line_event(2)
        self.left = left
        sflkitlib.lib.add_line_event(3)
        self.right = right

class Add(Binary):

    def evaluate(self):
        sflkitlib.lib.add_line_event(4)
        (l, r) = (self.left.evaluate(), self.right.evaluate())
        sflkitlib.lib.add_line_event(5)
        return l + r

class Sub(Binary):

    def evaluate(self):
        sflkitlib.lib.add_line_event(6)
        (l, r) = (self.left.evaluate(), self.right.evaluate())
        sflkitlib.lib.add_line_event(7)
        return l - r

class Mul(Binary):

    def evaluate(self):
        sflkitlib.lib.add_line_event(8)
        (l, r) = (self.left.evaluate(), self.right.evaluate())
        sflkitlib.lib.add_line_event(9)
        

In [10]:
def analyzer_conf(conf: Config, factory):
    analyzer = Analyzer(irrelevant_event_files=conf.failing, 
                        relevant_event_files=conf.passing,
                        factory=factory)
    return analyzer

In [11]:
factory = factory.LineFactory()
line_event = analysis_type.AnalysisType(0)
line_event

LINE

In [12]:
analyzer = analyzer_conf(get_config(), factory=factory)

In [13]:
# results = analyzer.analyze()
# results

# analyzer needs event files 
analyzer.analyze()

In [14]:
coverage: List[Line] = analyzer.get_coverage()
coverage = {line.line for line in coverage}
print(coverage)

{49, 60, 61, 62, 64, 65, 66, 67, 68, 69, 73, 74, 75, 76, 77, 78, 79, 80, 86, 87, 91, 95, 96, 103, 107, 108, 115}


In [15]:
def middle_inp_conv(inp):
    inp = inp.__str__()
    middle_input = inp.split(',')
    
    converted_inp = [
        int(middle_input[0]),
        int(middle_input[1]),
        int(middle_input[2])
    ]
    
    return converted_inp

In [16]:
# from avicenna.avix import AviX
# AviX.create_event_file( instrumented_function='main',
#                         #instr_path=instrumented_file_path,
#                         inp='sqrt(9)', 
#                         conversion_func=None, 
#                         event_path='rsc/event_file',
# )

In [17]:
# from typing import List

# from sflkit.analysis.analysis_type import AnalysisType
# from sflkit.analysis.spectra import Line

# analyzer2 = analyzer.analyze()
# print("hi")
# coverage: List[Line] = analyzer2.get_coverage(AnalysisType.LINE)
# coverage = {line.line for line in coverage}
# print(coverage)
from avicenna.avix import *
from avicenna.oracle_construction import * 
avix_oracle = construct_oracle(
                            program_under_test='parse',
                            # inp_converter=None,
                            # timeout=10,
                            line = 77,
                            put_path='expression.py'
                            # resource_path='rsc/',
                            # program_oracle= None,
                            )


In [18]:
avix_oracle('3 + 5')


<OracleResult.PASSING: 'PASSING'>

In [19]:
avix_oracle('(6 / 2)')

<OracleResult.FAILING: 'FAILING'>

In [20]:
def calc_oracle(inp):
    # convert inputs
    
    return eval(inp)
    
    inputs = input.split()
    result_list = [x, y, z]
    result_list.sort()
    return result_list[1]

In [21]:
import math
calc_oracle('math.sqrt(9)')

3.0

In [22]:
regular_oracle = construct_oracle(
                            program_under_test=parse,
                            timeout=10,
                            program_oracle=calc_oracle,
                            default_oracle_result=OracleResult.PASSING)



In [23]:
inputs_expression = [
            '(9 - 5)', '16 - 5', '0 - (2 - 3)', '(7 - 2) - 3', # all -
            '(9 + 5)', '16 + 5', '0 + (2 + 3)', '(7 + 2) + 3', # all +
            '(9 * 5)', '16 * 5', '0 * (2 * 3)', '(7 * 2) * 3', # all *
            '(9 / 5)', '16 / 5', '0 / (2 / 3)', '(7 / 2) / 3', # all /
            '(9 / 5) + 8', '16 / 5 - 5', '0 / (2 * 0)', '7 / 2 / 0',   # div by 0
            '9 + 8', 
        ]

In [24]:
avix_oracle(inputs_expression[0])

<OracleResult.FAILING: 'FAILING'>

In [25]:
# avicenna = Avicenna(
#     grammar=grammar_calc,
#     initial_inputs=inputs_calc,
#     oracle=avix_oracle,
#     max_iterations=10,
#     top_n_relevant_features=3,
# )

In [26]:
avix = AviX(
    grammar=grammar_expression,
    initial_inputs=inputs_expression,
    oracle=avix_oracle,
    max_iterations=10,
    top_n_relevant_features=3,
    put_path='expression.py',    
)

sflkit :: INFO     :: I found 56 events in expression.py.
sflkit :: INFO     :: I found 56 events in expression.py.


In [27]:
# avicenna2 = Avicenna(
#     grammar=middle_grammar,
#     initial_inputs=middle_inputs,
#     oracle=regular_oracle,
#     max_iterations=10,
#     top_n_relevant_features=3, 
# )

In [28]:
import warnings

import logging
# Suppress the specific SHAP warning
warnings.filterwarnings(
    "ignore",
    message="LightGBM binary classifier with TreeExplainer shap values output has changed to a list of ndarray",
)
warnings.filterwarnings(
    "ignore", 
    message="No further splits with positive gain, best gain: -inf"
)

In [29]:
logging.basicConfig(filename='avicenna.log', filemode='w', encoding='utf-8', level=logging.INFO, force=True)
# only 2 constraints used in the end why
best_invariant = avix.explain() # unparse with islaunparse for further use


TypeError: evaluate_z3_expression.<locals>.not_implemented_failure() takes from 0 to 1 positional arguments but 2 were given

In [None]:
#another_invariant = avicenna2.explain() # unparse with islaunparse for further use

In [None]:
#print(another_invariant)

In [None]:
print(best_invariant)
print(best_invariant[0])
print(best_invariant[1])
print(best_invariant[2])

None


TypeError: 'NoneType' object is not subscriptable

In [None]:
# solver1 = ISLaSolver(
#     grammar=grammar_calc,
#     formula=another_invariant[0],
#     enable_optimized_z3_queries=False,
# )
from isla.solver import ISLaSolver

solver2 = ISLaSolver(
    grammar=grammar_expression,
    formula=best_invariant[0],
    enable_optimized_z3_queries=False,
)


In [None]:
# grammar_calc = {
#     "<start>": ["<arith_expr>"],
#     "<arith_expr>": ["<function>(<number>)"],
#     "<function>": ["sqrt", "sin", "cos", "tan"],
#     "<number>": ["<maybe_minus><one_nine><maybe_digits><maybe_frac>", 
#                  "-0",
#                  "0"],
#     "<maybe_minus>": ["",
#                       "-"
#                       ],
#     "<maybe_frac>": ["", ".<digits>"],
#     "<one_nine>": [str(num) for num in range(1, 10)],
#     "<digit>": [digit for digit in string.digits],
#     "<maybe_digits>": ["", "<digits>"],
#     "<digits>": ["<digit>", "<digit><digits>"],
# }

In [None]:
solver2.formula

ConjunctiveFormula(ForallFormula(BoundVariable("elem_1", "<number>"), Constant("start", "<start>"), ExistsFormula(BoundVariable("elem_2", "<one_nine>"), Constant("start", "<start>"), SMTFormula('(>= (str.to_int elem_1) (str.to_int elem_2))', BoundVariable("elem_1", "<number>"), BoundVariable("elem_2", "<one_nine>"), ))), ExistsFormula(BoundVariable("elem", "<function>"), Constant("start", "<start>"), SMTFormula('(= elem "sqrt")', BoundVariable("elem", "<function>"), )))

In [None]:
results2 = []

# # print(best_invariant[0])
# # print(another_invariant[0])
# for _ in range(1,10):
#     results1.append(solver1.solve())
    
#    avix_oracle()
for _ in range(1,10):
    results2.append(solver2.solve())

StopIteration: 

In [None]:
for input in results2:
    if avix_oracle(input) == OracleResult.FAILING:
        print(input)
        print(avix_oracle(input))

In [None]:
for input in results2:
    if avix_oracle(input) == OracleResult.FAILING:
        print(input)
        print(avix_oracle(input))
for input in results2:
    print(input)

In [None]:
solver = ISLaSolver(
    grammar=middle_grammar,
    formula=best_invariant[0],
    enable_optimized_z3_queries=False,
)

# this isnt working rn why not raaaaaaaaaaaaaaaaaaaaaaa
# should be inputs of type 2, 3, 1
for _ in range(1,10):
    print(solver.solve())


NameError: name 'middle_grammar' is not defined

In [None]:
# call func for middle, converts string input to usable integer values

def call_func_middle(inp: str):
    
    inp = inp.__str__()
    
    middle_input = inp.split(',')
    converted_inp =  [int(middle_input[0]), int(middle_input[1]), int(middle_input[2])]
    return converted_inp

In [None]:
test_path = Path('rsc/')
print(Path.exists(test_path))


True


In [None]:
from avicenna.avix import *
from avicenna.oracle_construction import * 

In [None]:
def middle_inp_conv(inp):
    inp = inp.__str__()
    middle_input = inp.split(',')
    
    converted_inp = [
        int(middle_input[0]),
        int(middle_input[1]),
        int(middle_input[2])
    ]
    
    return converted_inp

In [None]:
import tmp
importlib.reload(tmp)
tmp.sflkitlib.lib.reset()
avix_oracle = construct_oracle(
                            program_under_test='middle',
                            inp_converter=middle_inp_conv,
                            timeout=10,
                            line = 7,
                            resource_path='rsc/',
                            program_oracle= None)


In [None]:
avix_oracle("6,3,7")

<OracleResult.FAILING: 'FAILING'>

In [None]:
# # double check if this works

# AviX.create_event_file(instrumented_function='middle',
#                        #instr_path='tmp',
#                        inp = '2,1,3',
#                        conversion_func=middle_inp_conv,
#                        event_path='rsc/event_file'
#                        )

In [None]:
avix = AviX(grammar=middle_grammar,
            initial_inputs=middle_inputs,
            oracle=avix_oracle,
            max_iterations=10,
            desired_line=7,
            put_path='middle.py',
            #instr_path='instrumented.py',
            min_precision = 0.7,)

In [None]:
from fuzzingbook.GrammarFuzzer import GrammarFuzzer, DerivationTree

middle_grammar_converted = {
    "<start>": ["<stmt>"],
    "<stmt>": ["str.to.int(<x>),str.to.int(<y>),str.to.int(<z>)"],
    "<x>": ["<integer>"],
    "<y>": ["<integer>"],
    "<z>": ["<integer>"],
    "<integer>": ["<digit>", "<digit><integer>"],
    "<digit>": [str(num) for num in range(1, 10)]
}

fuzzer = GrammarFuzzer(middle_grammar)

for i in range(10):
    print(fuzzer.fuzz())

14,977,74
9,1,1
86,87619,98
75,88,2
4,3,7
4,8,74
67,11,97
6,25,182
7958,6,82
5,46,2


https://github.com/uds-se/sflkit

<img src="qrcode.png" style="width:500px">