In [1]:
# import modules from root of project
%cd ..

import time

from arena.arena import parse_stimulus_matrix, Sheet, run_sheets, collect_actuation_sheets, SheetInvocation, \
    lql_to_sheet_signature
from arena.engine.adaptation import PassThroughAdaptationStrategy
from arena.engine.artifacts import CodeCandidate, import_classes_under_test
from arena.engine.classes import ClassUnderTest
from arena.engine.ssntestdriver import interpret_sheet, run_sheet, InvocationListener, Test, TestInvocation
from arena.lql.lqlparser import parse_lql
from arena.ssn.ssnparser import parse_sheet
from arena.engine.artifacts import write_modules_and_import_cuts
from arena.provider.gai import prompt_code_units

from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM

# pandas display options
# None is pandas' documented "unlimited" value, so DataFrames are rendered in full.
# (0 asks pandas to auto-detect the terminal size, which pandas documents as not
# reliable inside a notebook, so large frames could still be truncated.)
import pandas as pd
pd.set_option('display.max_columns', None)  # never truncate columns
pd.set_option('display.max_rows', None)  # never truncate rows

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/home/marcus/PycharmProjects/arena-python


# Create Stimulus Matrix for GCD Example

In [2]:
# --- LLM backend -------------------------------------------------------------
# Alternative backend (disabled): OpenAI instead of Ollama
# os.environ["OPENAI_API_KEY"] = "demo" # FIXME your API KEY
# llm = OpenAI(
#     model="gpt-4o-mini"
# )
# Active backend: Ollama, expected to be reachable on localhost
llm = OllamaLLM(model="llama3.1")

# --- Candidate implementations -------------------------------------------------
# The prompt is the bare function header + docstring the model is asked about
template = 'def greatest_common_divisor(a: int, b: int) -> int: """ Return a greatest common divisor of two integers a and b >>> greatest_common_divisor(3, 5) 1 >>> greatest_common_divisor(25, 15) 5 """'
prompt = ChatPromptTemplate.from_template(template)

# request three samples for the prompt
code_solutions = prompt_code_units(llm, prompt, samples=3)

# the millisecond timestamp makes the output folder name differ between runs
timestamp_ms = round(time.time() * 1000)
target_folder = f"/tmp/arena-python-{timestamp_ms}"
# classes under test (cuts) obtained from the sampled code
cuts = write_modules_and_import_cuts(target_folder, code_solutions)

# --- Test specification --------------------------------------------------------
# LQL: the interface the candidates are expected to offer
lql = """GCD {
        greatest_common_divisor(int,int)->int
    }
"""

# Stimulus sheet, one record per sheet row:
#   row 1 creates a GCD instance,
#   row 2 calls greatest_common_divisor on cell A1 with inputs 25 and 15 (value "5" in column A)
sheet_rows = [
    {"A": {}, "B": "create", "C": "GCD", "D": None, "E": None},
    {"A": "5", "B": "greatest_common_divisor", "C": "A1", "D": "25", "E": "15"},
]
ss = pd.DataFrame(sheet_rows)

# --- Stimulus matrix -----------------------------------------------------------
# combine the sheet, the classes under test and the sheet invocation
test_sheet = Sheet("test1()", ss, lql)
test_invocation = SheetInvocation("test1", "")
sm = parse_stimulus_matrix([test_sheet], cuts, [test_invocation])

## SM is a pandas DataFrame

The data frame consists of complex objects.

In [3]:
# display the stimulus matrix: one row per test, one column per class under test
sm

Unnamed: 0_level_0,0_original,1_original,2_original
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,test1(),test1(),test1()


## Run Stimulus Matrix in Arena

In [4]:
# run stimulus matrix
# the listener instance is handed to run_sheets; presumably it observes the
# invocations made while the sheets execute — TODO confirm against InvocationListener
invocation_listener = InvocationListener()
# NOTE(review): the meaning of the positional argument 1 is not visible in this
# notebook — confirm against the signature of run_sheets
srm = run_sheets(sm, 1, invocation_listener)

## SRM is a pandas DataFrame

The data frame consists of complex objects.

In [5]:
# display the matrix returned by run_sheets (one cell per test / candidate pair)
srm

Unnamed: 0_level_0,0_original_0,1_original_0,2_original_0
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,Executed Invocations\n0 => ExecutedInvocation ...,Executed Invocations\n0 => ExecutedInvocation ...,Executed Invocations\n0 => ExecutedInvocation ...


## Zoom into SRM

In [6]:
# create actuation sheets, now we have the real stimulus response matrix (SRM)
# each cell of the resulting frame is itself a pandas DataFrame (an actuation sheet)
srm_actuations = collect_actuation_sheets(srm)

In [7]:
# display the matrix of actuation sheets (same layout as the SRM: tests x candidates)
srm_actuations

Unnamed: 0_level_0,0_original_0,1_original_0,2_original_0
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,output operation ...,output operation ...,output operation ...


### Get Actuation sheet for first cell in the DataFrame

In [8]:
# positional lookup: row 0 (first test), column 0 (first candidate)
srm_actuations.iloc[0, 0]

Unnamed: 0,output,operation,service,input_0,input_1
0,$CUT@module,__init__,OPDSQYMBTO.GCD,,
1,5,greatest_common_divisor,$CUT@module,25.0,15.0


### Which Candidate Implementation is it?

In [9]:
# the column label is itself an object; vars() lists its attributes
# (cut, initializer_mapping, method_mapping, adapter_id)
vars(srm_actuations.columns[0]) # print properties of object

{'cut': <arena.engine.classes.ClassUnderTest at 0x7c606a757170>,
 'initializer_mapping': {},
 'method_mapping': {<arena.lql.lqlparser.MethodSignature at 0x7c606a32b4d0>: <function 0.greatest_common_divisor(a: int, b: int) -> int>},
 'adapter_id': '0'}

In [10]:
# show the class under test referenced by the first column's `cut` attribute
vars(srm_actuations.columns[0].cut) # print properties of object

{'id': '0',
 'variant_id': 'original',
 'code_candidate': <arena.engine.artifacts.CodeCandidate at 0x7c606c5edfd0>,
 'class_under_test': <module '0' from '/tmp/arena-python-1742552534914/0/candidate.py'>}

### Show Source Code

In [11]:
# show source code of the class under test behind the first column
srm_actuations.columns[0].cut.get_source_code()

'def greatest_common_divisor(a: int, b: int) -> int:\n    """\n    Return a greatest common divisor of two integers a and b\n    \n    Args:\n        a (int): First integer.\n        b (int): Second integer.\n    \n    Returns:\n        int: Greatest common divisor of a and b.\n    \n    Examples:\n        >>> greatest_common_divisor(3, 5)\n        1\n        >>> greatest_common_divisor(25, 15)\n        5\n    """\n    while b != 0:\n        a, b = b, a % b\n    return abs(a)\n'

### Which Test is it?

In [12]:
# from Stimulus Matrix: the first cell exposes a `.test` attribute; vars() lists
# its properties (name, parsed_sheet, interface_specification, signature)
vars(sm.iat[0, 0].test) # print properties of object

{'name': 'test1',
 'parsed_sheet': <arena.ssn.ssnparser.ParsedSheet at 0x7c606a7e9340>,
 'interface_specification': <arena.lql.lqlparser.Interface at 0x7c606a32ba10>,
 'signature': <arena.arena.SheetSignature at 0x7c606a712300>}

## Look at Specific Observations

Let's look at the outputs first

In [13]:
# every cell is an actuation sheet (a pandas DataFrame): take its 'output' column,
# then unroll the per-cell Series so each sheet row becomes its own matrix row
(
    srm_actuations
    .map(lambda sheet: pd.Series(sheet['output']))
    .apply(pd.Series.explode)
)

Unnamed: 0_level_0,0_original_0,1_original_0,2_original_0
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,$CUT@module,$CUT@module,$CUT@module
test1,5,5,5


In [14]:
# 'operation' column of every actuation sheet, unrolled to one matrix row per sheet row
(
    srm_actuations
    .map(lambda sheet: pd.Series(sheet['operation']))
    .apply(pd.Series.explode)
)

Unnamed: 0_level_0,0_original_0,1_original_0,2_original_0
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,__init__,__init__,__init__
test1,greatest_common_divisor,greatest_common_divisor,greatest_common_divisor


In [15]:
# 'service' column of every actuation sheet, unrolled to one matrix row per sheet row
(
    srm_actuations
    .map(lambda sheet: pd.Series(sheet['service']))
    .apply(pd.Series.explode)
)

Unnamed: 0_level_0,0_original_0,1_original_0,2_original_0
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,OPDSQYMBTO.GCD,DFDLLUNHTF.GCD,MREPNXHZEG.GCD
test1,$CUT@module,$CUT@module,$CUT@module


In [16]:
# first input parameter ("input_0") of every actuation sheet,
# unrolled to one matrix row per sheet row
(
    srm_actuations
    .map(lambda sheet: pd.Series(sheet['input_0']))
    .apply(pd.Series.explode)
)

Unnamed: 0_level_0,0_original_0,1_original_0,2_original_0
tests,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
test1,,,
test1,25.0,25.0,25.0
