In [3]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

cwd = os.getcwd()  # get directory for storage

# Automated Assertion Generation & Extraction with ChatGPT (main code)

## Step 1) Get Asserted Code From Github
> Clean and process the code

In [4]:
from google.cloud import bigquery as bq

def get_asserted_code(num=100000, ext="%.py", verbose=True):
    query_string = """SELECT f.repo_name, c.content
FROM `bigquery-public-data.github_repos.files` AS f
JOIN `bigquery-public-data.github_repos.contents` AS c
ON f.id = c.id
WHERE
NOT c.binary
AND f.path LIKE '%.py'
AND REGEXP_CONTAINS(c.content, r'(?m)^\s*assert ')
LIMIT """ + str(num)
    
    if isinstance(num, int):
        secret_dir = "Data/secret/"
        api_key = cwd + "/" + secret_dir + os.listdir(secret_dir)[0]
        assert api_key[-5:] == ".json"  # confirm that it was found
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = api_key
        query_string = query_string.replace("%.py", ext)

        if verbose:
            print("*Running Query:")
            print(query_string)
            print()
        client = bq.Client()
        df = (
            client.query(query_string)
            .result()
            .to_dataframe(
                create_bqstorage_client=True,
            )
        )
    elif isinstance(num, str):
        # load data from file
        df = pd.read_csv(num)
        print("Found data at", num)
    else:
        print("first param type undefined, must be string signifying directory of csv or\
               int signifying number of records to scrib from bigquery...")
        assert False
    
    if verbose:
        print("*Handling Duplicates...")
    init_len = len(df)
    df.drop_duplicates(subset=["content"], keep="first", inplace=True)
    if verbose:
        print("#Non-duplicates / #Total Retrieved =", (len(df)/init_len))
    return df

verilog_dir = cwd+"/Data/BigQuery/VerilogAssertions-ALL.csv"
python_dir = cwd+"/Data/BigQuery/PythonAssertions100k.csv"
# df = get_asserted_code(python_dir)  # 10
# df

## Step 2) Extract Assertions

In [5]:
conditionals = dict([[cond, i] for i, cond in enumerate(["==", "!=", "<=", ">=", "<", ">"])])
compounding_statements = ["and"]
bad_statements = [" or ", " in ", "isinstance"]  # TODO: properly account for OR
def parse_assertions(func, is_split=True, verbose=False):
    """
    Format: "assert [expression], [return_string]"
    
    Exceptions to Handle:
    - 'in'/'not in' keyword
    - boolean functions - ex. isinstance(var, type)
    - separation of attributes - ex. len(var), var[i]
    """
#     if verbose:
#         print("*Extracting Assertions...")
    out = []
    asserted_lines = 0
    lines = []
    for temp in func.split('\n'):  # find lines with assert in them
        if "assert" in temp:
            asserted_lines += 1
            bad_flag = False
            for bad in bad_statements:
                if bad in temp:
                    bad_flag = True
            if not bad_flag:
                lines.append(temp.strip())
    # TODO: experiment with smaller content window for assertions
    ind = 0
    while ind < len(lines):
        data = lines[ind].strip()
        start = data.find('assert')
        if start == -1:  # double checking that the assertion exists in this line
            ind += 1
            continue
        # account for combination statements
        for statement in compounding_statements:
            add_statement = data.find(statement)
            if add_statement != -1:
                extra_line = data[add_statement+len(statement):]
                lines.insert(ind+1, "assert "+extra_line)
                data = data[:add_statement].strip()

        com = data.find(',')   # parsing out return_string
        if com != -1:
            data = data[:com]
        com = data.find('#')
        if com != -1:   # parsing out comments
            if com < start:  # if the assertion itself is a comment
                ind += 1
                continue
            else:
                data = data[:com]

        if is_split:  # splitting the assertion into components for analysis
            data = [var.strip() for var in data.split(' ') if len(var.strip()) > 0]
            
            if len(data) < 1:  # edge case: nothing after 'assert' (likely typo)
                if verbose:
                    print("empty assertion found?: ", data, '\n', lines[ind])
                ind += 1
                continue
                
            if data[0] != "assert":  # edge case: something before the 'assert' statement
                ind += 1
#                 if verbose:
#                     print("something was found before the assertion on this line:\n", data)
                continue
    
            data = data[1:]  # from here on we only care about the content after the 'assert' keyword
            if len(data) < 1:  # edge case: nothing after 'assert' (likely typo)
                if verbose:
                    print("empty assertion found?: ", data, '\n', lines[ind])
                ind += 1
                continue

            condition = True  # assertion [variable] == condition by default
            if data[0] == "not":  # accounting for 'not' keyword
                condition = False
                data = data[1:]
            
            if len(data) == 1:  # adding == to simlify
                data = data + ["==", str(condition)]

            for i in range(len(data)):
                if data[i] == "is":  # simplifying is to ==
                    data[i] = "=="
                if data[i] in conditionals.keys():  # parsing common conditionals
                    data = [' '.join(data[:i]), data[i], ' '.join(data[i+1:])]  # conditionals[data[i]]
                    break

        if verbose and len(data) != 3:
            print("Weird assertion found:\n", data, '\n', lines[ind])
            print()
#             assert len(data) == 3, "found conditional-less assertion:\n" + str(data) + '\n' + str(lines[ind-1:ind+2])
        else:
            out.append(data)
        ind += 1
    return out, asserted_lines

def unassert(code, delim='', add_count=True):
    out = ""
    counter = 1
    for line in code.split('\n'):
        if "assert" not in line:
            out += '\n'
            if add_count:
                out += str(counter)
            out += delim+line
            counter += 1
    return out

def get_assertion(temp_df, verbose=False, unassert_col=True, add_stats=True):
    """ run assertion generation """
    # tester_df["assertions"] = tester_df["content"].apply(lambda code: get_assertions(code))
    
    assertions = []  # list of parsed assertions
    asserted_lines = []  # number of lines with 'assert' in them
    parsed_lines = []  # number of assertions easily parsed
    arr = []  # assertion recovery ratio
    atl = []  # assertions to size
    for i, row in tqdm(temp_df.iterrows()):
        parsed, lines = parse_assertions(row["content"], True, verbose)
        assertions.append(parsed)
        asserted_lines.append(lines)
        parsed_lines.append(len(parsed))
        arr.append(len(parsed)/lines)
        atl.append(len(parsed)/len(row["content"]))

    if unassert_col:
        temp_df["unasserted"] = temp_df["content"].apply(lambda code: unassert(code))
    
    if add_stats:
        temp_df["assertions"] = assertions
        temp_df["asserted_lines"] = asserted_lines
        temp_df["parsed_lines"] = parsed_lines
        temp_df["arr"] = arr
        temp_df["atl"] = atl
    return temp_df

# df = get_assertion(df)
# df

## Step 3) Extract Variables

In [6]:
banned_vars = ['', '*', 'self']
def old_get_variables(func, verbose=False):
    out = []
    for line in func.split('\n'):
        line = line.strip()
        if "def " in line:  # add params if its a function
            start = line.find('(')
            end = line.find(')')
            for new_param in line[start+1:end].split(','):
                default = new_param.find("=")
                if default != -1:
                    new_param = new_param[:default]
                new_param = new_param.strip()
                if new_param not in out and new_param not in banned_vars:
                    if verbose:
                        print("*Found  {", new_param, "}  at:\n", line, '\n')
                    out.append(new_param)
        else: # add variables if equals operation
            find_var = line.find(' = ')
            if find_var != -1:
                new_var = line[:find_var].strip()
                
                if ',' in new_var: # handle tuple equalities edge case (ex: a, b, c = fn_output())
                    var_list = [tuple_var.strip() for tuple_var in new_var.split(',')]
                else:
                    var_list = [new_var]
                for new_var in var_list:
                    if new_var not in out and new_var not in banned_vars:
                        if verbose:
                            print("**Found  {", new_var, "}  at:\n", line, '\n')
                        out.append(new_var)
            # TODO: handle indexing
    return out


# test
import ast

def py_extract_variables(code):  # TODO: run a proper test
    tree = ast.parse(code)
    variables = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Assign):
            for target in node.targets:
                if isinstance(target, ast.Name):
                    variables.append(target.id)
    return variables

def py_get_all_variables(df, verbose=False):  # TODO test
    ret = []
    for i, row in tqdm(df.iterrows()):
        these_vars = []
        try:
            these_vars = py_extract_variables(row["content"])
        except:
            if verbose: print("get all variables was not able to use ast")
        add_vars = old_get_variables(row["content"])
        for new_var in add_vars:
            if new_var not in these_vars:
                these_vars.append(new_var)
        
        for statement in row["assertions"]:  # add variables from already present assertions
            if len(statement) > 0:
                new_var = str(statement[0])
                if new_var not in these_vars:
                    these_vars.append(new_var)
        ret.append(these_vars)
    return ret

# out = py_extract_variables(df.sample()["content"].iloc[0])
# tester_df["variables"] = tester_df["variables"].apply(lambda code: py_extract_variables(code))

# df["variables"] = py_get_all_variables(df)
# df

## Step 4) Define LLM Schema

In [7]:
# from ipynb.fs.full.Data.GitHub-Assertions import get_variables
class prompt_example:
        def __init__(self, this_in="", this_out=""):
            self.input = this_in
            self.output = this_out
            
        def composite(self):
            return "Example Input:\n" + self.input + "\nExample Output:\n" + self.output
        
class LLM_prompt:       
    def __init__(self, input_code="*Variables:\n[flag, num, i]\n*Code:\n1num = int(input(\"Enter a number: \"))  # Program to check if a number is prime or not\n2flag = False  # define a flag variable\n3\n4if num == 1:\n5    print(num, \"is not a prime number\")\n6elif num > 1: # check for factors\n7    for i in range(2, num):\n8        if (num % i) == 0:\n9            flag = True  # if factor is found, set flag to True\n10            break  # break out of loop\n11    if flag:  # check if flag is True\n12        print(num, \"is not a prime number\")\n13    else:\n14        print(num, \"is a prime number\")",
                 example_in="*Variables:\n[n]\n*Code:\n1def fibonacci(n):\n2   if n <= 1:\n3       return n\n4   else:\n5       return(recur_fibo(n-1) + recur_fibo(n-2))",
                 example_out="[1, n, >=, 1, \"the fibonacci sequence can only be done on posative integers\"]\n\nWhich would be the same as:\n1def fibonacci(n):\n2   assert n >= 1\n3   if n <= 1:\n4       return n\n5   else:\n6       return(recur_fibo(n-1) + recur_fibo(n-2))", 
                 criteria=["Assert that the function can take in all inputs necessary to complete the process",
                           "Assert that all outputs are of the proper sizes."]
                 ):
        self.criteria = criteria
        self.example = prompt_example(example_in, example_out)
        self.input_code = input_code
        
        # default params that are less likely to change
        self.intro = "You are a helpful bot that adds assertions to pieces of Python code."  
        self.input_format = "You will be given a list of variables and a string of code presented in the format:\n*Variables:\n[...]\n*Code:\n..."
        self.criteria_transition = "Generate assertions based on the following criteria:"
        self.output_format = "Your response should ONLY be a list of assertions in the format:\n[line_number, subject_variable, condition_type, target, reasoning]"
        self.output_format_description = ["line_number is an integer referencing the line after which the assertion should be inserted",
                                          "subject_variable and target can ONLY be variables from the input list, integers, booleans, or None", # TODO retest bools
                                          "condition_type can only be a value in this list: [==, >=, <=, !=]",
                                          "reasoning is a short decription of why the assertion was made"]
        self.example_transition = "Here is an example of what your input will look like and what you should return:"
        self.input_transition = "Here is the actual input you should provide assertions for:"
    
    
    
    def composite_criteria(self):
        """ return criteria as a single string"""
        ret = ""
        for i, crit in enumerate(self.criteria):
            ret += str(i+1) + ") " + crit
            if i != len(self.criteria)-1:  # ignore last instance for formatting
                ret += '\n'
        return ret
    
    def composite_output_formatting(self):
        ret = self.output_format
        for desc in self.output_format_description:
            ret += "\n -" + desc 
        return ret
    
    def prompt(self):
        """ return entire prompt"""
        return '\n'.join([self.intro, self.input_format,
                          self.criteria_transition, self.composite_criteria(), "",
                          self.composite_output_formatting(), "",
                          self.example_transition, self.example.composite(), "\n",
                          self.input_transition, self.input_code])
    
    def to_list(self):
        """ return key prompt components as a list """
        return [self.intro, self.formatting, self.criteria, self.example, self.input_code, self.prompt()]
    
    def __str__(self):
        return self.prompt()
    def __repr__(self):
        return self.prompt()

tester = LLM_prompt()
tester

# fib_input = "def fibonacci(n):\nassert n >= 1\nif n <= 1:\nreturn n\nelse:\nreturn(recur_fibo(n-1) + recur_fibo(n-2))"
# fib_output = "[1, n, 1, 1, the fibonacci sequence can only be done on posative integers]"

You are a helpful bot that adds assertions to pieces of Python code.
You will be given a list of variables and a string of code presented in the format:
*Variables:
[...]
*Code:
...
Generate assertions based on the following criteria:
1) Assert that the function can take in all inputs necessary to complete the process
2) Assert that all outputs are of the proper sizes.

Your response should ONLY be a list of assertions in the format:
[line_number, subject_variable, condition_type, target, reasoning]
 -line_number is an integer referencing the line after which the assertion should be inserted
 -subject_variable and target can ONLY be variables from the input list, integers, booleans, or None
 -condition_type can only be a value in this list: [==, >=, <=, !=]
 -reasoning is a short decription of why the assertion was made

Here is an example of what your input will look like and what you should return:
Example Input:
*Variables:
[n]
*Code:
1def fibonacci(n):
2   if n <= 1:
3       return

### Step 4.5) Make Prompts for LLM

In [8]:
def make_prompts(temp_df):
    prompts = []
    for i, row in tqdm(temp_df.iterrows()):
        # *Variables:\n[flag, num, i]\n*Code:\n
        prompt_param = "*Variables:\n" + str(row["variables"]) + "\n*Code:\n" + row["unasserted"]
        prompts.append(str(LLM_prompt(prompt_param)))
    temp_df["prompt"] = prompts
    temp_df["prompt_len"] = [len(p) for p in prompts]
    return temp_df
# df = make_prompts(df)
# df

In [9]:
# querying
import openai
import altair as alt
import json
from vega_datasets import data

OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
openai.api_key = OPENAI_API_KEY
def run_gpt4(messages):
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )
    return response  # ["choices"][0]["message"]["content"]



def gpt_oneshot(input_prompt, directive="You are a helpful bot that adds assertions to pieces of code.", verbose=False):
    message_hist = [{"role": "system", "content": directive},  # add directed
                    {"role": "user", "content": input_prompt}]  # init
    response = run_gpt4(message_hist)["choices"][0]["message"]["content"]
    if verbose:
        print("chat_gpt: ", response, '\n')
#     message_hist.append({"role": "system", "content": response})
    return response

# tester = gpt_oneshot("Write python code to sort n numbers")

# PYTHON 1SHOT CODE

In [15]:

def python_one_shot(my_dir=cwd+"/Data/BigQuery/PythonAssertions100k.csv", ext="%.py", preprocessed=True):
    if not preprocessed:
        print("GETTING CODE")
        df = get_asserted_code(my_dir, ext, False)

        print("\nEXTRACTING ASSERTIONS")
        df = get_assertion(df)

        all_prompts = len(df)
        df = df[df["parsed_lines"]>0]
        all_prompts = 100*len(df)/all_prompts
        print("dropping code with no parsed assertions =>", str(all_prompts)+'%')

        print("\nEXTRACTING VARIABLES")
    #     get_vars = lambda code: py_extract_variables(code)
    #     df["variables"] = df["content"].apply(get_vars)
        df["variables"] = py_get_all_variables(df)

        df["num_vars"] = df["variables"].apply(lambda var: len(var))
        all_prompts = len(df)
        df = df[df["num_vars"] > 0]
        all_prompts = 100*len(df)/all_prompts
        print("dropping code with no extracted variables =>", str(all_prompts)+'%')

        print("\nGENERATING PROMPTS")
        df = make_prompts(df)

        prompt_limit = 8192
        all_prompts = len(df)
        df = df[df["prompt_len"] < prompt_limit]
        all_prompts = 100*len(df)/all_prompts
        print("dropping prompts over limit =>", str(all_prompts)+'%')

        df.to_csv(cwd+"/preprocessed_python_unsupervised.csv") # saving data
        print("Data checkpoint saved...\n")
    else:
        df = pd.read_csv(cwd+"/preprocessed_python_unsupervised.csv").head(10)
        print("Data checkpoint restored...\n")
    
    print("GENERATING RESPONSES")
    responses = []
    for prompt in tqdm(df["prompt"]):
        responses.append(gpt_oneshot(prompt))
    df["gpt"] = responses
    df.to_csv(cwd+"/python_prompts_withresponse.csv") # saving data
    
    return df

df = python_one_shot()
df

GETTING CODE
Found data at /Users/korahughes/Documents/GitHub/LLMCodeGen/Data/BigQuery/PythonAssertions100k.csv

EXTRACTING ASSERTIONS


33793it [00:06, 5279.99it/s]


dropping code with no parsed assertions => 89.8351729648152%

EXTRACTING VARIABLES


30358it [02:58, 169.67it/s]


dropping code with no extracted variables => 100.0%

GENERATING PROMPTS


30358it [00:02, 12311.70it/s]


dropping prompts over limit => 47.71065287568351%
Data checkpoint saved...

GENERATING RESPONSES


  1%|▍                                        | 148/14484 [37:29<60:31:50, 15.20s/it]

KeyboardInterrupt



## PARTIAL EXECUTION OF 1SHOT

In [10]:
part_size = 200  # should take about a bit under an hour
current_size = 14464
def partial_execution(part=1):  # slices of 10xt
    start = (part-1)*part_size
    end = start+part_size
#     if start > current_size:
#         return
#     elif end > current_size:
#         end = current_size+1
    df = pd.read_csv(cwd+"/Data/python_prompts_noresponse.csv").iloc[start:end, 1:]
    print("\nGenerating prompts for indexes", start, "to", end)
    
    responses = []
    for prompt in tqdm(df["prompt"]):
        responses.append(gpt_oneshot(prompt))
    df["gpt"] = responses
    
    df.to_csv((cwd+"/Data/Testing/python_prompts_withresponse_part"+str(part)+".csv"), index=False) # saving data
    print("Saved to:", (cwd+"/Data/Testing/python_prompts_withresponse_part"+str(part)+".csv"))
    return df

df = partial_execution(2)
# for i in range(1, 10):  # up to 73
#     print(partial_execution(i))
#     print('\n')


Generating prompts for indexes 200 to 400


100%|████████████████████████████████████████████| 200/200 [1:12:19<00:00, 21.70s/it]


Saved to: /Users/korahughes/Documents/GitHub/LLMCodeGen/Data/Testing/python_prompts_withresponse_part2.csv


## Basic Evaluation of Python Results

In [11]:
df = pd.read_csv(cwd+"/Data/Testing/python_prompts_withresponse_part2.csv").sort_values("prompt_len", ascending=True)
df

Unnamed: 0,repo_name,content,unasserted,assertions,asserted_lines,parsed_lines,arr,atl,variables,num_vars,prompt,prompt_len,gpt
135,weidenba/recovery_sort,from filter_system.rename import RenameFilterS...,\n1from filter_system.rename import RenameFilt...,"[['len(filter_system.filter_plugins.keys())', ...",2,2,1.000000,0.006780,['filter_system'],1,You are a helpful bot that adds assertions to ...,1582,"[[5, 'filter_system', '!=', None, ""Filter syst..."
28,algorythmic/bash-completion,import pytest\n\n\nclass TestPydoc:\n @pyte...,\n1import pytest\n2\n3\n4class TestPydoc:\n5 ...,"[['completion', '==', 'True'], ['completion', ...",2,2,1.000000,0.007576,['completion'],1,You are a helpful bot that adds assertions to ...,1618,"[[6, 'completion', '!=', None, 'input variable..."
72,SBRG/ome,# -*- coding: utf-8 -*-\n\nfrom cobradb.models...,\n1# -*- coding: utf-8 -*-\n2\n3from cobradb.m...,"[['load_the_map(None', '==', 'True']]",1,1,1.000000,0.003086,"['test_db', 'session']",2,You are a helpful bot that adds assertions to ...,1684,"[[8, 'test_db', '!=', None, ""the test_db shoul..."
150,totalgood/twote,#!/usr/bin/env python\n# -*- coding: utf-8 -*-...,\n1#!/usr/bin/env python\n2# -*- coding: utf-8...,"[['fib(1)', '==', '1'], ['fib(2)', '==', '1'],...",3,3,1.000000,0.009434,"['__author__', '__copyright__', '__license__']",3,You are a helpful bot that adds assertions to ...,1696,"[14, '-10', >=, 0, ""the fibonacci sequence can..."
97,AartGoossens/athletic_pandas,import pandas as pd\nimport pytest\n\nfrom ath...,\n1import pandas as pd\n2import pytest\n3\n4fr...,"[[""model.params['hr_rest'].value"", '==', '0.00...",7,7,1.000000,0.009986,"['heartrate', 'power', 'model', 'predictions']",4,You are a helpful bot that adds assertions to ...,1720,"[[8, 'heartrate', '==', 50, ""heartrate should ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,tbenthompson/cppimport,import contextlib\nimport copy\nimport logging...,\n1import contextlib\n2import copy\n3import lo...,"[['p.returncode', '==', 'returncode'], ['os.pa...",15,12,0.800000,0.001981,"['root_logger', 'handler', 'formatter', 'filen...",27,You are a helpful bot that adds assertions to ...,7725,"[[39, 'test_code', '!=', '', 'Check that test_..."
29,mattwthompson/mdtraj,##############################################...,\n1###########################################...,"[['os.system(cmd)', '==', '0'], [""atoms['charg...",6,6,1.000000,0.001037,"['get_fn', 'trj', 'ref_trj', 'ref_top', 'ref_b...",37,You are a helpful bot that adds assertions to ...,7731,"[[35, 'get_fn', '!=', None, ""Function get_fn m..."
9,Dfred/concept-robot,# PyVision License\n#\n# Copyright (c) 2006-20...,\n1# PyVision License\n2#\n3# Copyright (c) 20...,"[['len(tmp)', '>', '0']]",1,1,1.000000,0.000175,"['*points', 'tmp', 'minx', 'miny', 'maxx', 'ma...",38,You are a helpful bot that adds assertions to ...,7873,"[40, '*points', '!=', 0, ""The function needs a..."
33,nanalelfe/fofe-ner,#!/home/chwang/anaconda2/envs/tensorflow/bin/p...,\n1#!/home/chwang/anaconda2/envs/tensorflow/bi...,"[['len(offsets)', '==', 'len(sent)'], ['0', '<...",4,2,0.500000,0.000328,"['logger', 'rspecifier', 'language', 'entity2c...",34,You are a helpful bot that adds assertions to ...,7908,"[[11, 'rspecifier', '!=', '', ""function LoadED..."


In [19]:
df.describe()

Unnamed: 0,asserted_lines,parsed_lines,arr,atl,num_vars,prompt_len
count,200.0,200.0,200.0,200.0,200.0,200.0
mean,10.965,8.525,0.849637,0.002552,13.96,4531.53
std,15.496921,13.600893,0.294534,0.002973,9.514781,1732.097439
min,1.0,1.0,0.037736,0.000185,1.0,1517.0
25%,2.0,1.0,0.746528,0.000588,6.0,2944.25
50%,4.0,3.0,1.0,0.001417,11.5,4519.0
75%,14.0,8.0,1.0,0.003679,20.25,6041.25
max,99.0,95.0,2.0,0.023152,45.0,8140.0


In [29]:
ind = 11
print("Showing Data for prompt_len sorted data in Part1 Ind"+str(ind))
print("\nExtracted Assertion:")
print(df.iloc[ind]["assertions"])
print("\nVariables:")
print(df.iloc[ind]["variables"])
print("\nCode:")
print(df.iloc[ind]["content"])
print("\nCode:")
print(df.iloc[ind]["unasserted"])

print("\nGPT Response:")
print(df.iloc[ind]["gpt"])

# print("\nPROMPT:")
# print(df.iloc[ind]["prompt"])

Showing Data for prompt_len sorted data in Part1 Ind11

Extracted Assertion:
[['completion', '==', 'True'], ['completion', '==', 'True']]

Variables:
['completion']

Code:
import pytest


@pytest.mark.bashcomp(cmd="munin-node-configure")
class TestMuninNodeConfigure:
    @pytest.mark.complete("munin-node-configure --libdir ")
    def test_1(self, completion):
        assert completion

    @pytest.mark.complete(
        "munin-node-configure -",
        require_cmd=True,
        xfail=(
            "! (munin-node-configure --help 2>&1 || :) "
            "| command grep -q -- '[[:space:]]-'"
        ),
    )
    def test_2(self, completion):
        assert completion


Code:

1import pytest
2
3
4@pytest.mark.bashcomp(cmd="munin-node-configure")
5class TestMuninNodeConfigure:
6    @pytest.mark.complete("munin-node-configure --libdir ")
7    def test_1(self, completion):
8
9    @pytest.mark.complete(
10        "munin-node-configure -",
11        require_cmd=True,
12        xfail=(
13    

# Replicating for Verilog

In [10]:
class VLLM_prompt:       
    def __init__(self, input_code="*Variables:\n[flag, num, i]\n*Code:\n1num = int(input(\"Enter a number: \"))  # Program to check if a number is prime or not\n2flag = False  # define a flag variable\n3\n4if num == 1:\n5    print(num, \"is not a prime number\")\n6elif num > 1: # check for factors\n7    for i in range(2, num):\n8        if (num % i) == 0:\n9            flag = True  # if factor is found, set flag to True\n10            break  # break out of loop\n11    if flag:  # check if flag is True\n12        print(num, \"is not a prime number\")\n13    else:\n14        print(num, \"is a prime number\")",
                 example_in="*Variables:\n[clkstart, pr1, cStart, sr1]\n*Code:\nassign clkstart = clk && gGate;\nsequence sr1;\n    req ##2 gnt;\nendsequence\n\nproperty pr1;\n    @(posedge clkstart) cStart |-> sr1;\nendproperty\n\nsequence sr1;\n    req ##2 gnt;\nendsequence\n\nproperty pr1;\n    @(posedge clk) cStart |-> sr1;\nendproperty",
                 example_out="[clkstart, cStart, sr1, \"cStart should be equal to sr1 at the start of every clock cycle\"]\nWhich would be the same as adding the assertion:\nassert @(posedge clkstart) cStart |-> sr1 else $display(\"cStart should be equal to sr1 at the start of every clock cycle\");", 
                 criteria=["Assert that the function can take in all inputs necessary to complete the process",
                           "Assert that all outputs are of the proper sizes."]
                 ):
        self.criteria = criteria
        self.example = prompt_example(example_in, example_out)
        self.input_code = input_code
        
        # default params that are less likely to change
        self.intro = "You are a helpful bot that adds assertions to pieces of Verilog code."  
        self.input_format = "You will be given a string of code presented in the format:\n*Variables:\n...\n*Code:\n..."
        self.criteria_transition = "Generate assertions based on the following criteria:"
        self.output_format = "Your response should ONLY be a list of assertions in the format:\n[timing, subject_variable, target, reasoning]"
        self.output_format_description = ["timing is the clock cycle(s) at which the assertion is checked relative to the present cycle",
                                          "subject_variable and target can ONLY be variables present in the code, integers, booleans, or None",
                                          "reasoning is a short decription of why the assertion was made"]
        self.example_transition = "Here is an example of what your input will look like and what you should return:"
        self.input_transition = "Here is the actual input you should provide assertions for:"
        # 
    
    
    def composite_criteria(self):
        """ return criteria as a single string"""
        ret = ""
        for i, crit in enumerate(self.criteria):
            ret += str(i+1) + ") " + crit
            if i != len(self.criteria)-1:  # ignore last instance for formatting
                ret += '\n'
        return ret
    
    def composite_output_formatting(self):
        ret = self.output_format
        for desc in self.output_format_description:
            ret += "\n -" + desc 
        return ret
    
    def prompt(self):
        """ return entire prompt"""
        return '\n'.join([self.intro, self.input_format,
                          self.criteria_transition, self.composite_criteria(), "",
                          self.composite_output_formatting(), "",
                          self.example_transition, self.example.composite(), "\n",
                          self.input_transition, self.input_code])
    
    def to_list(self):
        """ return key prompt components as a list """
        return [self.intro, self.formatting, self.criteria, self.example, self.input_code, self.prompt()]
    
    def __str__(self):
        return self.prompt()
    def __repr__(self):
        return self.prompt()
    
def v_gen_prompt(df):
    # TODO: make variable-less prompt
    ret = []
    for i, row in tqdm(df.iterrows()):
        my_vars = row["variables"]
        my_code = row["unasserted"]
        prompt_param = "*Variables:\n" + str(row["variables"]) + "\n*Code:\n" + row["unasserted"]
        ret.append(str(VLLM_prompt(prompt_param)))
    return ret
        
        
print(str(VLLM_prompt()))

You are a helpful bot that adds assertions to pieces of Verilog code.
You will be given a string of code presented in the format:
*Variables:
...
*Code:
...
Generate assertions based on the following criteria:
1) Assert that the function can take in all inputs necessary to complete the process
2) Assert that all outputs are of the proper sizes.

Your response should ONLY be a list of assertions in the format:
[timing, subject_variable, target, reasoning]
 -timing is the clock cycle(s) at which the assertion is checked relative to the present cycle
 -subject_variable and target can ONLY be variables present in the code, integers, booleans, or None
 -reasoning is a short decription of why the assertion was made

Here is an example of what your input will look like and what you should return:
Example Input:
*Variables:
[clkstart, pr1, cStart, sr1]
*Code:
assign clkstart = clk && gGate;
sequence sr1;
    req ##2 gnt;
endsequence

property pr1;
    @(posedge clkstart) cStart |-> sr1;
endpro

## Improved Assertion & Variable Extraction for Verilog

In [48]:
import re
special_vars = ['|->', '|=>', '##', '==']  # make sure there is no overlap with 'dirt'
target_vars = ['property', 'sequence', 'assign', 'variable', 'assert', 'reg', 'posedge', 'cover', 'iff', 
                'int', 'parameter', 'wire', 'module']  # keywords where variables are the next space-delim word
unspaced_signifier = ["=", ',', '|', "#"]  # characters that might be left over from combined variables extracted (ex: a,b,c)
# TODO: remove '[int: int]'
avoid_keywords = special_vars + target_vars
spaceable = ["==", "!=", "|->"]
def clean_code(line):
    if type(line) != str:
        line = str(line)
    for s in spaceable:  # making sure 'var==var' becomes 'var == var'
        found = line.find(s)
        if found != -1:
            if found != 0 and line[found-1] != ' ':
                line = line[:found] + ' ' + line[found:]
                found += 1  # for setting the additional side
            if found != len(line) and line[found+1] != ' ':
                line = line[:found+len(s)] + ' ' + line[found+len(s):]
    return line

def v_extract_variables(verilog_code, efficient=False, verbose=False):
    variables = set()
    
    # basic regex
    if not efficient:
        variable_pattern = r'\b(?:wire|reg|integer|real|time)\s+(.*?)\s*[,;]'
        matches = re.findall(variable_pattern, verilog_code, re.MULTILINE)
        for match in matches:
            for var in match.split(','):
                variables.add(var.strip())
        if verbose: print("Regex found", matches)
    
    # TODO: figure out why this is inconsistent and not determininstic
    for line in verilog_code.split('\n'):
        potential_vars = [new_var for new_var in [val for val in clean_var(line).split(' ')] if len(new_var) > 0]
        # if verbose: print("POTENTIAL:", potential_vars)
        for i, var in enumerate(potential_vars):
            for tvar in special_vars:  # signals for variables on either side (ex: 'A |-> B')
                if tvar in var.lower():
                    if verbose: print("\nFound two-sided var signifier", tvar, "at:", line, "\nparsed into:", potential_vars)  # print("\nFound special var at", i, "in", potential_vars)
                    if i > 0 and potential_vars[i-1] not in avoid_keywords:
                        variables.add(potential_vars[i-1])
                        if verbose: print("Adding", potential_vars[i-1])
                    if i < len(potential_vars)-1 and potential_vars[i+1] not in avoid_keywords:
                        variables.add(potential_vars[i+1])
                        if verbose: print("Adding", potential_vars[i+1])
                    if efficient:
                        break  # Note: there can be multiple on one line
            for tvar in target_vars:  # signals for variables after definition (Ex: 'property A')
                if tvar == var.lower():
                    j = i+1
                    if verbose: print("\nFound one-sided var signifier", tvar, "at:", line, "\nparsed into:", potential_vars)  # print("\nFound target var at", i, "in", potential_vars)
                    while j < len(potential_vars):
                        if potential_vars[j] in target_vars:
                            break  # avoiding compounded case like 'assert property var'
                        variables.add(potential_vars[j])
                        if verbose: print("Adding", potential_vars[j])
                        if ',' not in potential_vars[j]:
                            break  # accounting for multidefine like reg A, B, C;
                        j += 1
                    if efficient:
                        break
    # additional cleaning step
    final_set = set()
    for var in variables:
        if "//" in var:
            continue  # avoid commented lines
        this_var = clean_var(var, True)
        for unspaced in unspaced_signifier:
            this_var = this_var.replace(unspaced, ' ')
        new_vars = this_var.split(' ')
        for add_var in new_vars:
            if len(add_var) > 0 and not add_var.replace('-', '').replace('.', '').isnumeric():  # .isdigit():
                final_set.add(add_var)
    return list(final_set)

dirt = ['[', ']', '(', ')', ':', ';', '@', '!', '`', '$']  # TODO: deal with var(subvar) parenthesis case
extra_dirt = ['*', '>', '<', '=', '~']
def clean_var(var, extra=False):
    my_dirt = dirt
    if extra:
        my_dirt += extra_dirt
    for d in my_dirt:
        var = var.replace(d, ' ')
    return var.strip()

def v_unassert(code):
    # TODO: derive better way of extracking asserttions & relevant properties
    return unassert(code)

def v_extract_assertions(verilog_code):
    # Regular expression to match Verilog assertions
#     assertion_pattern = r'\bassert\s*\((.*?)\)\s*;'
    
    # Find all matches
#     matches = re.findall(assertion_pattern, verilog_code, re.MULTILINE)
    matches = []
    for line in verilog_code.split('\n'):
        found = line.find('assert')
        if found != -1:
            info = line[found+6:]  # clean_var?
            found2 = info.find('else')
            if found2 == -1:
                found2 = len(info)
            matches.append(info[:found2].strip())
    return matches

def old_v_get_assertions(code):
    ret = []
    for line in code.split('\n'):
        if 'assert' in line:  # property
            ret.append(line.strip())
    return ret

# def v_get_variables(assertions):
#     ret = []
#     for statement in assertions:
#         ret.append(statement.split(' ')[0].strip())
#     return ret

# res = v_extract_variables(example, False, True)
# print("Extracted Variables:", res)

# res = v_extract_assertions(example)
# print("Extracted Assertions:", res)

## Testing Extraction Methods

In [12]:
test_df = pd.read_csv(cwd+"/Data/example-code/verilog examples/pages-gptresponse.csv")
test_df = test_df[~test_df["raw_code"].isna()]
test_df["Code"] = test_df["Code"].apply(lambda x: clean_code(x))
test_df.head()

Unnamed: 0.1,Unnamed: 0,Path,raw_code,Code,Figure,Question,Description,Book
1,1,/Users/korahughes/Documents/GitHub/LLMCodeGen/...,```verilog\nsequence sr1;\n req ##2 (gnt==1...,\nsequence sr1;\n req ##2 (gnt == 1 && req=...,The figure associated with the code present in...,What is an example of a Verilog sequence and p...,This Verilog code defines a sequence called `s...,Mehta
2,2,/Users/korahughes/Documents/GitHub/LLMCodeGen/...,```verilog\nproperty read_latency_check;\n@(po...,\nproperty read_latency_check;\n@(posedge clk)...,The figure associated with the Verilog code pr...,What is an example of a possible solution in V...,These two pieces of Verilog code define proper...,Mehta
3,3,/Users/korahughes/Documents/GitHub/LLMCodeGen/...,```verilog\n`ifdef check3\n property checkd...,\n`ifdef check3\n property checkdAck;\n ...,The image contains Verilog code divided into t...,Can you provide an example of a SystemVerilog ...,This Verilog code sets up a test bench for a b...,Mehta
4,4,/Users/korahughes/Documents/GitHub/LLMCodeGen/...,```verilog\n`ifdef overlap\nproperty pr1;\n ...,\n`ifdef overlap\nproperty pr1;\n @ (posedg...,,What is an example of defining properties for ...,This Verilog code defines two properties relat...,Mehta
5,5,/Users/korahughes/Documents/GitHub/LLMCodeGen/...,```verilog\nproperty ldpcheck;\n @(posedge cl...,\nproperty ldpcheck;\n @(posedge clk) $rose (...,The figure associated with the code present in...,Can you provide an example of a SystemVerilog ...,"This Verilog code defines a property named ""ld...",Mehta


In [52]:
example = test_df.sample().iloc[0]["Code"]
print(example)
# Question: should variable extraction extract properties too?
print("\nExtracted Variables:", v_extract_variables(example, False))


sequence ReadComplete;
    @(posedge clk) $rose(read) ##0 [-> 1] readC;
endproperty

initial
begin
    @(ReadComplete)
    begin
        -> issueNextRead;
    end
end

property checkCounter;
int LCount;
    @(posedge clk) disable iff (!rst_n)
    (
        ($rose(startCount), LCount=initCount ) ##1
        (1, LCount = LCount+1)[*0:$] ##1 (LCount ==  maxCount) |->  (Intr == '1'b1)
    );
endproperty
assert property (checkCounter);


Extracted Variables: ['read', 'rst_n', 'ReadComplete', 'LCount', 'checkCounter', 'clk', '-', 'initCount']


### Improvements for extraction
- better extraction of properties
  - adding parameters of properties as part of schema
  - inserting all variables mensioned in properties/assertions as variables
   (ex: 'clk nBurstRead |=> RdAck [=length] ##1 ReadDone' should allow for clk, nBurstRead, RdAck, and ReadDone)

# VERILOG 1SHOT CODE

In [56]:
def verilog_one_shot(my_dir=cwd+"/Data/example-code/verilog examples/pages-gptresponse.csv", preprocessed=True):
    if not preprocessed:
        print("GETTING CODE")
        vdf = pd.read_csv(my_dir).iloc[:100]

    #     vdf = vdf[~vdf["raw_code"].isna()]
        vdf = vdf.rename({'content': 'Code'}, axis='columns')

        vdf["Code"] = vdf["Code"].apply(lambda x:str(x))
        # vdf["content_len"] = vdf["content"].apply(lambda code: len(code))
        # vdf = vdf.sort_values("content_len", ascending=True)

        print("\nEXTRACTING ASSERTIONS")
        vdf["unasserted"] = vdf["Code"].apply(lambda code: v_unassert(code))  # TODO: fix

        vdf["assertions"] = vdf["Code"].apply(lambda code: v_extract_assertions(code))
        vdf["num_assertions"] = vdf["assertions"].apply(lambda code: len(code))

        all_prompts = len(vdf)
        vdf = vdf[vdf["num_assertions"] > 0]
        all_prompts = 100*len(vdf)/all_prompts
        print("\ndropping prompts with no exctracted assertions =>", str(all_prompts)+'%')

        print("\nEXTRACTING VARIABLES")
        vdf["variables"] = vdf["Code"].apply(lambda code: v_extract_variables(code))
        vdf["num_vars"] = vdf["variables"].apply(lambda code: len(code))

        all_prompts = len(vdf)
        vdf = vdf[vdf["num_vars"] > 0]
        all_prompts = 100*len(vdf)/all_prompts
        print("\ndropping prompts with no extracted variables =>", str(all_prompts)+'%')

        print("\nGENERATING PROMPTS")
        vdf["prompt"] = v_gen_prompt(vdf)
        vdf["prompt_len"] = vdf["prompt"].apply(lambda code: len(code))

        prompt_limit = 8192
        all_prompts = len(vdf)
        vdf = vdf[vdf["prompt_len"] < prompt_limit]
        all_prompts = 100*len(vdf)/all_prompts
        print("dropping prompts over limit =>", str(all_prompts)+'%')

        to_save = cwd+"/Data/preprocessing/preprocessed_verilog_unsupervised.csv"
        vdf.to_csv(to_save) # saving data
        print("Data checkpoint saved...\n")
    # else:
    #     vdf = pd.read_csv(cwd+"/Data/preprocessing/preprocessed_verilog_unsupervised.csv")
    #     print("Data checkpoint restored...")
    # print(vdf.size)
    # size = 50  # chunk size for partial execution
    # for part in range(350, vdf.shape[0], size):  # Note: rate limit timmed our at 350
    #     end = part + size
    #     if end > vdf.shape[0]:
    #         end = vdf.shape[0]
    #     part_df = vdf.iloc[part:end]
        
    #     print("\nGENERATING RESPONSES FOR CHUNK:", part, "to", end)
    #     print("avg #vars =", np.mean(part_df["num_vars"]), "avg #assertions =", np.mean(part_df["num_assertions"]) )
    #     responses = []
    #     for prompt in tqdm(part_df["prompt"]):
    #         responses.append(gpt_oneshot(prompt))
    #     part_df["gpt"] = responses
    #     part_df.to_csv(cwd+"/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows"+
    #                     str(part)+"to"+str(end)+".csv") # saving data
    return vdf

vdf = verilog_one_shot(cwd+"/Data/BigQuery/big/VerilogAssertions-ALL.csv", False)
# print(vdf.sample().iloc[0]["Code"])
vdf

GETTING CODE

EXTRACTING ASSERTIONS

dropping prompts with no exctracted assertions => 100.0%

EXTRACTING VARIABLES


python(2603) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



dropping prompts with no extracted variables => 100.0%

GENERATING PROMPTS


100it [00:00, 5114.13it/s]

dropping prompts over limit => 50.0%
Data checkpoint saved...






Unnamed: 0,repo_name,Code,unasserted,assertions,num_assertions,variables,num_vars,prompt,prompt_len
1,GeoCoq/GeoCoq,Require Export GeoCoq.Elements.OriginalProofs....,\n1Require Export GeoCoq.Elements.OriginalProo...,"[(~ LtA D E F A B C)., (LtA A B C A B C) by (c...",114,"[Out, TS, neq, Tf, eq, Cong, LtA, BetS, Col, OS]",10,You are a helpful bot that adds assertions to ...,2287
2,tgingold/ghdl,entity tb_concat01 is\nend tb_concat01;\n\nlib...,\n1entity tb_concat01 is\n2end tb_concat01;\n3...,"[a = x""ab9e"" severity failure;]",1,[a],1,You are a helpful bot that adds assertions to ...,1803
4,timofonic/1541UltimateII,library ieee;\r\nuse ieee.std_logic_1164.all;\...,\n1library ieee;\r\n2use ieee.std_logic_1164.a...,"[(g_type = ""Fibonacci"") or (g_type = ""Galois""), ]",2,"[new_bit, xor, &, i, downto, reg, reg'high, g_...",15,You are a helpful bot that adds assertions to ...,3665
8,tgingold/ghdl,entity tb_assert5 is\n generic (with_err : bo...,\n1 generic (with_err : boolean := False);\n2...,"[5 is, 5;, 5 is, 5, res = '0' severity failure...",10,[res],1,You are a helpful bot that adds assertions to ...,2437
10,konne88/bagpipe,Ltac subst_max :=\n repeat match goal with\n ...,\n1Ltac subst_max :=\n2 repeat match goal wit...,"[(f X = f Y), P as H' by (tac), P as H', (f X ...",4,"[f, P]",2,You are a helpful bot that adds assertions to ...,4906
12,peteut/ghdl,\n-- Copyright (C) 2001 Bill Billowitch.\n\n--...,\n1\n2-- Copyright (C) 2001 Bill Billowitch.\n...,"[F3= 3, F2 = 3, NOT((S1 = C70) and (S2 = C70))...",4,"[NOT, S1, F3, F2, subtype]",5,You are a helpful bot that adds assertions to ...,4826
13,sighingnow/sighingnow.github.io,(** Proof for the fact that square root of 2 i...,\n1(** Proof for the fact that square root of ...,"[(hypothesis_positive : forall k : positive, N...",9,"[hypothesis_positive, q_gt_0_R, ctz_p_eq_S_ctz...",9,You are a helpful bot that adds assertions to ...,4658
19,chibby0ne/vhdl-book,--! \n--! Copyright (C) 2010 - 2013 Creonic Gm...,\n1--! \n2--! Copyright (C) 2010 - 2013 Creoni...,[false],1,"[ns, false]",2,You are a helpful bot that adds assertions to ...,3761
20,promlow/books-and-tutorials,Require Export ZArith.\nRequire Export Arith.\...,\n1Require Export ZArith.\n2Require Export Ari...,"[(H':k'=0)., (H': S k' <= 0)., (Hmod:(Z_of_nat...",7,"[H', HkleSSp'', Hle', Hmod]",4,You are a helpful bot that adds assertions to ...,7206
26,nickg/nvc,package fold2_pkg is\n type int_vec_2d is a...,\n1package fold2_pkg is\n2 type int_vec_2d ...,"[G(1, 2) = 2;]",1,[G],1,You are a helpful bot that adds assertions to ...,2440


In [59]:
print(vdf.sample().iloc[0]["prompt"])

You are a helpful bot that adds assertions to pieces of Verilog code.
You will be given a string of code presented in the format:
*Variables:
...
*Code:
...
Generate assertions based on the following criteria:
1) Assert that the function can take in all inputs necessary to complete the process
2) Assert that all outputs are of the proper sizes.

Your response should ONLY be a list of assertions in the format:
[timing, subject_variable, target, reasoning]
 -timing is the clock cycle(s) at which the assertion is checked relative to the present cycle
 -subject_variable and target can ONLY be variables present in the code, integers, booleans, or None
 -reasoning is a short decription of why the assertion was made

Here is an example of what your input will look like and what you should return:
Example Input:
*Variables:
[clkstart, pr1, cStart, sr1]
*Code:
assign clkstart = clk && gGate;
sequence sr1;
    req ##2 gnt;
endsequence

property pr1;
    @(posedge clkstart) cStart |-> sr1;
endpro

## Basic Evaluation of Verilog Results

In [13]:
veridir = cwd + "/Data/gpt-responses/verilog-code/"
verilog_results = [veridir+file for file in os.listdir(veridir) if "unsupervised_verilog_responses_rows" in file]
verilog_results

['/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows400to450.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows300to350.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows450to500.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows350to400.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows650to700.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows50to100.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervised_verilog_responses_rows150to200.csv',
 '/Users/korahughes/Documents/GitHub/LLMCodeGen/Data/gpt-responses/verilog-code/unsupervise

In [21]:
vdf = pd.read_csv(verilog_results[0]).sort_values("num_vars", ascending=False)
vdf.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,repo_name,Code,unasserted,assertions,num_assertions,variables,num_vars,prompt,prompt_len,gpt
7,407,2314,EliasLuiz/TCC,----------------------------------------------...,\n--------------------------------------------...,"[""ed while empty='0', and"", 'ed, the last read...",4,"[':= 1', ':= 0', ':= 0 -- 1 = first word f...",6,You are a helpful bot that adds assertions to ...,6866,The assertions for the given VHDL code would d...
23,423,2367,verilator/verilator,// DESCRIPTION: Verilator: Verilog Test module...,\n// DESCRIPTION: Verilator: Verilog Test modu...,"['', '#0 (1);', 'final (1);', 'ion']",4,"['reset = (cyc < 2)', 'cyc', 'toggle', 'C1', '...",6,You are a helpful bot that adds assertions to ...,5538,"[['reset = (cyc < 2)', 'cyc', True, 'cyc shoul..."
0,400,2283,peteut/ghdl,\n-- Copyright (C) 2001 Bill Billowitch.\n\n--...,\n\n-- Copyright (C) 2001 Bill Billowitch.\n\n...,"['F3= 3', 'F2 = 3', 'NOT((S1 = C66) and (S2 = ...",4,"[')', ':= 3', 'range <>) of record_cons_array'...",5,You are a helpful bot that adds assertions to ...,6300,"[1, 'F1', 3, ""F1 should be initialized to 3""],..."
6,406,2312,nickg/nvc,\n-- Copyright (C) 2001 Bill Billowitch.\n\n--...,\n\n-- Copyright (C) 2001 Bill Billowitch.\n\n...,"['F3= 3', 'F2 = 3', 'NOT((S1 = C66) and (S2 = ...",4,"[')', ':= 3', ':= 3.0', ':= 3 ns', 'range <>) ...",5,You are a helpful bot that adds assertions to ...,7244,"['F1', 'F2', 'F3', ""All input and output varia..."
17,417,2346,litex-hub/pythondata-cpu-blackparrot,// MBT 11/9/2014\n//\n// Synchronous 1-port ra...,\n// MBT 11/9/2014\n//\n// Synchronous 1-port ...,"[""( (v_i !== 1'b1) || (reset_i === 'X) || (res...",1,"['ata_out', 'ead_en', 'unused0 = &{clk_i', 'at...",5,You are a helpful bot that adds assertions to ...,3700,"[['BSG_SAFE_MINUS(width_p, 1):0', data_i, None..."


In [33]:
ind = 4
print("Showing Data for content_len sorted data Ind"+str(ind))
print("\nExtracted Assertion:")
print(vdf.iloc[ind]["assertions"])

print("\nVariables:")
print(vdf.iloc[ind]["variables"])

print("\nGPT Response:")
print(vdf.iloc[ind]["gpt"])

print("\nCode:")
print(vdf.iloc[ind]["Code"])  # unasserted


# print("\nPROMPT:")
# print(vdf.iloc[ind]["prompt"])

Showing Data for content_len sorted data Ind4

Extracted Assertion:
["( (v_i !== 1'b1) || (reset_i === 'X) || (reset_i === 1'b1) || (addr_i < els_p))"]

Variables:
['ata_out', 'ead_en', 'unused0 = &{clk_i', 'ata_o', 'unused = reset_i']

GPT Response:
[['BSG_SAFE_MINUS(width_p, 1):0', data_i, None, "Input data_i must be proper size per width_p parameter"],
 ['addr_width_lp-1:0', addr_i, None, "Input addr_i must be proper size per addr_width_lp parameter"],
 ['BSG_SAFE_MINUS(width_p, 1):0', data_o, None, "Output data_o must be proper size per width_p parameter"],
 ['clk_i', reset_i, None, "Reset_i signal changes should be synced with the clock clk_i"],
 ['data_out', mem[addr_r], None, "data_out must equal mem array value at addr_r address"],
 [None, read_en, v_i & ~w_i, "read_en signal value should reflect read enable condition"],
 ['clk_i', 'v_i & w_i', mem[addr_i], "Memory write operations should occur at positive edges of clk_i if both v_i and w_i are high"]]

Code:
// MBT 11/9/2014
/

# Evaluate GPT's Response

## Step 5) Restore the assertion(s) generated to code and evaluate
> Metrics of evaluation, does it run? does it add to the code? is it ground-truth-like? human evaluator rank? gpt evaluator rank?

In [25]:
def get_gpt_assertions(response, code):
    """ takes in chat gpt's response and outputs its assertions as well as a string of code with said assertions in it """
    asserts = []
    parsed_code = code.split('\n')
    for line in response.split('\n'):
        line.replace('[', '').replace(']', '')
        separated = line.split(',')
        full_assert = separated[1:-1] # ommit reasoning
        
        # TODO: handle case where there are other ints in the code
        line_num = separated[0]
        num_size = len(str(line_num))
        has_found = False
        for i, line in enumerate(parsed_code):
            if line_num in line[:num_size+1]:
                parsed_code.insert(i+1, full_assert)
                asserts.append(full_assert)
                has_found = True
                break
        if not has_found:
            print("Could not find location of\n", full_assert, "\nin\n", code)
    return '\n'.join(parsed_code), asserts

example_response = tester_df.sample()
print(example_response["gpt"].iloc[0])
temp_test = get_gpt_assertions(example_response["gpt"].iloc[0], example_response["unasserted"].iloc[0])
print(temp_test)

In [None]:
gpt_asserted_code = []  # snippets of code greated by the response assertions from gpt
gpt_assertions = []  # the decoded assertions themselves
gpt_num_assertions = []  # the number of assertions gpt generated
gpt_ratio_assertions = []   # num_gen_assertions / num_parsed_assertions
gpt_matched_assertions = []  # assertions that roughly equal ground-truth
gpt_matched_assertions_ratio = []  # num_matched_assertions / num_ground_truth_assertions

for i, row in tester_df.iterrows():
    new_code, asserts = get_gpt_assertions(row["gpt"], row["Unasserted"])
    gpt_asserted_code.append(new_code)
    gpt_assertions.append(asserts)
    gpt_num_assertions.append(len(asserts))
    gpt_ratio_assertions.append(len(asserts)/row["parsed_lines"])
    # TODO get number of matching assertions
    matched_num = ...
    gpt_matched_assertions.append(matched_num)
    gpt_matched_assertions_ratio.append(matched_num/len(asserts))
tester_df["gpt_asserted_code"] = gpt_asserted_code
tester_df["gpt_assertions"] = gpt_assertions
tester_df["gpt_num_assertions"] = gpt_num_assertions
tester_df["gpt_ratio_assertions"] = gpt_ratio_assertions
tester_df["gpt_matched_assertions"] = gpt_matched_assertions
tester_df["gpt_matched_assertions_ratio"] = gpt_matched_assertions_ratio

In [None]:
tester = df.iloc[ind]["assertions"][1:-1].split('], [')
# print(tester)
print(df.iloc[ind]["assertions"][1:-1])
# def revive_assertion(my_list):
#     my_list[1:-1]

# for i, row in df.iterrows():
#     for list(assertion) in row["assertions"]:
#         to_find = assertion.replace('[','').replace(']','')
#         if to_find in row["gpt"]:
#             print("Found a match!")
#             print(assertion)
#             print("found at")
#             print(row["gpt"])
# print("\nDONE")

In [None]:
""" TODO: test word-mover's distance comparison? """

## Step 6) Compare to Ground Truth

## Step 7) Derive Semantic Statistics & Determine Assertion Quality