In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

cwd = os.getcwd()  # get directory for storage

# This file automates the entire pipeline for assertion generation with chatgpt

## Step 1) Get Asserted Code From Github

### Step 1.1) Clean and process the code
### Step 1.2) Extract Ground-Truth Assertions & Relevant Statistics

In [2]:
from google.cloud import bigquery as bq

def get_asserted_code(num=100000, ext="%.py", verbose=True):
    query_string = """SELECT f.repo_name, c.content
FROM `bigquery-public-data.github_repos.files` AS f
JOIN `bigquery-public-data.github_repos.contents` AS c
ON f.id = c.id
WHERE
NOT c.binary
AND f.path LIKE '%.py'
AND REGEXP_CONTAINS(c.content, r'(?m)^\s*assert ')
LIMIT """ + str(num)
    
    if isinstance(num, int):
        secret_dir = "Data/secret/"
        api_key = cwd + "/" + secret_dir + os.listdir(secret_dir)[0]
        assert api_key[-5:] == ".json"  # confirm that it was found
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = api_key
        query_string = query_string.replace("%.py", ext)

        if verbose:
            print("*Running Query:")
            print(query_string)
            print()
        client = bq.Client()
        df = (
            client.query(query_string)
            .result()
            .to_dataframe(
                create_bqstorage_client=True,
            )
        )
    elif isinstance(num, str):
        # load data from file
        df = pd.read_csv(num)
        print("Found data at", num)
    else:
        print("first param type undefined, must be string signifying directory of csv or\
               int signifying number of records to scrib from bigquery...")
        assert False
    
    if verbose:
        print("*Handling Duplicates...")
    init_len = len(df)
    df.drop_duplicates(subset=["content"], keep="first", inplace=True)
    if verbose:
        print("#Non-duplicates / #Total Retrieved =", (len(df)/init_len))
    return df

verilog_dir = cwd+"/Data/BigQuery/VerilogAssertions-ALL.csv"
python_dir = cwd+"/Data/BigQuery/PythonAssertions100k.csv"
# df = get_asserted_code(python_dir)  # 10
# df

In [3]:
conditionals = dict([[cond, i] for i, cond in enumerate(["==", "!=", "<=", ">=", "<", ">"])])
compounding_statements = ["and"]
bad_statements = [" or ", " in ", "isinstance"]  # TODO: properly account for OR
def parse_assertions(func, is_split=True, verbose=False):
    """
    Format: "assert [expression], [return_string]"
    
    Exceptions to Handle:
    - 'in'/'not in' keyword
    - boolean functions - ex. isinstance(var, type)
    - separation of attributes - ex. len(var), var[i]
    """
#     if verbose:
#         print("*Extracting Assertions...")
    out = []
    asserted_lines = 0
    lines = []
    for temp in func.split('\n'):  # find lines with assert in them
        if "assert" in temp:
            asserted_lines += 1
            bad_flag = False
            for bad in bad_statements:
                if bad in temp:
                    bad_flag = True
            if not bad_flag:
                lines.append(temp.strip())
    # TODO: experiment with smaller content window for assertions
    ind = 0
    while ind < len(lines):
        data = lines[ind].strip()
        start = data.find('assert')
        if start == -1:  # double checking that the assertion exists in this line
            ind += 1
            continue
        # account for combination statements
        for statement in compounding_statements:
            add_statement = data.find(statement)
            if add_statement != -1:
                extra_line = data[add_statement+len(statement):]
                lines.insert(ind+1, "assert "+extra_line)
                data = data[:add_statement].strip()

        com = data.find(',')   # parsing out return_string
        if com != -1:
            data = data[:com]
        com = data.find('#')
        if com != -1:   # parsing out comments
            if com < start:  # if the assertion itself is a comment
                ind += 1
                continue
            else:
                data = data[:com]

        if is_split:  # splitting the assertion into components for analysis
            data = [var.strip() for var in data.split(' ') if len(var.strip()) > 0]
            
            if len(data) < 1:  # edge case: nothing after 'assert' (likely typo)
                if verbose:
                    print("empty assertion found?: ", data, '\n', lines[ind])
                ind += 1
                continue
                
            if data[0] != "assert":  # edge case: something before the 'assert' statement
                ind += 1
#                 if verbose:
#                     print("something was found before the assertion on this line:\n", data)
                continue
    
            data = data[1:]  # from here on we only care about the content after the 'assert' keyword
            if len(data) < 1:  # edge case: nothing after 'assert' (likely typo)
                if verbose:
                    print("empty assertion found?: ", data, '\n', lines[ind])
                ind += 1
                continue

            condition = True  # assertion [variable] == condition by default
            if data[0] == "not":  # accounting for 'not' keyword
                condition = False
                data = data[1:]
            
            if len(data) == 1:  # adding == to simlify
                data = data + ["==", str(condition)]

            for i in range(len(data)):
                if data[i] == "is":  # simplifying is to ==
                    data[i] = "=="
                if data[i] in conditionals.keys():  # parsing common conditionals
                    data = [' '.join(data[:i]), data[i], ' '.join(data[i+1:])]  # conditionals[data[i]]
                    break

        if verbose and len(data) != 3:
            print("Weird assertion found:\n", data, '\n', lines[ind])
            print()
#             assert len(data) == 3, "found conditional-less assertion:\n" + str(data) + '\n' + str(lines[ind-1:ind+2])
        else:
            out.append(data)
        ind += 1
    return out, asserted_lines

def unassert(code, delim=''):
    out = ""
    counter = 1
    for line in code.split('\n'):
        if "assert" not in line:
            out += '\n'+str(counter)+delim+line
            counter += 1
    return out

def get_assertion(temp_df, verbose=False, unassert_col=True, add_stats=True):
    """ run assertion generation """
    # tester_df["assertions"] = tester_df["content"].apply(lambda code: get_assertions(code))
    
    assertions = []  # list of parsed assertions
    asserted_lines = []  # number of lines with 'assert' in them
    parsed_lines = []  # number of assertions easily parsed
    arr = []  # assertion recovery ratio
    atl = []  # assertions to size
    for i, row in tqdm(temp_df.iterrows()):
        parsed, lines = parse_assertions(row["content"], True, verbose)
        assertions.append(parsed)
        asserted_lines.append(lines)
        parsed_lines.append(len(parsed))
        arr.append(len(parsed)/lines)
        atl.append(len(parsed)/len(row["content"]))

    if unassert_col:
        temp_df["unasserted"] = temp_df["content"].apply(lambda code: unassert(code))
    
    if add_stats:
        temp_df["assertions"] = assertions
        temp_df["asserted_lines"] = asserted_lines
        temp_df["parsed_lines"] = parsed_lines
        temp_df["arr"] = arr
        temp_df["atl"] = atl
    return temp_df

# tester_df = df.copy()
# tester_df = get_assertion(tester_df)
# tester_df

## Step 2) Generate LLM Prompt & Query a GPT

In [9]:
banned_vars = ['', '*', 'self']
# TODO: add variables from extracted assertions - duplicates
def old_get_variables(func, verbose=False):
    out = []
    for line in func.split('\n'):
        line = line.strip()
        if "def " in line:  # add params if its a function
            start = line.find('(')
            end = line.find(')')
            for new_param in line[start+1:end].split(','):
                default = new_param.find("=")
                if default != -1:
                    new_param = new_param[:default]
                new_param = new_param.strip()
                if new_param not in out and new_param not in banned_vars:
                    if verbose:
                        print("*Found  {", new_param, "}  at:\n", line, '\n')
                    out.append(new_param)
        else: # add variables if equals operation
            find_var = line.find(' = ')
            if find_var != -1:
                new_var = line[:find_var].strip()
                
                if ',' in new_var: # handle tuple equalities edge case (ex: a, b, c = fn_output())
                    var_list = [tuple_var.strip() for tuple_var in new_var.split(',')]
                else:
                    var_list = [new_var]
                for new_var in var_list:
                    if new_var not in out and new_var not in banned_vars:
                        if verbose:
                            print("**Found  {", new_var, "}  at:\n", line, '\n')
                        out.append(new_var)
            # TODO: handle indexing
    return out


# test
import ast

def get_variables(code):  # TODO: run a proper test
    tree = ast.parse(code)
    variables = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Assign):
            for target in node.targets:
                if isinstance(target, ast.Name):
                    variables.append(target.id)
    return variables

def get_all_variables(df):  # TODO test
    ret = []
    for i, row in tqdm(df.iterrows()):
        try:
            these_vars = get_variables(row["content"])
        except:
            these_vars = old_get_variables(row["content"])
        for statement in row["assertions"]:
            if len(statement) > 0:
                new_var = str(statement[0])
                if new_var not in these_vars:
                    these_vars.append(new_var)
        ret.append(these_vars)
    return ret

# TODO find package that automatically gets variables

# out = get_variables(df.sample()["content"].iloc[0])
# get_vars = lambda code: get_variables(code)
# tester_df["variables"] = tester_df["content"].apply(get_vars)
# tester_df

In [5]:
# from ipynb.fs.full.Data.GitHub-Assertions import get_variables
class prompt_example:
        def __init__(self, this_in="", this_out=""):
            self.input = this_in
            self.output = this_out
            
        def composite(self):
            return "Example Input:\n" + self.input + "\nExample Output:\n" + self.output
        
class LLM_prompt:       
    def __init__(self, input_code="*Variables:\n[flag, num, i]\n*Code:\n1num = int(input(\"Enter a number: \"))  # Program to check if a number is prime or not\n2flag = False  # define a flag variable\n3\n4if num == 1:\n5    print(num, \"is not a prime number\")\n6elif num > 1: # check for factors\n7    for i in range(2, num):\n8        if (num % i) == 0:\n9            flag = True  # if factor is found, set flag to True\n10            break  # break out of loop\n11    if flag:  # check if flag is True\n12        print(num, \"is not a prime number\")\n13    else:\n14        print(num, \"is a prime number\")",
                 example_in="*Variables:\n[n]\n*Code:\n1def fibonacci(n):\n2   if n <= 1:\n3       return n\n4   else:\n5       return(recur_fibo(n-1) + recur_fibo(n-2))",
                 example_out="[1, n, >=, 1, \"the fibonacci sequence can only be done on posative integers\"]\n\nWhich would be the same as:\n1def fibonacci(n):\n2   assert n >= 1\n3   if n <= 1:\n4       return n\n5   else:\n6       return(recur_fibo(n-1) + recur_fibo(n-2))", 
                 criteria=["Assert that the function can take in all inputs necessary to complete the process",
                           "Assert that all outputs are of the proper sizes."]
                 ):
        self.criteria = criteria
        self.example = prompt_example(example_in, example_out)
        self.input_code = input_code
        
        # default params that are less likely to change
        self.intro = "You are a helpful bot that adds assertions to pieces of Python code."  
        self.input_format = "You will be given a list of variables and a string of code presented in the format:\n*Variables:\n[...]\n*Code:\n..."
        self.criteria_transition = "Generate assertions based on the following criteria:"
        self.output_format = "Your response should ONLY be a list of assertions in the format:\n[line_number, subject_variable, condition_type, target, reasoning]"
        self.output_format_description = ["line_number is an integer referencing the line after which the assertion should be inserted",
                                          "subject_variable and target can ONLY be variables from the input list, integers, booleans, or None", # TODO retest bools
                                          "condition_type can only be a value in this list: [==, >=, <=, !=]",
                                          "reasoning is a short decription of why the assertion was made"]
        self.example_transition = "Here is an example of what your input will look like and what you should return:"
        self.input_transition = "Here is the actual input you should provide assertions for:"
    
    
    
    def composite_criteria(self):
        """ return criteria as a single string"""
        ret = ""
        for i, crit in enumerate(self.criteria):
            ret += str(i+1) + ") " + crit
            if i != len(self.criteria)-1:  # ignore last instance for formatting
                ret += '\n'
        return ret
    
    def composite_output_formatting(self):
        ret = self.output_format
        for desc in self.output_format_description:
            ret += "\n -" + desc 
        return ret
    
    def prompt(self):
        """ return entire prompt"""
        return '\n'.join([self.intro, self.input_format,
                          self.criteria_transition, self.composite_criteria(), "",
                          self.composite_output_formatting(), "",
                          self.example_transition, self.example.composite(), "\n",
                          self.input_transition, self.input_code])
    
    def to_list(self):
        """ return key prompt components as a list """
        return [self.intro, self.formatting, self.criteria, self.example, self.input_code, self.prompt()]
    
    def __str__(self):
        return self.prompt()
    def __repr__(self):
        return self.prompt()

tester = LLM_prompt()
print(len(str(tester)))
tester

# fib_input = "def fibonacci(n):\nassert n >= 1\nif n <= 1:\nreturn n\nelse:\nreturn(recur_fibo(n-1) + recur_fibo(n-2))"
# fib_output = "[1, n, 1, 1, the fibonacci sequence can only be done on posative integers]"

1931


You are a helpful bot that adds assertions to pieces of Python code.
You will be given a list of variables and a string of code presented in the format:
*Variables:
[...]
*Code:
...
Generate assertions based on the following criteria:
1) Assert that the function can take in all inputs necessary to complete the process
2) Assert that all outputs are of the proper sizes.

Your response should ONLY be a list of assertions in the format:
[line_number, subject_variable, condition_type, target, reasoning]
 -line_number is an integer referencing the line after which the assertion should be inserted
 -subject_variable and target can ONLY be variables from the input list, integers, booleans, or None
 -condition_type can only be a value in this list: [==, >=, <=, !=]
 -reasoning is a short decription of why the assertion was made

Here is an example of what your input will look like and what you should return:
Example Input:
*Variables:
[n]
*Code:
1def fibonacci(n):
2   if n <= 1:
3       return

In [6]:
def make_prompts(temp_df):
    prompts = []
    for i, row in tqdm(temp_df.iterrows()):
        # *Variables:\n[flag, num, i]\n*Code:\n
        prompt_param = "*Variables:\n" + str(row["variables"]) + "\n*Code:\n" + row["unasserted"]
        prompts.append(str(LLM_prompt(prompt_param)))
    temp_df["prompt"] = prompts
    temp_df["prompt_len"] = [len(p) for p in prompts]
    return temp_df
# tester_df = make_prompts(tester_df)
# tester_df

In [7]:
# querying
import openai
import altair as alt
import json
from vega_datasets import data

OPENAI_API_KEY = "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" # os.environ['OPENAI_API_KEY']
openai.api_key = OPENAI_API_KEY
def run_gpt4(messages):
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )
    return response["choices"][0]["message"]["content"]


def gpt_oneshot(input_prompt, directive="You are a helpful bot that adds assertions to pieces of Python code.", verbose=False):
    message_hist = [{"role": "system", "content": directive},
                    {"role": "user", "content": input_prompt}]  # init
    response = run_gpt4(message_hist)
#     if verbose:
#         print("chat_gpt: ", response, '\n')
#     message_hist.append({"role": "system", "content": response})
    return response

# print("\n\n", gpt_oneshot("what do you do?"))

In [170]:
df = get_asserted_code(cwd+"/Data/BigQuery/PythonAssertions100k.csv", "%.py", False)
df = get_assertion(df)
df

Found data at /Users/korahughes/Documents/GitHub/LLMCodeGen/Data/BigQuery/PythonAssertions100k.csv


33793it [00:06, 4947.82it/s]


Unnamed: 0,repo_name,content,unasserted,assertions,asserted_lines,parsed_lines,arr,atl
0,tqchen/tvm,# Licensed to the Apache Software Foundation (...,\n1# Licensed to the Apache Software Foundatio...,"[[len(glbs), ==, 3], [len(result), ==, 2], [le...",29,4,0.137931,0.000376
1,Lujeni/ansible,# (c) 2017 Red Hat Inc.\n#\n# This file is par...,\n1# (c) 2017 Red Hat Inc.\n2#\n3# This file i...,"[[exc_info.match('FAIL'), ==, True], [result['...",29,20,0.689655,0.002668
2,lukas-hetzenecker/home-assistant,"""""""The tests for the Pilight sensor platform.""...","\n1""""""The tests for the Pilight sensor platfor...","[[await, async_setup_component(], [state.state...",14,9,0.642857,0.002150
3,schnoebe/fedora-mock,import fcntl\nimport glob\nimport grp\nimport ...,\n1import fcntl\n2import glob\n3import grp\n4i...,"[[our_dir, ==, True]]",1,1,1.000000,0.000044
4,samstav/fastfood,# -*- coding: utf-8 -*-\n# Copyright 2015 Rack...,\n1# -*- coding: utf-8 -*-\n2# Copyright 2015 ...,"[[len(key_val), ==, 2]]",1,1,1.000000,0.000115
...,...,...,...,...,...,...,...,...
33788,raphaelm/django-i18nfield,from i18nfield.admin import I18nModelAdmin\nfr...,\n1from i18nfield.admin import I18nModelAdmin\...,"[[admin.formfield_overrides[I18nCharField], ==...",4,1,0.250000,0.001441
33789,fniephaus/alfred-rworkflow,# The MIT License (MIT)\n#\n# Copyright (c) 20...,\n1# The MIT License (MIT)\n2#\n3# Copyright (...,"[[offset, >, 0]]",1,1,1.000000,0.000223
33790,bgris/ODL_bgris,# -*- coding: utf-8 -*-\r\n#\r\n# Copyright © ...,\n1# -*- coding: utf-8 -*-\r\n2#\r\n3# Copyrig...,[],1,0,0.000000,0.000000
33791,chrsrds/scikit-learn,"""""""\nTesting for the base module (sklearn.ense...","\n1""""""\n2Testing for the base module (sklearn....","[[3, ==, len(ensemble)], [3, ==, len(ensemble....",20,18,0.900000,0.003563


In [None]:
temp = df.iloc[0]["assertions"]
print(type(temp[0]))
print(temp[0][0])

# RUNNING 1SHOT CODE

In [8]:
def one_shot_prompts(my_dir=cwd+"/Data/BigQuery/PythonAssertions100k.csv", ext="%.py"):
    print("GETTING CODE")
    df = get_asserted_code(my_dir, ext, False)
    
    print("\nEXTRACTING ASSERTIONS")
    df = get_assertion(df)
    
    all_prompts = len(df)
    df = df[df["parsed_lines"]!=0]
    all_prompts = 100*len(df)/all_prompts
    print("dropping code with no parsed assertions =>", str(all_prompts)+'%')
    
    print("\nEXTRACTING VARIABLES")
#     get_vars = lambda code: get_variables(code)
#     df["variables"] = df["content"].apply(get_vars)
    df["variables"] = get_all_variables(df)
    
    df["num_vars"] = df["variables"].apply(lambda var: len(var))
    all_prompts = len(df)
    df = df[df["num_vars"] > 0]
    all_prompts = 100*len(df)/all_prompts
    print("dropping code with no extracted variables =>", str(all_prompts)+'%')
    
    print("\nGENERATING PROMPTS")
    df = make_prompts(df)
    
    prompt_limit = 8192
    all_prompts = len(df)
    df = df[df["prompt_len"] < prompt_limit]
    all_prompts = 100*len(df)/all_prompts
    print("dropping prompts over limit =>", str(all_prompts)+'%')
    
    df.to_csv(cwd+"/Data/python_prompts_noresponse3.csv") # saving data
    print("Data checkpoint saved...\n")
    
#     print("GENERATING RESPONSES")
#     responses = []
#     for prompt in tqdm(df["prompt"]):
#         responses.append(gpt_oneshot(prompt))
#     df["gpt"] = responses
#     df.to_csv(cwd+"/python_prompts_withresponse.csv") # saving data
    
    return df

df = one_shot_prompts()

GETTING CODE
Found data at /Users/korahughes/Documents/GitHub/LLMCodeGen/Data/BigQuery/PythonAssertions100k.csv

EXTRACTING ASSERTIONS


33793it [00:06, 5053.96it/s]


dropping code with no parsed assertions => 89.8351729648152%

EXTRACTING VARIABLES
dropping code with no extracted variables => 100.0%

GENERATING PROMPTS


30358it [00:02, 13359.44it/s]


dropping prompts over limit => 48.35298768034785%
Data checkpoint saved...



## PARTIAL EXECUTION OF 1SHOT

In [None]:
part_size = 200  # should take about a bit under an hour
current_size = 14464
def partial_execution(part=1):  # slices of 10xt
    start = (part-1)*part_size
    end = start+part_size
#     if start > current_size:
#         return
#     elif end > current_size:
#         end = current_size+1
    df = pd.read_csv(cwd+"/Data/python_prompts_noresponse.csv").iloc[start:end, 1:]
    print("\nGenerating prompts for indexes", start, "to", end)
    
    responses = []
    for prompt in tqdm(df["prompt"]):
        responses.append(gpt_oneshot(prompt))
    df["gpt"] = responses
    
    df.to_csv((cwd+"/Data/Testing/python_prompts_withresponse_part"+str(part)+".csv"), index=False) # saving data
    print("Saved to:", (cwd+"/Data/Testing/python_prompts_withresponse_part"+str(part)+".csv"))
    return df

df = partial_execution(2)
# for i in range(1, 10):  # up to 73
#     print(partial_execution(i))
#     print('\n')


Generating prompts for indexes 200 to 400


  0%|                                                        | 0/200 [00:00<?, ?it/s]

In [None]:
""" Execution Notes:
- somewhere around 320 instances I get various API errors:
'APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Tue, 13 Feb 2024 09:21:48 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '854c009eade719c3-EWR', 'alt-svc': 'h3=":443"; ma=86400'}'

"""

In [149]:
df = pd.read_csv(cwd+"/Data/Testing/python_prompts_withresponse_part1.csv").sort_values("prompt_len", ascending=True)
df

Unnamed: 0,repo_name,content,unasserted,assertions,asserted_lines,parsed_lines,arr,atl,variables,num_vars,prompt,prompt_len,gpt
117,tiramisusolutions/carson,# Example test to check if nginx is installed\...,\n1# Example test to check if nginx is install...,"[['nginx.is_installed', '==', 'True']]",1,1,1.000000,0.006897,"['host', 'nginx']",2,You are a helpful bot that adds assertions to ...,1517,"[[2, 'host', '!=', None, ""the host should not ..."
179,scop/bash-completion,"import pytest\n\n\n@pytest.mark.bashcomp(cmd=""...",\n1import pytest\n2\n3\n4@pytest.mark.bashcomp...,"[['completion', '==', 'True']]",1,1,1.000000,0.005525,['completion'],1,You are a helpful bot that adds assertions to ...,1558,"[[7, 'completion', '!=', None, ""the 'completio..."
194,liqd/adhocracy3.mercator,from pytest import mark\n\n\n@mark.usefixtures...,\n1from pytest import mark\n2\n3\n4@mark.usefi...,"[[""root_acm_extension['principals']"", '!=', '[...",2,2,1.000000,0.005525,"['registry', 'context', 'root_acm_extension']",3,You are a helpful bot that adds assertions to ...,1695,"[[7, 'registry', '!=', None, ""assert that the ..."
95,bgris/ODL_bgris,"r""""""\nTests for QtAwesome.\n""""""\n# Standard li...","\n1r""""""\n2Tests for QtAwesome.\n3""""""\n4# Stand...","[['output_number', '==', '0']]",1,1,1.000000,0.002778,['output_number'],1,You are a helpful bot that adds assertions to ...,1751,"[[11, 'output_number', '>=', 0, ""output_number..."
55,jonnylamb/debexpo,import os.path\nimport pylons\nfrom paste.depl...,\n1import os.path\n2import pylons\n3from paste...,"[['os.path.exists(ini_path)', '==', 'True']]",1,1,1.000000,0.002618,"['ini_path', 'conf', 'pylons.config']",3,You are a helpful bot that adds assertions to ...,1783,"[[6, 'ini_path', '!=', '', 'the ini_path shoul..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,rolando/scrapy,import warnings\nimport weakref\nfrom twisted....,\n1import warnings\n2import weakref\n3from twi...,"[['hasattr(x', '==', 'False'], [""text'):"", '==...",53,2,0.037736,0.000223,"['body', 'response', 'sel', 'xl', 'url', 'root...",21,You are a helpful bot that adds assertions to ...,7620,"[[5, 'Selector', '!=', None, ""Selector should ..."
129,jeffery-do/Vizdoombot,import numpy as np\nfrom skimage.morphology im...,\n1import numpy as np\n2from skimage.morpholog...,"[['numpy.any(blocks', '==', '4)'], ['np.all(re...",16,8,0.500000,0.001227,"['im', 'result', 'im[0', '0]', '1]', 'im[3', '...",39,You are a helpful bot that adds assertions to ...,7841,"[[13, 'im', '==', 'result', 'the result of ske..."
135,reahl/reahl,# Copyright 2016-2021 Reahl Software Services ...,\n1# Copyright 2016-2021 Reahl Software Servic...,"[['browser.is_element_present(""//a[@title=\'Ho...",6,8,1.333333,0.001262,"['poppedup_contents', 'web_fixture', 'popup_a_...",15,You are a helpful bot that adds assertions to ...,7899,"[34, 'web_fixture', '!=', None, ""web_fixture s..."
136,shaunstanislaus/magic-wormhole,"from hashlib import sha256, sha1\nimport hmac\...","\n1from hashlib import sha256, sha1\n2import h...","[['dkLen', '<=', 'hlen*255'], ['s.startswith(p...",5,2,0.400000,0.000319,"['SKM', 'dkLen', 'XTS', 'CTXinfo', 'digest', '...",21,You are a helpful bot that adds assertions to ...,7963,"[[6, 'SKM', '!=', None, 'Secret key must be pr..."


In [19]:
df.describe()

Unnamed: 0,asserted_lines,parsed_lines,arr,atl,num_vars,prompt_len
count,200.0,200.0,200.0,200.0,200.0,200.0
mean,10.965,8.525,0.849637,0.002552,13.96,4531.53
std,15.496921,13.600893,0.294534,0.002973,9.514781,1732.097439
min,1.0,1.0,0.037736,0.000185,1.0,1517.0
25%,2.0,1.0,0.746528,0.000588,6.0,2944.25
50%,4.0,3.0,1.0,0.001417,11.5,4519.0
75%,14.0,8.0,1.0,0.003679,20.25,6041.25
max,99.0,95.0,2.0,0.023152,45.0,8140.0


In [151]:
ind = 3
print("Showing Data for prompt_len sorted data in Part1 Ind"+str(ind))
print("\nExtracted Assertion:")
print(df.iloc[ind]["assertions"])
print("\nVariables:")
print(df.iloc[ind]["variables"])
print("\nCode:")
print(df.iloc[ind]["content"])

print("\nGPT Response:")
print(df.iloc[ind]["gpt"])

# print("\nPROMPT:")
# print(df.iloc[ind]["prompt"])

Showing Data for prompt_len sorted data in Part1 Ind3

Extracted Assertion:
[['output_number', '==', '0']]

Variables:
['output_number']

Code:
r"""
Tests for QtAwesome.
"""
# Standard library imports
import subprocess

# Test Library imports
import pytest

def test_segfault_import():
    output_number = subprocess.call('python -c "import qtawesome '
                                    '; qtawesome.icon()"', shell=True)
    assert output_number == 0
    
if __name__ == "__main__":
    pytest.main()


GPT Response:
[[11, 'output_number', '>=', 0, "output_number should not be negative as it represents a process exit status"]]


# Step 5) Replicating for Verilog

In [126]:
import re

def extract_variables(verilog_code):
    # Regular expression to match Verilog variable declarations
    variable_pattern = r'\b(?:wire|reg|integer|real|time)\s+(.*?)\s*[,;]'

    # Find all matches
    matches = re.findall(variable_pattern, verilog_code, re.MULTILINE)

    # Remove extra whitespace and split on commas
    variables = [var.strip() for match in matches for var in match.split(',')]

    return variables

def v_extract_assertions(verilog_code):
    # Regular expression to match Verilog assertions
    assertion_pattern = r'\bassert\s*\((.*?)\)\s*;'
    
    # Find all matches
    matches = re.findall(assertion_pattern, verilog_code, re.MULTILINE)
    
    return matches

def old_v_get_assertions(code):
    ret = []
    for line in code.split('\n'):
        if 'assert' in line:
            ret.append(line.strip())
    return ret

def v_get_variables(assertions):
    ret = []
    for statement in assertions:
        ret.append(statement.split(' ')[0].strip())
    return ret

In [132]:
class VLLM_prompt:       
    def __init__(self, input_code="*Variables:\n[flag, num, i]\n*Code:\n1num = int(input(\"Enter a number: \"))  # Program to check if a number is prime or not\n2flag = False  # define a flag variable\n3\n4if num == 1:\n5    print(num, \"is not a prime number\")\n6elif num > 1: # check for factors\n7    for i in range(2, num):\n8        if (num % i) == 0:\n9            flag = True  # if factor is found, set flag to True\n10            break  # break out of loop\n11    if flag:  # check if flag is True\n12        print(num, \"is not a prime number\")\n13    else:\n14        print(num, \"is a prime number\")",
                 example_in="\n*Variables:\n[a, b]\n*Code:\n1module m (\n2    input a,\n3    b\n4);\n5  a1 :\n6endmodule\n7module m (\n8    input a,\n9    b\n10);\n11  always_comb begin\n12  end\n13endmodule\n14",
                 example_out="[5, #0, a, \"==\", b, \"checking input a and output b at ever 0th clock cycle is necessart for the code to function\"]\n\nWhich would be the same as:\nmodule m (\n    input a,\n    b\n);\n  a1 :\n  assert #0 (a == b);\nendmodule\nmodule m (\n    input a,\n    b\n);\n  always_comb begin\n    a1 : assert #0 (a == b);\n  end\nendmodule", 
                 criteria=["Assert that the function can take in all inputs necessary to complete the process",
                           "Assert that all outputs are of the proper sizes."]
                 ):
        self.criteria = criteria
        self.example = prompt_example(example_in, example_out)
        self.input_code = input_code
        
        # default params that are less likely to change
        self.intro = "You are a helpful bot that adds assertions to pieces of Verilog code."  
        self.input_format = "You will be given a string of code presented in the format:\n*Variables:\n...\n*Code:\n..."
        self.criteria_transition = "Generate assertions based on the following criteria:"
        self.output_format = "Your response should ONLY be a list of assertions in the format:\n[line_number, timing, subject_variable, condition_type, target, reasoning]"
        self.output_format_description = ["line_number is an integer referencing the line after which the assertion should be inserted",
                                          "timing is the clock cycle(s) at which the assertion is checked relative to the present cycle",
                                          "subject_variable and target can ONLY be variables present in the code, integers, booleans, or None",
                                          "condition_type can only be a value in this list: [==, >=, <=, !=]",
                                          "reasoning is a short decription of why the assertion was made"]
        self.example_transition = "Here is an example of what your input will look like and what you should return:"
        self.input_transition = "Here is the actual input you should provide assertions for:"
    
    
    
    def composite_criteria(self):
        """ return criteria as a single string"""
        ret = ""
        for i, crit in enumerate(self.criteria):
            ret += str(i+1) + ") " + crit
            if i != len(self.criteria)-1:  # ignore last instance for formatting
                ret += '\n'
        return ret
    
    def composite_output_formatting(self):
        ret = self.output_format
        for desc in self.output_format_description:
            ret += "\n -" + desc 
        return ret
    
    def prompt(self):
        """ return entire prompt"""
        return '\n'.join([self.intro, self.input_format,
                          self.criteria_transition, self.composite_criteria(), "",
                          self.composite_output_formatting(), "",
                          self.example_transition, self.example.composite(), "\n",
                          self.input_transition, self.input_code])
    
    def to_list(self):
        """ return key prompt components as a list """
        return [self.intro, self.formatting, self.criteria, self.example, self.input_code, self.prompt()]
    
    def __str__(self):
        return self.prompt()
    def __repr__(self):
        return self.prompt()
    
def v_gen_prompt(df):
    ret = []
    for i, row in tqdm(df.iterrows()):
        my_vars = row["variables"]
        my_code = row["unasserted"]
        prompt_param = "*Variables:\n" + str(row["variables"]) + "\n*Code:\n" + row["unasserted"]
        ret.append(str(VLLM_prompt(prompt_param)))
    return ret
        
        
print(str(VLLM_prompt(vdf.iloc[2]["unasserted"])))

You are a helpful bot that adds assertions to pieces of Verilog code.
You will be given a string of code presented in the format:
*Variables:
...
*Code:
...
Generate assertions based on the following criteria:
1) Assert that the function can take in all inputs necessary to complete the process
2) Assert that all outputs are of the proper sizes.

Your response should ONLY be a list of assertions in the format:
[line_number, timing, subject_variable, condition_type, target, reasoning]
 -line_number is an integer referencing the line after which the assertion should be inserted
 -timing is the clock cycle(s) at which the assertion is checked relative to the present cycle
 -subject_variable and target can ONLY be variables present in the code, integers, booleans, or None
 -condition_type can only be a value in this list: [==, >=, <=, !=]
 -reasoning is a short decription of why the assertion was made

Here is an example of what your input will look like and what you should return:
Example 

In [160]:
vdf = get_asserted_code(verilog_dir, "", False)
vdf["content_len"] = vdf["content"].apply(lambda code: len(code))
vdf = vdf.sort_values("content_len", ascending=True)
vdf["unasserted"] = vdf["content"].apply(lambda code: unassert(code))

vdf["assertions"] = vdf["content"].apply(lambda code: v_extract_assertions(code))
vdf["num_assertions"] = vdf["assertions"].apply(lambda code: len(code))
all_prompts = len(vdf)
vdf = vdf[vdf["num_assertions"] > 0]
all_prompts = 100*len(vdf)/all_prompts
print("\ndropping prompts with no exctracted assertions =>", str(all_prompts)+'%')
print()

vdf["variables"] = vdf["assertions"].apply(lambda code: v_get_variables(code))
vdf["num_vars"] = vdf["variables"].apply(lambda code: len(code))

vdf["prompt"] = v_gen_prompt(vdf)
vdf["prompt_len"] = vdf["prompt"].apply(lambda code: len(code))
vdf

Found data at /Users/korahughes/Documents/GitHub/LLMCodeGen/Data/BigQuery/VerilogAssertions-ALL.csv

dropping prompts with no exctracted assertions => 24.663677130044842%



110it [00:00, 10766.42it/s]


Unnamed: 0,repo_name,content,content_len,unasserted,assertions,num_assertions,variables,num_vars,prompt,prompt_len
232,alainmarcel/Surelog,"module tb (input clock, a, b);\n\twire x, y;\n...",250,"\n1module tb (input clock, a, b);\n2\twire x, ...","[x == ($past(a, 2) ^ $past(b, 2)), y == (!$pas...",2,"[x, y]",2,You are a helpful bot that adds assertions to ...,1768
1,swallat/yosys,"module top (\n input clk, rst,\n output reg ...",272,"\n1module top (\n2 input clk, rst,\n3 output...",[cnt != 15],1,[cnt],1,You are a helpful bot that adds assertions to ...,1868
105,YosysHQ/yosys,module top;\n reg [0:7] mem [0:2];\n\n i...,584,\n1module top;\n2 reg [0:7] mem [0:2];\n3\n...,"[$countbits(a, '0) == 24, $countbits(a, '1) ==...",9,"[$countbits(a,, $countbits(a,, $countbits(a,, ...",9,You are a helpful bot that adds assertions to ...,2006
108,YosysHQ/yosys,// An example showing how parameters get infer...,617,\n1// An example showing how parameters get in...,"[w0 == '0, w1 == u1 ^ v1]",2,"[w0, w1]",2,You are a helpful bot that adds assertions to ...,2205
39,zachjs/sv2v,"module Module(input clock, input clear, input ...",757,"\n1module Module(input clock, input clear, inp...","[1, 1, 1, 1]",4,"[1, 1, 1, 1]",4,You are a helpful bot that adds assertions to ...,2169
...,...,...,...,...,...,...,...,...,...,...
23,lowRISC/ibex,// Copyright lowRISC contributors.\n// License...,30368,\n1// Copyright lowRISC contributors.\n2// Lic...,"[invalidate_seed < mem_states.size, mem_states...",5,"[invalidate_seed, mem_states.size, mem_states....",5,You are a helpful bot that adds assertions to ...,33917
277,alainmarcel/Surelog,/**\n * bp_me_nonsynth_mock_lce.v\n *\n * This...,46825,\n1/**\n2 * bp_me_nonsynth_mock_lce.v\n3 *\n4 ...,"[cce_block_width_p >= 64) else $error(""cce_blo...",5,"[cce_block_width_p, `BSG_IS_POW2(cce_block_wid...",5,You are a helpful bot that adds assertions to ...,52021
182,alainmarcel/Surelog,/**\n *\n * Name:\n * bp_cce_fsm.v\n *\n * D...,62575,\n1/**\n2 *\n3 * Name:\n4 * bp_cce_fsm.v\n5 ...,"[lce_sets_p > 1) else $error(""Number of LCE se...",7,"[lce_sets_p, counter_max, counter_max, icache_...",7,You are a helpful bot that adds assertions to ...,69153
304,fabianschuiki/moore,// Author: Florian Zaruba <zarubaf@iis.ee.ethz...,171780,\n1// Author: Florian Zaruba <zarubaf@iis.ee.e...,"[DEPTH > 0) else $error(""DEPTH mus...",1,[DEPTH],1,You are a helpful bot that adds assertions to ...,188487


In [161]:
prompt_limit = 8192
all_prompts = len(vdf)
vdf = vdf[vdf["prompt_len"] < prompt_limit]
all_prompts = 100*len(vdf)/all_prompts
print("dropping prompts over limit =>", str(all_prompts)+'%')
print(len(vdf), "left...")

dropping prompts over limit => 63.63636363636363%
70 left...


In [162]:
print("GENERATING RESPONSES")
responses = []
for prompt in tqdm(vdf["prompt"]):
    responses.append(gpt_oneshot(prompt))
vdf["gpt"] = responses
vdf.to_csv(cwd+"/verilog_prompts_withresponse3.csv") # saving data

GENERATING RESPONSES


100%|████████████████████████████████████████████████| 70/70 [26:06<00:00, 22.38s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vdf["gpt"] = responses


In [156]:
vdf = pd.read_csv(cwd+"/verilog_prompts_withresponse2.csv")
vdf

Unnamed: 0.1,Unnamed: 0,repo_name,content,content_len,unasserted,assertions,num_assertions,variables,num_vars,prompt,prompt_len,gpt
0,232,alainmarcel/Surelog,"module tb (input clock, a, b);\n\twire x, y;\n...",250,"\n1module tb (input clock, a, b);\n2\twire x, ...","['x == ($past(a, 2) ^ $past(b, 2))', 'y == (!$...",2,"['x', 'y']",2,You are a helpful bot that adds assertions to ...,1768,"[[2, '#0', 'x', '!=', None, ""Asserts that the ..."
1,1,swallat/yosys,"module top (\n input clk, rst,\n output reg ...",272,"\n1module top (\n2 input clk, rst,\n3 output...",['cnt != 15'],1,['cnt'],1,You are a helpful bot that adds assertions to ...,1868,"[[4, '#0', 'cnt', '>=', '0', 'Checking that ou..."
2,105,YosysHQ/yosys,module top;\n reg [0:7] mem [0:2];\n\n i...,584,\n1module top;\n2 reg [0:7] mem [0:2];\n3\n...,"[""$countbits(a, '0) == 24"", ""$countbits(a, '1)...",9,"['$countbits(a,', '$countbits(a,', '$countbits...",9,You are a helpful bot that adds assertions to ...,2006,"[6, #0, a, ""=="", 'mem[1]', 'Verifying successf..."
3,108,YosysHQ/yosys,// An example showing how parameters get infer...,617,\n1// An example showing how parameters get in...,"[""w0 == '0"", 'w1 == u1 ^ v1']",2,"['w0', 'w1']",2,You are a helpful bot that adds assertions to ...,2205,"[3, #0, 'a', ""=="", 'b', ""Inputs 'a' and 'b' mu..."
4,39,zachjs/sv2v,"module Module(input clock, input clear, input ...",757,"\n1module Module(input clock, input clear, inp...","['1', '1', '1', '1']",4,"['1', '1', '1', '1']",4,You are a helpful bot that adds assertions to ...,2169,"[[3, ""#0"", ""y"", ""=="", ""data"", ""at every clock ..."
5,13,tudortimi/verification-gentleman-blog-code,// Copyright 2015 Tudor Timisescu (verificatio...,902,\n1// Copyright 2015 Tudor Timisescu (verifica...,['std::randomize(some_var) with { some_var == ...,1,['std::randomize(some_var)'],1,You are a helpful bot that adds assertions to ...,2457,"Given the provided code, the assertion is appl..."
6,72,tudortimi/verification-gentleman-blog-code,// Copyright 2015 Tudor Timisescu (verificatio...,904,\n1// Copyright 2015 Tudor Timisescu (verifica...,['std::randomize(some_var) with { some_var == ...,1,['std::randomize(some_var)'],1,You are a helpful bot that adds assertions to ...,2455,"I'm sorry for the misunderstanding, but as a P..."
7,104,YosysHQ/yosys,"module example #(\n parameter w,\n param...",1032,"\n1module example #(\n2 parameter w,\n3 ...","['a1 == 0', 'b1 == 1', 'c1 == 2', 'd1 == 3', '...",16,"['a1', 'b1', 'c1', 'd1', 'a2', 'b2', 'c2', 'd3...",16,You are a helpful bot that adds assertions to ...,2362,"[[13, '#0', 'c', '==', 'y', 'Checking if input..."
8,79,dennis-musk/socfpga,module user_input_device_tb ();\n\nreg clk = 1...,1047,\n1module user_input_device_tb ();\n2\n3reg cl...,"['!avl_irq', 'avl_irq', '!avl_irq', 'avl_irq',...",9,"['!avl_irq', 'avl_irq', '!avl_irq', 'avl_irq',...",9,You are a helpful bot that adds assertions to ...,2571,"[12, #0, 'avl_irq', '!=', None, ""assert that t..."
9,110,YosysHQ/yosys,"module pass_through(\n input [63:0] inp,\n ...",1064,\n1module pass_through(\n2 input [63:0] inp...,"[""o01 === {64 {1'b0}}"", ""o02 === {64 {1'b1}}"",...",12,"['o01', 'o02', 'o03', 'o04', 'o05', 'o06', 'o0...",12,You are a helpful bot that adds assertions to ...,2311,"[[4, '#0', 'inp', '==', 'out', 'Asserting that..."


In [157]:
ind = 1
print("Showing Data for content_len sorted data Ind"+str(ind))
print("\nExtracted Assertion:")
print(vdf.iloc[ind]["assertions"])

print("\nVariables:")
print(vdf.iloc[ind]["variables"])

print("\nGPT Response:")
print(vdf.iloc[ind]["gpt"])

print("\nCode:")
print(vdf.iloc[ind]["content"])  # unasserted


print("\nPROMPT:")
print(vdf.iloc[ind]["prompt"])

Showing Data for content_len sorted data Ind1

Extracted Assertion:
['cnt != 15']

Variables:
['cnt']

GPT Response:
[[4, '#0', 'cnt', '>=', '0', 'Checking that output cnt has initialized all its components'],
 [7, '#0', 'clk', '!=', None, 'Check that clk signal toggles at the 0th clock cycle'],
 [15, '#1', 'cnt', '<=', '10', 'Checking cnt does not reach 10 at the next clock cycle']]

Code:
module top (
  input clk, rst,
  output reg [3:0] cnt
);
  initial cnt = 0;

  always @(posedge clk) begin
    if (rst)
      cnt <= 0;
    else
      cnt <= cnt + 4'd 1;
  end

  always @(posedge clk) begin
    assume (cnt != 10);
    assert (cnt != 15);
  end
endmodule


PROMPT:
You are a helpful bot that adds assertions to pieces of Verilog code.
You will be given a string of code presented in the format:
*Variables:
...
*Code:
...
Generate assertions based on the following criteria:
1) Assert that the function can take in all inputs necessary to complete the process
2) Assert that all outputs ar

## Step 3) Parse & Evaluate GPT's Response

### Step 3.1) Restore the assertion(s) generated to code and evaluate
> Metrics of evaluation, does it run? does it add to the code? is it ground-truth-like? human evaluator rank? gpt evaluator rank?

In [25]:
def get_gpt_assertions(response, code):
    """ takes in chat gpt's response and outputs its assertions as well as a string of code with said assertions in it """
    asserts = []
    parsed_code = code.split('\n')
    for line in response.split('\n'):
        line.replace('[', '').replace(']', '')
        separated = line.split(',')
        full_assert = separated[1:-1] # ommit reasoning
        
        # TODO: handle case where there are other ints in the code
        line_num = separated[0]
        num_size = len(str(line_num))
        has_found = False
        for i, line in enumerate(parsed_code):
            if line_num in line[:num_size+1]:
                parsed_code.insert(i+1, full_assert)
                asserts.append(full_assert)
                has_found = True
                break
        if not has_found:
            print("Could not find location of\n", full_assert, "\nin\n", code)
    return '\n'.join(parsed_code), asserts

example_response = tester_df.sample()
print(example_response["gpt"].iloc[0])
temp_test = get_gpt_assertions(example_response["gpt"].iloc[0], example_response["unasserted"].iloc[0])
print(temp_test)

In [None]:
gpt_asserted_code = []  # snippets of code greated by the response assertions from gpt
gpt_assertions = []  # the decoded assertions themselves
gpt_num_assertions = []  # the number of assertions gpt generated
gpt_ratio_assertions = []   # num_gen_assertions / num_parsed_assertions
gpt_matched_assertions = []  # assertions that roughly equal ground-truth
gpt_matched_assertions_ratio = []  # num_matched_assertions / num_ground_truth_assertions

for i, row in tester_df.iterrows():
    new_code, asserts = get_gpt_assertions(row["gpt"], row["Unasserted"])
    gpt_asserted_code.append(new_code)
    gpt_assertions.append(asserts)
    gpt_num_assertions.append(len(asserts))
    gpt_ratio_assertions.append(len(asserts)/row["parsed_lines"])
    # TODO get number of matching assertions
    matched_num = ...
    gpt_matched_assertions.append(matched_num)
    gpt_matched_assertions_ratio.append(matched_num/len(asserts))
tester_df["gpt_asserted_code"] = gpt_asserted_code
tester_df["gpt_assertions"] = gpt_assertions
tester_df["gpt_num_assertions"] = gpt_num_assertions
tester_df["gpt_ratio_assertions"] = gpt_ratio_assertions
tester_df["gpt_matched_assertions"] = gpt_matched_assertions
tester_df["gpt_matched_assertions_ratio"] = gpt_matched_assertions_ratio

In [None]:
tester = df.iloc[ind]["assertions"][1:-1].split('], [')
# print(tester)
print(df.iloc[ind]["assertions"][1:-1])
# def revive_assertion(my_list):
#     my_list[1:-1]

# for i, row in df.iterrows():
#     for list(assertion) in row["assertions"]:
#         to_find = assertion.replace('[','').replace(']','')
#         if to_find in row["gpt"]:
#             print("Found a match!")
#             print(assertion)
#             print("found at")
#             print(row["gpt"])
# print("\nDONE")

In [None]:
""" TODO: test word-mover's distance comparison? """

## Step 4) ...