In [1]:
import json
import os
from datetime import datetime

# Result Collection

### Information about the files and prompts

In [2]:
# get all folders in the prompts directory
def get_folders(directory):
    """Return the names of the sub-directories directly inside ``directory``.

    Only immediate children are inspected (no recursion) and bare entry
    names, not full paths, are returned.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    to keep every dictionary and printed summary derived from them
    reproducible across platforms and file systems.

    Raises whatever ``os.listdir`` raises (e.g. ``FileNotFoundError``) when
    ``directory`` cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, entry))
    )
# Every sub-folder of ./prompts/ is treated as one application by the code below.
application_folders = get_folders('./prompts/')

# get all files in the application folders
def get_files(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are skipped and nothing is searched recursively; bare
    file names, not full paths, are returned.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    to make the result deterministic.

    Raises whatever ``os.listdir`` raises (e.g. ``FileNotFoundError``) when
    ``directory`` cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    )

# Map every application folder to the prompt files stored directly inside it.
application_prompt_files = {
    app_name: get_files('./prompts/' + app_name)
    for app_name in application_folders
}

# Prompt totals per application; populated further down, once the line
# counter has been defined.
application_prompt_count = {}
# get number of lines in a jsonl file
def get_line_count(file):
    """Return how many lines the text file at path ``file`` contains.

    The file is opened in text mode with the platform default encoding and
    streamed line by line, so it is never held in memory as a whole.  A final
    line without a trailing newline still counts as a line; an empty file
    yields 0.
    """
    line_total = 0
    with open(file) as handle:
        for _ in handle:
            line_total += 1
    return line_total

# One prompt per line: an application's prompt total is the summed line count
# of all prompt files found in its folder.
for app_name in application_folders:
    application_prompt_count[app_name] = sum(
        get_line_count('./prompts/' + app_name + '/' + prompt_file)
        for prompt_file in application_prompt_files[app_name]
    )



In [3]:
# Report the prompt totals per application, then the grand total.
prompts_per_app_json = json.dumps(application_prompt_count, indent=4)
prompts_grand_total = sum(application_prompt_count.values())
print("Total prompts in each application folder:", prompts_per_app_json, sep="\n")
print("Total prompts in all application folders:", prompts_grand_total, sep="\n")

# Report the number of files each application had, taken as one fourth of its
# prompt count.
# NOTE(review): presumably every file is prompted once per method and there
# are four methods -- confirm against the prompt generator.
files_per_app = {}
for app_name, prompt_total in application_prompt_count.items():
    files_per_app[app_name] = prompt_total / 4
print("Number of files in each application folder:", json.dumps(files_per_app, indent=4), sep="\n")

Total prompts in each application folder:
{
    "awcm": 1344,
    "bwapp": 788,
    "dvwa": 504,
    "events_lister": 108,
    "jsforum": 160,
    "mybb": 1260,
    "mybloggie": 236,
    "online_store": 140,
    "phpns": 120,
    "phpoll": 108,
    "scarf-beta": 76,
    "wackopicko": 196
}
Total prompts in all application folders:
5040
Number of files in each application folder:
{
    "awcm": 336.0,
    "bwapp": 197.0,
    "dvwa": 126.0,
    "events_lister": 27.0,
    "jsforum": 40.0,
    "mybb": 315.0,
    "mybloggie": 59.0,
    "online_store": 35.0,
    "phpns": 30.0,
    "phpoll": 27.0,
    "scarf-beta": 19.0,
    "wackopicko": 49.0
}


### Possible vulnerable files

In [4]:
# Locations of the JSONL summaries that list the files flagged as vulnerable.
cwd = './analyze/analyze/'
vulnerable_files_jsonl = cwd + 'vulnerable_files.jsonl'
vulnerable_files_cot_jsonl = cwd + 'cot_vulnerable_files.jsonl'

# Both summaries are read purely by position, so this list has to match the
# order in which the applications were written to them.
order_of_apps = [
    "scarf-beta", "awcm", "events_lister", "bwapp", "dvwa", "jsforum",
    "mybb", "mybloggie", "phpns", "phpoll", "online_store", "wackopicko",
]

# vulnerable_files_dict[app][method] -> value of the "vulnerable_files" key
vulnerable_files_dict = {}

# First summary: three consecutive lines per application, consumed in the
# order basic, in_context_random, in_context_pair.
with open(vulnerable_files_jsonl, 'r') as summary_file:
    summary_lines = summary_file.readlines()
    for app_index, app in enumerate(order_of_apps):
        per_method = {}
        for offset, method_name in enumerate(("basic", "in_context_random", "in_context_pair")):
            record = json.loads(summary_lines[3 * app_index + offset])
            per_method[method_name] = record.get("vulnerable_files")
        vulnerable_files_dict[app] = per_method

# Second summary: a single line per application holding the simple_cot result.
with open(vulnerable_files_cot_jsonl, 'r') as cot_file:
    cot_lines = cot_file.readlines()
    for app_index, app in enumerate(order_of_apps):
        record = json.loads(cot_lines[app_index])
        vulnerable_files_dict[app]["simple_cot"] = record.get("vulnerable_files")


In [5]:
# Dump the complete per-application, per-method listing as indented JSON.
print("Vulnerable files in each application:", json.dumps(vulnerable_files_dict, indent=4), sep="\n")

Vulnerable files in each application:
{
    "scarf-beta": {
        "basic": [
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\addsession.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\comments.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\editpaper.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\editsession.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\forgot.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\functions.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\generaloptions.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\getfile.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\getpaper.php",
            "C:\\Users\\sigur\\Downloads\\master-applications\\scarf-beta\\header.php",
            "

In [6]:
# How many files each method flagged as vulnerable, per application:
# vulnerable_files_count[app][method] -> length of the corresponding file list
vulnerable_files_count = {
    app_name: {
        method_name: len(vulnerable_files_dict[app_name][method_name])
        for method_name in ("basic", "in_context_random", "in_context_pair", "simple_cot")
    }
    for app_name in order_of_apps
}

In [7]:
# Show the per-application, per-method counts as indented JSON.
print("Number of vulnerable files in each application and method:", json.dumps(vulnerable_files_count, indent=4), sep="\n")

Number of vulnerable files in each application and method:
{
    "scarf-beta": {
        "basic": 17,
        "in_context_random": 18,
        "in_context_pair": 18,
        "simple_cot": 16
    },
    "awcm": {
        "basic": 181,
        "in_context_random": 190,
        "in_context_pair": 191,
        "simple_cot": 165
    },
    "events_lister": {
        "basic": 21,
        "in_context_random": 25,
        "in_context_pair": 26,
        "simple_cot": 22
    },
    "bwapp": {
        "basic": 154,
        "in_context_random": 166,
        "in_context_pair": 163,
        "simple_cot": 146
    },
    "dvwa": {
        "basic": 91,
        "in_context_random": 89,
        "in_context_pair": 90,
        "simple_cot": 72
    },
    "jsforum": {
        "basic": 28,
        "in_context_random": 28,
        "in_context_pair": 29,
        "simple_cot": 20
    },
    "mybb": {
        "basic": 137,
        "in_context_random": 134,
        "in_context_pair": 133,
        "simple_cot": 10

In [8]:
# Load the analysis records for every application.
# analyze_results[app][method] -> the decoded JSON record for that run
analyze_results = {}

# analyze_results.jsonl is read by position: three consecutive lines per
# application, consumed in the order basic, in_context_random, in_context_pair.
with open(cwd + 'analyze_results.jsonl', 'r') as results_file:
    result_lines = results_file.readlines()
    for app_index, app in enumerate(order_of_apps):
        per_method = {}
        for offset, method_name in enumerate(('basic', 'in_context_random', 'in_context_pair')):
            per_method[method_name] = json.loads(result_lines[3 * app_index + offset])
        analyze_results[app] = per_method

# cot_analyze_results.jsonl holds a single simple_cot line per application.
with open(cwd + 'cot_analyze_results.jsonl', 'r') as cot_results_file:
    cot_result_lines = cot_results_file.readlines()
    for app_index, app in enumerate(order_of_apps):
        analyze_results[app]['simple_cot'] = json.loads(cot_result_lines[app_index])
        

In [9]:
# Dump every loaded analysis record as indented JSON.
print("Analyze results for each application:", json.dumps(analyze_results, indent=4), sep="\n")

Analyze results for each application:
{
    "scarf-beta": {
        "basic": {
            "total": 19,
            "yes": 17,
            "no": 2,
            "other": 0,
            "yes_but_no_cwe": 0,
            "file": "results/scarf-beta/basic.jsonl"
        },
        "in_context_random": {
            "total": 19,
            "yes": 18,
            "no": 1,
            "other": 0,
            "yes_but_no_cwe": 0,
            "file": "results/scarf-beta/in_context_random.jsonl"
        },
        "in_context_pair": {
            "total": 19,
            "yes": 18,
            "no": 1,
            "other": 0,
            "yes_but_no_cwe": 0,
            "file": "results/scarf-beta/in_context_pair.jsonl"
        },
        "simple_cot": {
            "total": 19,
            "yes": 18,
            "no": 1,
            "other": 0,
            "yes_but_no_cwe": 2,
            "file": "results/scarf-beta/simple_cot.jsonl"
        }
    },
    "awcm": {
        "basic": {
           

In [10]:
# Get fraction of vulnerable files in each application and method
#
# Only "yes" answers that are not also counted under "yes_but_no_cwe" are
# treated as vulnerable; that number is divided by the record's "total".
# NOTE(review): "yes_but_no_cwe" presumably counts "yes" answers that named no
# CWE -- confirm against the analysis script.
fraction_vulnerable_files = {}
for app in order_of_apps:
    fraction_vulnerable_files[app] = {}
    for method in ["basic", "in_context_random", "in_context_pair", "simple_cot"]:
        data = analyze_results[app][method]
        total = data['total']
        actual_yes = data['yes'] - data['yes_but_no_cwe']
        # A record with no analysed files has no meaningful fraction; report
        # 0.0 rather than aborting the whole cell with ZeroDivisionError.
        fraction_vulnerable_files[app][method] = (actual_yes / total) if total else 0.0

In [11]:
# Show the per-application, per-method fractions as indented JSON.
print("Fraction of vulnerable files in each application and method:", json.dumps(fraction_vulnerable_files, indent=4), sep="\n")

Fraction of vulnerable files in each application and method:
{
    "scarf-beta": {
        "basic": 0.8947368421052632,
        "in_context_random": 0.9473684210526315,
        "in_context_pair": 0.9473684210526315,
        "simple_cot": 0.8421052631578947
    },
    "awcm": {
        "basic": 0.5386904761904762,
        "in_context_random": 0.5654761904761905,
        "in_context_pair": 0.5684523809523809,
        "simple_cot": 0.49107142857142855
    },
    "events_lister": {
        "basic": 0.7777777777777778,
        "in_context_random": 0.9259259259259259,
        "in_context_pair": 0.9629629629629629,
        "simple_cot": 0.8148148148148148
    },
    "bwapp": {
        "basic": 0.7817258883248731,
        "in_context_random": 0.8426395939086294,
        "in_context_pair": 0.8274111675126904,
        "simple_cot": 0.7411167512690355
    },
    "dvwa": {
        "basic": 0.7222222222222222,
        "in_context_random": 0.7063492063492064,
        "in_context_pair": 0.71428571428

### Total tokens

In [12]:
results_dir = './results/'
# get_files() and get_folders() are defined once, near the top of the
# notebook; they are deliberately not re-defined in this cell.

# tokens[app][method][file] -> the "usage" object of the response recorded for
# that file in ./results/<app>/<method>.jsonl
tokens = {}
for app in order_of_apps:
    tokens[app] = {}
    for method in ["basic", "in_context_random", "in_context_pair", "simple_cot"]:
        tokens[app][method] = {}
        file_path = results_dir + app + '/' + method + '.jsonl'
        with open(file_path, 'r') as f:
            # Stream the JSONL file one record at a time.
            for line in f:
                data = json.loads(line)
                file = data.get('file')
                response = data.get('response')
                usage = response.get('usage')
                # If a file occurs more than once, the last record wins.
                tokens[app][method][file] = usage

# total tokens in each application and method
# total_tokens[app][method]['total'] -> summed usage over all files of that run
total_tokens = {}
for app in order_of_apps:
    total_tokens[app] = {}
    for method in ["basic", "in_context_random", "in_context_pair", "simple_cot"]:
        # Accumulators start from zero for every (app, method) pair.
        prompt_tokens = 0
        completion_tokens = 0
        for usage in tokens[app][method].values():
            prompt_tokens += usage.get('prompt_tokens')
            completion_tokens += usage.get('completion_tokens')
        total_tokens[app][method] = {
            'total': {
                'prompt_tokens': prompt_tokens,
                'completion_tokens': completion_tokens,
                'total_tokens': prompt_tokens + completion_tokens
            }
        }

# Sum the four methods of each application.
# total_tokens[app]['total'] -> usage summed over every method of that app
for app in order_of_apps:
    # Both accumulators are initialised explicitly for every application, so
    # the result no longer depends on the state left behind by another loop.
    prompt_tokens = 0
    completion_tokens = 0
    for method in ["basic", "in_context_random", "in_context_pair", "simple_cot"]:
        prompt_tokens += total_tokens[app][method]['total']['prompt_tokens']
        completion_tokens += total_tokens[app][method]['total']['completion_tokens']
    total_tokens[app]['total'] = {
        'prompt_tokens': prompt_tokens,
        'completion_tokens': completion_tokens,
        'total_tokens': prompt_tokens + completion_tokens
    }

# Grand total over all applications, stored under the top-level 'total' key.
prompt_tokens = sum(total_tokens[app]['total']['prompt_tokens'] for app in order_of_apps)
completion_tokens = sum(total_tokens[app]['total']['completion_tokens'] for app in order_of_apps)
total_tokens['total'] = {
    'prompt_tokens': prompt_tokens,
    'completion_tokens': completion_tokens,
    'total_tokens': prompt_tokens + completion_tokens
}

# Totals per method, summed across every application.
total_tokens_method = {}
for method in ("basic", "in_context_random", "in_context_pair", "simple_cot"):
    method_prompt_tokens = sum(
        total_tokens[app][method]['total']['prompt_tokens'] for app in order_of_apps
    )
    method_completion_tokens = sum(
        total_tokens[app][method]['total']['completion_tokens'] for app in order_of_apps
    )
    total_tokens_method[method] = {
        'prompt_tokens': method_prompt_tokens,
        'completion_tokens': method_completion_tokens,
        'total_tokens': method_prompt_tokens + method_completion_tokens
    }

# Totals per application: a view onto the per-app 'total' entries (the very
# same dict objects, not copies).
total_tokens_application = {app: total_tokens[app]['total'] for app in order_of_apps}

In [13]:
# Get total token usage
print("Total Token usage")
print(total_tokens)

# Get total token usage based on method
print("Total Token usage based on method")
print(total_tokens_method)

# Get total token usage based on application
# (the heading names the table that is actually printed below it)
print("Total Token usage based on application")
print(total_tokens_application)
print()

Total Token usage
{'scarf-beta': {'basic': {'total': {'prompt_tokens': 25617, 'completion_tokens': 7420, 'total_tokens': 33037}}, 'in_context_random': {'total': {'prompt_tokens': 60313, 'completion_tokens': 7157, 'total_tokens': 67470}}, 'in_context_pair': {'total': {'prompt_tokens': 74007, 'completion_tokens': 8198, 'total_tokens': 82205}}, 'simple_cot': {'total': {'prompt_tokens': 43438, 'completion_tokens': 22235, 'total_tokens': 65673}}, 'total': {'prompt_tokens': 203375, 'completion_tokens': 45010, 'total_tokens': 248385}}, 'awcm': {'basic': {'total': {'prompt_tokens': 785245, 'completion_tokens': 88695, 'total_tokens': 873940}}, 'in_context_random': {'total': {'prompt_tokens': 1389478, 'completion_tokens': 114491, 'total_tokens': 1503969}}, 'in_context_pair': {'total': {'prompt_tokens': 1598971, 'completion_tokens': 120307, 'total_tokens': 1719278}}, 'simple_cot': {'total': {'prompt_tokens': 1106547, 'completion_tokens': 338642, 'total_tokens': 1445189}}, 'total': {'prompt_tokens

In [14]:
# Count total lines of code in each application

def get_all_files(directory):
    """Return the path of every file found below ``directory``.

    The tree is traversed with ``os.walk``; each returned path is the walked
    directory joined with the file name.  Directories themselves are not
    included.
    """
    collected = []
    for dir_path, _subdirs, file_names in os.walk(directory):
        for file_name in file_names:
            collected.append(os.path.join(dir_path, file_name))
    return collected

def get_all_files_of_type(directory, file_type):
    """Return the path of every file below ``directory`` whose name ends with ``file_type``.

    The tree is traversed with ``os.walk`` and the match is a plain,
    case-sensitive ``str.endswith`` test on the file name.
    """
    matches = []
    for dir_path, _subdirs, file_names in os.walk(directory):
        for file_name in file_names:
            if file_name.endswith(file_type):
                matches.append(os.path.join(dir_path, file_name))
    return matches

# Root folder that holds the source trees of all analysed applications.
# It defaults to the location used for the original run and can be redirected
# on another machine by setting the MASTER_APPLICATIONS_DIR environment
# variable before starting the kernel.
APPLICATIONS_ROOT = os.environ.get(
    "MASTER_APPLICATIONS_DIR",
    r"C:\Users\sigur\Downloads\master-applications",
)

# Source tree of each application, relative to APPLICATIONS_ROOT.
scarf_path = os.path.join(APPLICATIONS_ROOT, "scarf-beta")
awcm_path = os.path.join(APPLICATIONS_ROOT, "AWCM v2.2 final", "awcm")
events_lister_path = os.path.join(APPLICATIONS_ROOT, "Basic-php-events-lister2.03", "events2")
bwapp_path = os.path.join(APPLICATIONS_ROOT, "bWAPPv2.2", "bWAPP")
dvwa_path = os.path.join(APPLICATIONS_ROOT, "DVWA-master", "DVWA-master")
jsforum_path = os.path.join(APPLICATIONS_ROOT, "JsForum0.01beta")
mybb_path = os.path.join(APPLICATIONS_ROOT, "mybb_1607", "Upload")
mybloggie_path = os.path.join(APPLICATIONS_ROOT, "mybloggie214")
phpns_path = os.path.join(APPLICATIONS_ROOT, "phpns211")
phpoll_path = os.path.join(APPLICATIONS_ROOT, "phpoll-097beta")
online_store_path = os.path.join(APPLICATIONS_ROOT, "Shopping-web-Jsp-Servlet-master", "Shopping-web-Jsp-Servlet-master")
wackopicko_path = os.path.join(APPLICATIONS_ROOT, "WackoPicko-master")

# paths[i] is the source tree of the application named path_order[i]; the two
# lists must stay aligned.
paths = [scarf_path, awcm_path, events_lister_path, bwapp_path, dvwa_path, jsforum_path, mybb_path, mybloggie_path, phpns_path, phpoll_path, online_store_path, wackopicko_path]
path_order = ["scarf-beta", "awcm", "events_lister", "bwapp", "dvwa", "jsforum", "mybb", "mybloggie", "phpns", "phpoll", "online_store", "wackopicko"]



# total_lines_of_code[app] -> number of lines summed over every .php, .jsp and
# .java file found below the application's source tree
total_lines_of_code = {}
for app, path in zip(path_order, paths):
    total_lines_of_code[app] = 0

    all_files = []
    for extension in (".php", ".jsp", ".java"):
        all_files += get_all_files_of_type(path, extension)

    for file in all_files:
        try:
            # errors='replace' keeps files that are not valid UTF-8 countable;
            # the handle is streamed so no file is loaded into memory at once.
            with open(file, 'r', encoding='utf-8', errors='replace') as f:
                total_lines_of_code[app] += sum(1 for _ in f)
        except OSError:
            # Best effort: report a file that cannot be opened or read and
            # carry on.  Only I/O failures are caught, so programming errors
            # and KeyboardInterrupt still surface.
            print("Error in file: ", file)
            print(app)

print("Total lines of code in each application")
print(json.dumps(total_lines_of_code, indent=4))
    

Total lines of code in each application
{
    "scarf-beta": 1685,
    "awcm": 14727,
    "events_lister": 3217,
    "bwapp": 57111,
    "dvwa": 8313,
    "jsforum": 2930,
    "mybb": 136650,
    "mybloggie": 9485,
    "phpns": 6539,
    "phpoll": 4548,
    "online_store": 3080,
    "wackopicko": 3042
}


In [15]:
# Calculate the total response time for each application
#
# There was an issue in the first run: the log.jsonl file shows that the
# mybloggie run was aborted the first time, so lines 4278 to 4442 of the log
# (1-based, both inclusive) must be ignored.

# timestamps[app][method] -> timestamps of the "prompts/..." log entries, in
# the order in which they appear in the log
timestamps = {}
current_app = ""
log_file = "./logger/log.jsonl"
with open(log_file, 'r') as log_handle:
    for line_number, raw_line in enumerate(log_handle.readlines(), start=1):
        if 4278 <= line_number <= 4442:
            continue
        entry = json.loads(raw_line)
        logged_at = datetime.strptime(entry.get("timestamp"), "%Y-%m-%d %H:%M:%S.%f")
        message = entry.get("message")
        if message.startswith("Starting task for"):
            # The application name is the last word of the message; a new
            # start entry resets whatever was collected for that application.
            current_app = message.split(" ")[-1]
            timestamps[current_app] = {
                method_name: []
                for method_name in ("basic", "in_context_random", "in_context_pair", "simple_cot")
            }
        elif message.startswith("prompts/"):
            # The method is the final path component without its extension.
            method_name = message.split("/")[-1].split(".")[0]
            timestamps[current_app][method_name].append(logged_at)

# Elapsed seconds between the first and the last logged prompt of each run.
response_times = {
    app: {
        method_name: (
            timestamps[app][method_name][-1] - timestamps[app][method_name][0]
        ).total_seconds()
        for method_name in ("basic", "in_context_random", "in_context_pair", "simple_cot")
    }
    for app in order_of_apps
}

# Per-application totals over the four methods.
total_response_times = {app: sum(response_times[app].values()) for app in order_of_apps}

# Per-method totals over all applications.
total_response_times_method = {
    method_name: sum([response_times[app][method_name] for app in order_of_apps])
    for method_name in ("basic", "in_context_random", "in_context_pair", "simple_cot")
}
    


In [16]:
# Print each timing table under its heading, as indented JSON.
for heading, table in (
    ("Response times for each application", response_times),
    ("Total response times for each application", total_response_times),
    ("Total response times for each method", total_response_times_method),
):
    print(heading)
    print(json.dumps(table, indent=4))

Response times for each application
{
    "scarf-beta": {
        "basic": 150.275441,
        "in_context_random": 115.523136,
        "in_context_pair": 132.890735,
        "simple_cot": 341.710863
    },
    "awcm": {
        "basic": 1567.631138,
        "in_context_random": 2189.620544,
        "in_context_pair": 2296.966347,
        "simple_cot": 6339.751316
    },
    "events_lister": {
        "basic": 263.906857,
        "in_context_random": 254.757026,
        "in_context_pair": 228.69902,
        "simple_cot": 572.081405
    },
    "bwapp": {
        "basic": 1291.799325,
        "in_context_random": 1482.660449,
        "in_context_pair": 1532.020459,
        "simple_cot": 3886.150767
    },
    "dvwa": {
        "basic": 627.821671,
        "in_context_random": 809.879373,
        "in_context_pair": 868.715556,
        "simple_cot": 2617.081796
    },
    "jsforum": {
        "basic": 252.201385,
        "in_context_random": 290.119064,
        "in_context_pair": 381.25698