# Imports and helpers

In [1]:
from pygount import SourceAnalysis
import os
import json
import shutil
from dotenv import load_dotenv
from complexipy import file_complexity
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from radon.complexity import cc_visit
import subprocess
import re
import radon
import radon.metrics


In [2]:
# Load variables from a local .env file into the process environment
# (python-dotenv); the recorded cell output below is this call's return value.
load_dotenv()


True

In [3]:
def validate_config(config: dict):
    """Check that an experiment config defines every required key.

    Args:
        config (dict): The experiment config to check.

    Raises:
        ValueError: Naming the first required key (in the order listed
            below) that is absent from ``config``.
    """
    expected_keys = (
        "language",
        "summarize_codebase",
        "codebase_readme_path",
        "files_to_summarize_paths",
        "codebase_summary_prompt_template",
        "codebase_summary_prompt_save_path",
        "codebase_summary_save_path",
        "function_description_prompt_template",
        "function_description_prompt_save_path",
        "function_description_save_path",
        "function_generation_prompt_template_type1",
        "function_generation_prompt_type1_save_path",
        "function_generation_prompt_template_type2",
        "function_generation_prompt_type2_save_path",
        "function_generation_prompt_template_type3",
        "function_generation_prompt_type3_save_path",
        "chosen_function_path",
        "chosen_function",
        "original_function_save_path",
        "example_function_description1",
        "example_function_code1",
        "example_function_description2",
        "example_function_code2",
        "generated_function_type1_save_dir",
        "generated_function_type2_save_dir",
        "generated_function_type3_save_dir",
        "run_codebleu",
        "codebleu_type1_save_dir",
        "codebleu_type2_save_dir",
        "codebleu_type3_save_dir",
    )

    # Only the first absent key is reported; later ones surface on re-validation.
    first_absent = next((name for name in expected_keys if name not in config), None)
    if first_absent is not None:
        raise ValueError(f"Missing required key: {first_absent}")


In [4]:
# Names of the models whose generated functions are evaluated. Each name is
# used as a directory component when building the generated-function path.
model_names = [
    "GPT-3_5-Turbo", 
    "GPT-4", 
    "DeepSeek-Coder-V2", 
    "CodeQwen1_5-7B-Chat", 
    "Artigenz-Coder-DS-6_7B"
]
# Prompt variants; each maps to a `generated_function_<type>_save_dir` config key.
type_names = ["type1", "type2", "type3"]


In [5]:
def read_config_files_index(file_index_path: str):
    """Load and validate every experiment config listed in an index file.

    The index file holds one config JSON path per line. Blank
    (whitespace-only) lines are ignored.

    Args:
        file_index_path (str): Path to the text file listing config paths.

    Returns:
        list: The parsed configs that passed ``validate_config``, in index
        order. Configs that fail validation are reported on stdout and
        left out of the result.
    """
    with open(file_index_path, 'r') as f:
        # Skip blank lines so a spacer or trailing empty line is not treated
        # as an (unopenable) empty path.
        remaining_confs_paths = [line for line in f.read().splitlines() if line.strip()]

    remaining_confs = []
    for cf_path in remaining_confs_paths:
        # Use a context manager so the config file handle is always closed.
        with open(cf_path) as conf_file:
            conf = json.load(conf_file)
        try:
            validate_config(conf)
        except Exception as e:
            print(f"Error in {cf_path}: {e}")
        else:
            remaining_confs.append(conf)

    return remaining_confs


In [6]:
def load_func_generation_prompts(config):
    """Read the saved function-generation prompts for the three prompt types.

    Args:
        config (dict): The experiment config; its three
            ``function_generation_prompt_type*_save_path`` entries are read.

    Returns:
        tuple: ``(type1_prompt, type2_prompt, type3_prompt)`` file contents.
        The type3 prompt is ``""`` when its save path is the empty string.
    """
    def read_text(path):
        with open(path, 'r') as handle:
            return handle.read()

    prompt_type1 = read_text(config['function_generation_prompt_type1_save_path'])
    prompt_type2 = read_text(config['function_generation_prompt_type2_save_path'])

    # An empty type3 path means no type3 prompt was saved for this config.
    type3_path = config["function_generation_prompt_type3_save_path"]
    prompt_type3 = read_text(type3_path) if type3_path != "" else ""

    return prompt_type1, prompt_type2, prompt_type3


In [7]:
def get_loc(filepath: str):
    """Return the code-line count pygount reports for ``filepath``.

    The value is the ``code`` attribute of the ``SourceAnalysis`` built by
    ``SourceAnalysis.from_file(filepath, "pygount")``.
    """
    return SourceAnalysis.from_file(filepath, "pygount").code


In [8]:
def get_cog_complexity_py(filepath: str):
    """Return the ``complexity`` value complexipy's ``file_complexity`` reports for ``filepath``."""
    return file_complexity(filepath).complexity


In [9]:
def get_cog_complexity_js(js_file):
    """Measure the cognitive complexity of a file with the ``ccts-json`` CLI.

    Runs ``npx ccts-json <js_file>``, strips ANSI colour codes from its
    stdout and parses the remainder as JSON.

    Args:
        js_file: Path of the file handed to the tool.

    Returns:
        The ``score`` of the first entry in the tool's JSON report, or
        ``None`` if the command exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            ['npx', 'ccts-json', js_file],
            capture_output=True, check=True
        )
    except subprocess.CalledProcessError as e:
        # This helper measures cognitive complexity; the message previously
        # said "cyclomatic", which mislabelled failures in the run log.
        print(f"Error running js cognitive complexity calculator: {e}")
        return None

    text = result.stdout.decode('utf-8').strip()
    # Remove ANSI colour escape sequences so the output is plain JSON.
    cleaned_text = re.sub(r'\x1b\[[0-9;]*m', '', text)
    json_res = json.loads(cleaned_text)

    # The report is a mapping; take the score from its first value.
    return next(iter(json_res.values()))['score']


In [10]:
def get_cc_py(python_file_path: str):
    """Return the summed cyclomatic complexity of a Python file.

    The file's source is passed to radon's ``cc_visit`` and the
    ``complexity`` of every returned block is added together.
    """
    with open(python_file_path, 'r') as source_file:
        source = source_file.read()

    return sum(block.complexity for block in cc_visit(source))


In [11]:
def get_cc_js(js_file):
    """Measure cyclomatic complexity with the ``js_code_metrics.js`` node script.

    Args:
        js_file: Path of the file handed to the script.

    Returns:
        The last whitespace-separated token of the script's stdout (ANSI
        colour codes removed) converted to ``int``, or ``None`` if the
        command exits with a non-zero status.
    """
    command = ['node', 'js_code_metrics.js', js_file]
    try:
        completed = subprocess.run(command, capture_output=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running js cyclomatic complexity calculator: {e}")
        return None

    raw_output = completed.stdout.decode('utf-8').strip()
    # Drop ANSI colour escape sequences before picking out the number.
    plain_output = re.sub(r'\x1b\[[0-9;]*m', '', raw_output)
    last_token = plain_output.split()[-1]
    return int(last_token)


In [12]:
def get_halstead_py(filepath: str):
    """Return the Halstead volume of a Python file.

    The file's source is passed to ``radon.metrics.h_visit`` and the
    ``volume`` of the result's ``total`` report is returned.
    """
    with open(filepath, 'r') as source_file:
        source = source_file.read()

    return radon.metrics.h_visit(source).total.volume


In [13]:
def get_halstead_js(js_file):
    """Measure a Halstead metric with the ``js_halstead.js`` node script.

    Args:
        js_file: Path of the file handed to the script.

    Returns:
        The last whitespace-separated token of the script's stdout (ANSI
        colour codes removed) converted to ``float``, or ``None`` if the
        command exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            ['node', 'js_halstead.js', js_file],
            capture_output=True, check=True
        )
    except subprocess.CalledProcessError as e:
        # This helper runs the Halstead script; the message previously said
        # "cyclomatic complexity", which mislabelled failures in the run log.
        print(f"Error running js halstead calculator: {e}")
        return None

    text = result.stdout.decode('utf-8').strip()
    # Drop ANSI colour escape sequences before picking out the number.
    cleaned_text = re.sub(r'\x1b\[[0-9;]*m', '', text)
    return float(cleaned_text.split()[-1])


In [14]:
def get_name(config):
    """Derive the repository name and function number from a config.

    ``original_function_save_path`` is split on ``/`` and must consist of
    exactly ``<repo>/<function dir>/<file>``.

    Args:
        config (dict): The experiment config.

    Returns:
        tuple: ``(repo, function_number)``. ``function_number`` is the run
        of digits at the end of the function directory name, as a string
        (``"function12"`` -> ``"12"``). If the name does not end in a digit,
        its last character is returned instead.

    Raises:
        ValueError: If the path does not have exactly two directory
            components before the file name.
    """
    repo, func = config['original_function_save_path'].split('/')[:-1]
    # Taking only the last character truncated multi-digit function numbers
    # ("function12" -> "2"); capture the whole numeric suffix instead.
    numeric_suffix = re.search(r'[0-9]+\Z', func)
    if numeric_suffix:
        return repo, numeric_suffix.group()
    return repo, func[-1]


In [15]:
def get_function_paths(config:dict, model_name: str, prompt_type: str, generation_num: int=1):
    """Build the paths of the original function and one generated counterpart.

    Args:
        config (dict): The experiment config.
        model_name (str): Model name, one of ["GPT-3_5-Turbo", "GPT-4", "DeepSeek-Coder-V2", "CodeQwen1_5-7B-Chat", "Artigenz-Coder-DS-6_7B"].
        prompt_type (str): Prompt type, one of ["type1", "type2", "type3"].
        generation_num (int): Index of the generated function to locate. Defaults to 1.

    Returns:
        dict: ``{"original": <path>, "generated": <path>}``. The generated
        path is ``<save dir for prompt_type>/<model_name>/GENERATED-<stem>_<generation_num>.<ext>``,
        where ``<stem>`` and ``<ext>`` are the first two dot-separated parts
        of ``config['chosen_function']``.
    """
    save_dir_by_type = {
        kind: config[f'generated_function_{kind}_save_dir']
        for kind in ("type1", "type2", "type3")
    }

    name_parts = config['chosen_function'].split(".")
    stem = name_parts[0]
    extension = name_parts[1]
    generated_name = f"GENERATED-{stem}_{generation_num}.{extension}"
    target_dir = save_dir_by_type[prompt_type]

    return {
        "original": config['original_function_save_path'],
        "generated": f"{target_dir}/{model_name}/{generated_name}",
    }


---
# Running on all configs

In [16]:
# Load every config listed in the index file; configs that fail validation are
# reported and dropped by read_config_files_index, so this count covers only
# the valid ones.
configs = read_config_files_index("localllm_remaining_confs.txt")
print(f"read {len(configs)} config files")


read 10 config files


In [17]:
# Toggles for which metrics to collect in this run.
measure_loc = True
measure_cyc_complexity = True
measure_cog_complexity = True
measure_halstead = True

loc_data = [] # repo_name, function_num, prompt_type, model, original loc, generated loc
cc_data = [] # repo_name, function_num, prompt_type, model, original cc, generated cc
cog_data = [] # repo_name, function_num, prompt_type, model, original cog, generated cog
halstead_data = [] # repo_name, function_num, prompt_type, model, original halstead, generated halstead

# Per-language dispatch tables; TS files go through the same tools as JS.
cc_functions = {'python': get_cc_py, 'JS': get_cc_js, 'TS': get_cc_js}
cogc_functions = {'python': get_cog_complexity_py, 'JS': get_cog_complexity_js, 'TS': get_cog_complexity_js}
halstead_functions = {'python': get_halstead_py, 'JS': get_halstead_js, 'TS': get_halstead_js}

# The original file's metrics do not depend on the model or prompt type, so
# each (metric, path) pair is measured once and reused. This avoids re-running
# the subprocess-based JS tools for every model/prompt combination.
_original_metric_cache = {}


def _measure_original(metric, measure, path):
    """Return ``measure(path)``, computing it only on the first request per (metric, path)."""
    cache_key = (metric, path)
    if cache_key not in _original_metric_cache:
        _original_metric_cache[cache_key] = measure(path)
    return _original_metric_cache[cache_key]


for conf in tqdm(configs, desc="Configs"):
    # The repo/function labels and the language depend only on the config,
    # so resolve them once instead of once per model/prompt combination.
    repo, func_num = get_name(conf)
    language = conf['language']

    for model_name in tqdm(model_names, desc="models", leave=False):
        for prompt_type in tqdm(type_names, desc="prompt types", leave=False):
            if prompt_type == "type3" and conf["generated_function_type3_save_dir"] == "":
                # skip type3 prompt because it doesn't exist for this config
                continue

            paths = get_function_paths(conf, model_name, prompt_type)
            original_path, generated_path = paths['original'], paths['generated']

            if measure_loc:
                original_loc = _measure_original("loc", get_loc, original_path)
                generated_loc = get_loc(generated_path)

                loc_data.append([repo, func_num, prompt_type, model_name, original_loc, generated_loc])

            if measure_cyc_complexity:
                original_cc = _measure_original("cc", cc_functions[language], original_path)
                generated_cc = cc_functions[language](generated_path)

                cc_data.append([repo, func_num, prompt_type, model_name, original_cc, generated_cc])

            if measure_cog_complexity:
                original_cog = _measure_original("cog", cogc_functions[language], original_path)
                generated_cog = cogc_functions[language](generated_path)

                cog_data.append([repo, func_num, prompt_type, model_name, original_cog, generated_cog])

            if measure_halstead:
                original_halstead = _measure_original("halstead", halstead_functions[language], original_path)
                generated_halstead = halstead_functions[language](generated_path)

                halstead_data.append([repo, func_num, prompt_type, model_name, original_halstead, generated_halstead])


Configs:   0%|          | 0/10 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

Error running js cyclomatic complexity calculator: Command '['node', 'js_code_metrics.js', 'craftship_codebox-npm/function3/GENERATED/type3/GPT-3_5-Turbo/GENERATED-put_1.js']' returned non-zero exit status 1.
Error running js cyclomatic complexity calculator: Command '['node', 'js_halstead.js', 'craftship_codebox-npm/function3/GENERATED/type3/GPT-3_5-Turbo/GENERATED-put_1.js']' returned non-zero exit status 1.


prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

Error running js cyclomatic complexity calculator: Command '['node', 'js_code_metrics.js', 'craftship_codebox-npm/function3/GENERATED/type1/CodeQwen1_5-7B-Chat/GENERATED-put_1.js']' returned non-zero exit status 1.
Error running js cyclomatic complexity calculator: Command '['node', 'js_halstead.js', 'craftship_codebox-npm/function3/GENERATED/type1/CodeQwen1_5-7B-Chat/GENERATED-put_1.js']' returned non-zero exit status 1.


prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

Error running js cyclomatic complexity calculator: Command '['node', 'js_code_metrics.js', 'StackJanitor/function2/GENERATED/type3/CodeQwen1_5-7B-Chat/GENERATED-logCloudFormationStack_1.ts']' returned non-zero exit status 1.
Error running js cyclomatic complexity calculator: Command '['node', 'js_halstead.js', 'StackJanitor/function2/GENERATED/type3/CodeQwen1_5-7B-Chat/GENERATED-logCloudFormationStack_1.ts']' returned non-zero exit status 1.


prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

models:   0%|          | 0/5 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

Error running js cyclomatic complexity calculator: Command '['node', 'js_code_metrics.js', 'StackJanitor/function3/GENERATED/type3/GPT-4/GENERATED-monitorCloudFormationStack_1.ts']' returned non-zero exit status 1.
Error running js cyclomatic complexity calculator: Command '['node', 'js_halstead.js', 'StackJanitor/function3/GENERATED/type3/GPT-4/GENERATED-monitorCloudFormationStack_1.ts']' returned non-zero exit status 1.


prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

prompt types:   0%|          | 0/3 [00:00<?, ?it/s]

In [18]:
# Every metric table shares the same four identifying columns, followed by the
# original/generated value pair for that metric.
_metric_id_columns = ["Repo", "FunctionNumber", "PromptType", "Model"]

loc_df = pd.DataFrame(
    loc_data,
    columns=_metric_id_columns + ["OriginalLOC", "GeneratedLOC"],
)
cc_df = pd.DataFrame(
    cc_data,
    columns=_metric_id_columns + ["OriginalCC", "GeneratedCC"],
)
cog_df = pd.DataFrame(
    cog_data,
    columns=_metric_id_columns + ["OriginalCOG", "GeneratedCOG"],
)
halstead_df = pd.DataFrame(
    halstead_data,
    columns=_metric_id_columns + ["OriginalHalstead", "GeneratedHalstead"],
)


In [19]:
# Preview the first rows of the lines-of-code table.
loc_df.head()


Unnamed: 0,Repo,FunctionNumber,PromptType,Model,OriginalLOC,GeneratedLOC
0,craftship_codebox-npm,1,type1,GPT-3_5-Turbo,100,41
1,craftship_codebox-npm,1,type2,GPT-3_5-Turbo,100,50
2,craftship_codebox-npm,1,type3,GPT-3_5-Turbo,100,46
3,craftship_codebox-npm,1,type1,GPT-4,100,75
4,craftship_codebox-npm,1,type2,GPT-4,100,64


In [20]:
# Preview the first rows of the cyclomatic-complexity table.
cc_df.head()


Unnamed: 0,Repo,FunctionNumber,PromptType,Model,OriginalCC,GeneratedCC
0,craftship_codebox-npm,1,type1,GPT-3_5-Turbo,11,12.0
1,craftship_codebox-npm,1,type2,GPT-3_5-Turbo,11,13.0
2,craftship_codebox-npm,1,type3,GPT-3_5-Turbo,11,11.0
3,craftship_codebox-npm,1,type1,GPT-4,11,13.0
4,craftship_codebox-npm,1,type2,GPT-4,11,13.0


In [21]:
# Preview the first rows of the cognitive-complexity table.
cog_df.head()


Unnamed: 0,Repo,FunctionNumber,PromptType,Model,OriginalCOG,GeneratedCOG
0,craftship_codebox-npm,1,type1,GPT-3_5-Turbo,11,10
1,craftship_codebox-npm,1,type2,GPT-3_5-Turbo,11,12
2,craftship_codebox-npm,1,type3,GPT-3_5-Turbo,11,7
3,craftship_codebox-npm,1,type1,GPT-4,11,18
4,craftship_codebox-npm,1,type2,GPT-4,11,15


In [22]:
# Preview the first rows of the Halstead table.
halstead_df.head()


Unnamed: 0,Repo,FunctionNumber,PromptType,Model,OriginalHalstead,GeneratedHalstead
0,craftship_codebox-npm,1,type1,GPT-3_5-Turbo,3613.812,1829.796
1,craftship_codebox-npm,1,type2,GPT-3_5-Turbo,3613.812,1924.088
2,craftship_codebox-npm,1,type3,GPT-3_5-Turbo,3613.812,1726.709
3,craftship_codebox-npm,1,type1,GPT-4,3613.812,2895.571
4,craftship_codebox-npm,1,type2,GPT-4,3613.812,2599.581


In [23]:
# Persist each metric table next to the notebook. to_csv is called with its
# defaults, so the row index is written as the first (unnamed) column.
for _csv_name, _frame in (
    ("loc.csv", loc_df),
    ("cc.csv", cc_df),
    ("cog.csv", cog_df),
    ("halstead.csv", halstead_df),
):
    _frame.to_csv(_csv_name)


---
# Experiments