# CodeBLEU Score Calculation for All Functions 

## Imports and Helpers

In [None]:
import os
import json
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

from CodebleuCalculator import codebleu_score_calculator
from HelperFunction import read_config_files_index, get_function_paths, get_name

---

## Running on All Configs

In [None]:
# Load the config entries referenced by the index file "config_files.txt".
# Each entry is later indexed by string keys (e.g. conf["language"]), so it is
# presumably a dict per target function -- confirm against HelperFunction.
configs = read_config_files_index("config_files.txt")
print(f"read {len(configs)} config files")

read 10 config files


In [None]:
# Models whose generated functions are scored against the originals.
model_names = [
    "GPT-3_5-Turbo",
    "GPT-4",
    "DeepSeek-Coder-V2",
    "CodeQwen1_5-7B-Chat",
    "Artigenz-Coder-DS-6_7B",
]

# Prompt variants to evaluate for every model.
type_names = [
    "type1",
    "type2",
    "type3",
]

# Maps a config's "language" value to the language name handed to the scorer.
# NOTE(review): "TS" is scored as javascript -- presumably because the scorer
# has no TypeScript support; confirm this is intentional.
languages = {
    "JS": "javascript",
    "python": "python",
    "TS": "javascript",
}

In [None]:
# Collects one [repo, function number, prompt type, model, CodeBLEU] row for
# every (config, model, prompt type) combination that gets scored.
codebleu_scores = []

for conf in tqdm(configs, desc="Configs"):
    for model_name in tqdm(model_names, desc="models", leave=False):
        for prompt_type in tqdm(type_names, desc="prompt types", leave=False):
            if prompt_type == "type3":
                # An empty type3 save dir means this config has no type3
                # prompt, so there is nothing to score for it.
                if conf["generated_function_type3_save_dir"] == "":
                    continue

            # Resolve where the original and the generated function live.
            paths = get_function_paths(conf, model_name, prompt_type)
            original_path = paths["original"]
            generated_path = paths["generated"]
            repo, func_num = get_name(conf)

            # The scorer receives the mapped language name, not the raw
            # label stored in the config.
            codebleu_score = codebleu_score_calculator(
                original_path, generated_path, languages[conf["language"]]
            )

            # Only the "codebleu" entry of the returned result is recorded.
            row = [repo, func_num, prompt_type, model_name, codebleu_score["codebleu"]]
            codebleu_scores.append(row)

In [None]:
# Tabulate the collected rows; the column order mirrors the order in which
# each row's values were appended.
codebleu_scores_df = pd.DataFrame(
    codebleu_scores,
    columns=["Repo", "FunctionNumber", "PromptType", "Model", "CodeBLEU"],
)

In [None]:
# Persist the scores to disk. DataFrame.to_csv does not create missing parent
# directories, so make sure the output folder exists first; otherwise the
# export would fail with OSError after the whole scoring run has finished.
# The file contents are unchanged: the DataFrame index is still written as the
# first column (pandas default).
output_csv_path = "csvs/code quality metrics/Codebleu.csv"
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
codebleu_scores_df.to_csv(output_csv_path)

----