## Calculate Correlation (execute after evaluation)

In [1]:
# Correlation between demonstration-code modularity and pass@k for CodeLlama 7B and DeepSeek-Coder 6.7B

import os
import matplotlib.pyplot as plt

from utils.utils_evaluate import compute_pass_at_ks, verify_code_official
from utils.utils import read_jsonl_to_dict, write_dict_to_jsonl, get_code_modularity_score

from scipy import stats


# Correlation experiment: for every model and code element, collect one
# (element score, pass@k) point per available evaluation-result file, then
# report Pearson / Spearman correlation and show a scatter plot.
models = ['meta-llama-CodeLlama-7b-hf', 'deepseek-ai-deepseek-coder-6.7b-base']
code_element = ['modularity']

num_code = 100  # code indices 0..num_code-1 are probed for a result file
num_gen = 10    # required length of every record's 'passed' list
k = 1           # k passed to compute_pass_at_ks
base_directory = os.getcwd()
# Every result file is looked up in this directory; built once instead of per file.
result_directory = os.path.join(base_directory, "result", "corr_exp_evaluation_result")

for model in models:
    for element in code_element:
        # Reset per (model, element) so `codes` stays index-aligned with
        # `performances` and `element_values`.
        codes = []
        num_point = 0
        performances = []
        element_values = []
        for code_idx in range(num_code):
            file_name = f'{model}_1shot_10gen_0.1temp_{element}_{code_idx}code_icl_result.jsonl'
            file_path = os.path.join(result_directory, file_name)
            if not os.path.exists(file_path):
                continue

            results = read_jsonl_to_dict(file_path)
            if len(results) == 0:
                # An empty file has no demonstration to read; skip it rather
                # than failing on results[0] below.
                print(f'skipping empty result file: {file_name}')
                continue

            num_point += 1  # number of points in the correlation plot (=number of evaluation result)
            passed_results = []
            for result in results:
                # Explicit check (not `assert`) so it still runs under `python -O`.
                if len(result['passed']) != num_gen:
                    raise ValueError(
                        f"{file_name}: expected {num_gen} generations per record, "
                        f"got {len(result['passed'])}"
                    )
                passed_results.append(result['passed'])

            # The demonstration is read from the first record of the file.
            demonstration = results[0]['demonstration']

            # code
            codes.append(demonstration['code'][0])

            # pass@k
            performances.append(compute_pass_at_ks(passed_results, [k])[k])

            # style or modularity
            if element == 'style':
                element_values.append(demonstration['score_style'][0]['score_pep8'])
            elif element == 'modularity':
                element_values.append(demonstration['score_modularity'][0])
            else:
                # Without a score the two lists would silently go out of step.
                raise ValueError(f'unsupported code element: {element}')

        # re calculate modularity
        # element_values = []
        # for code in codes:
            # element_values.append(get_code_modularity_score(code))

        # plt.scatter(element_values, [0.5] * len(element_values), color='blue', label='only mod')

        if num_point < 2:
            # pearsonr/spearmanr need at least two points; report and move on
            # instead of crashing when evaluation results are missing.
            print(f'model: {model}')
            print(f'num data: {num_point}')
            print(f'skipping correlation for {element}: need at least 2 evaluation results')
            continue

        # calculate correlation
        # Tuple unpacking works whether SciPy returns a plain tuple or a result
        # object, unlike relying on the `.correlation` attribute.
        pearsonr, pearsonr_p = stats.pearsonr(element_values, performances)
        spearmanr, spearmanr_p = stats.spearmanr(element_values, performances)

        performances = [performance * 100 for performance in performances] # for better visualization
        plt.scatter(element_values, performances, color='red', label='Sampled Data')
        plt.xlabel(element)
        plt.ylabel('pass@k')
        plt.legend()
        plt.show()

        print(f'model: {model}')
        print(f'pearsonr: {round(pearsonr, 2)}, pearsonr_p: {round(pearsonr_p, 2)}')
        print(f'spearmanr: {round(spearmanr, 2)}, spearmanr_p: {round(spearmanr_p, 2)}')
        print(f'num data: {num_point}')
