In [None]:
import pandas as pd
import intertrans.protos_pb2 as ptpb
from intertrans.utils import submit_request, launch_inference_endpoints, stop_inference_endpoints

# Experiments Pipeline

In [None]:
def build_unit_tests_request(subset_df, fullset_df, template_name, extraction_name, model_name, base_filename, base_path, server_url, used_languages=None):
    """Build a batch translation request whose requests carry unit-test suites.

    One ``TranslationRequest`` is created per row of ``subset_df``. For each
    row, matching rows of ``fullset_df`` are looked up and, for every match, a
    ``UnitTestCase`` and a ``TargetSignature`` are attached to the request.

    Args:
        subset_df: DataFrame of translation problems. The columns ``id``,
            ``source_lang``, ``target_lang`` and ``input_code`` are read, and
            the DataFrame index (as a string) becomes the request id.
        fullset_df: DataFrame the test cases are looked up in. The columns
            ``id``, ``source_lang``, ``target_lang``, ``test_code`` and
            ``target_signature`` are read.
        template_name: Stored in each request's ``prompt_template_name``.
        extraction_name: Stored in each request's ``regex_template_name``.
        model_name: Stored in each request's ``model_name``.
        base_filename: Stored in the batch's ``file_base_name``.
        base_path: Stored in the batch's ``file_save_path``.
        server_url: Not used by this function. It is accepted so that one
            argument dict can both be unpacked into this builder and supply
            the server URL to the caller.
        used_languages: Optional iterable of language names copied into each
            request's ``used_languages``. Defaults to Go, Java, Python, C++,
            JavaScript and Rust.

    Returns:
        The populated ``ptpb.BatchTranslationRequest``.

    Raises:
        AssertionError: If no row of ``fullset_df`` matches a row of
            ``subset_df``.
    """
    if used_languages is None:
        used_languages = ("Go", "Java", "Python", "C++", "JavaScript", "Rust")
    # Materialize once so generators and other one-shot iterables can be reused per request.
    languages = list(used_languages)

    batch_request = ptpb.BatchTranslationRequest()
    batch_request.file_base_name = base_filename
    batch_request.file_save_path = base_path

    for index, row in subset_df.iterrows():
        request = ptpb.TranslationRequest()
        request.id = str(index)
        request.seed_language = row['source_lang']
        request.target_language = row['target_lang']
        request.seed_code = row['input_code']
        request.model_name = model_name
        request.used_languages.extend(languages)

        request.prompt_template_name = template_name
        request.regex_template_name = extraction_name

        # We attach the test cases to the request.
        # `&` binds tighter than `|`, so the selection is: same problem id AND
        # (same source language OR the reverse-direction pair). The
        # reverse-direction row is the one whose target_lang equals this row's
        # source language.
        same_problem = fullset_df.id == row.id
        same_source = fullset_df.source_lang == row.source_lang
        reverse_pair = (fullset_df.target_lang == row.source_lang) & (fullset_df.source_lang == row.target_lang)
        retrieved = fullset_df[same_problem & (same_source | reverse_pair)]
        assert retrieved.shape[0] != 0, (
            f"no test cases found in fullset_df for request {index} (id={row.id})"
        )

        for _, r in retrieved.iterrows():
            # We attach the test case for evaluation. This is not used in the prompt.
            unittest = ptpb.UnitTestCase()
            unittest.language = r['target_lang']
            unittest.test_case = r['test_code']

            request.test_suite.unit_test_suite.append(unittest)

            # The prompt for HumanEval-X leaks the target signature name so the generated
            # code matches the function name expected by the test case.
            signature = ptpb.TargetSignature()
            signature.language = r['target_lang']
            signature.signature = r['target_signature']
            request.target_signatures.append(signature)

        batch_request.translation_requests.append(request)

    return batch_request

In [None]:
def build_fuzzy_tests_request(subset_df, template_name, extraction_name, model_name, base_filename, base_path, server_url, used_languages=None):
    """Build a batch translation request whose requests carry fuzzy test cases.

    One ``TranslationRequest`` is created per row of ``subset_df``, and three
    ``FuzzyTestCase`` messages are attached to it from the row's numbered
    ``stdin_input_N`` / ``expected_output_N`` columns (N = 1, 2, 3).

    Args:
        subset_df: DataFrame of translation problems. The columns
            ``source_lang``, ``target_lang``, ``input_code``,
            ``stdin_input_1..3`` and ``expected_output_1..3`` are read, and
            the DataFrame index (as a string) becomes the request id.
        template_name: Stored in each request's ``prompt_template_name``.
        extraction_name: Stored in each request's ``regex_template_name``.
        model_name: Stored in each request's ``model_name``.
        base_filename: Stored in the batch's ``file_base_name``.
        base_path: Stored in the batch's ``file_save_path``.
        server_url: Not used by this function. It is accepted so that one
            argument dict can both be unpacked into this builder and supply
            the server URL to the caller.
        used_languages: Optional iterable of language names copied into each
            request's ``used_languages``. Defaults to Go, Java, Python, C++,
            JavaScript and Rust.

    Returns:
        The populated ``ptpb.BatchTranslationRequest``.
    """
    if used_languages is None:
        used_languages = ("Go", "Java", "Python", "C++", "JavaScript", "Rust")
    # Materialize once so generators and other one-shot iterables can be reused per request.
    languages = list(used_languages)

    batch_request = ptpb.BatchTranslationRequest()
    batch_request.file_base_name = base_filename
    batch_request.file_save_path = base_path

    for index, row in subset_df.iterrows():
        request = ptpb.TranslationRequest()
        request.id = str(index)
        request.seed_language = row['source_lang']
        request.target_language = row['target_lang']
        request.seed_code = row['input_code']
        request.model_name = model_name
        request.used_languages.extend(languages)

        request.prompt_template_name = template_name
        request.regex_template_name = extraction_name

        # Each row provides three stdin/expected-output pairs in numbered columns.
        for case_number in (1, 2, 3):
            fuzzytest = ptpb.FuzzyTestCase()
            fuzzytest.stdin_input = row[f'stdin_input_{case_number}']
            fuzzytest.expected_output = row[f'expected_output_{case_number}']
            request.test_suite.fuzzy_suite.append(fuzzytest)

        batch_request.translation_requests.append(request)

    return batch_request

In [None]:
def execute_unit_test_experiment(args_dict):
    """Run one unit-test experiment end to end.

    Calls ``launch_inference_endpoints`` for the configured model, builds the
    batch with ``build_unit_tests_request``, submits it with
    ``submit_request``, and finally calls ``stop_inference_endpoints`` with
    the ids returned by the launch call.

    Args:
        args_dict: Keyword arguments for ``build_unit_tests_request`` (the
            dict is unpacked with ``**``). Its ``'model_name'`` and
            ``'server_url'`` entries are also used for the endpoint and
            submit calls.
    """
    launch_ids = launch_inference_endpoints(args_dict['model_name'], args_dict['server_url'])
    try:
        request = build_unit_tests_request(**args_dict)
        submit_request(request, args_dict['server_url'])
    finally:
        # Stop the endpoints even when building or submitting fails, so a
        # failed run does not leave them running.
        stop_inference_endpoints(launch_ids, args_dict['server_url'])

In [None]:
def execute_fuzzy_test_experiment(args_dict):
    """Run one fuzzy-test experiment end to end.

    Calls ``launch_inference_endpoints`` for the configured model, builds the
    batch with ``build_fuzzy_tests_request``, submits it with
    ``submit_request``, and finally calls ``stop_inference_endpoints`` with
    the ids returned by the launch call.

    Args:
        args_dict: Keyword arguments for ``build_fuzzy_tests_request`` (the
            dict is unpacked with ``**``). Its ``'model_name'`` and
            ``'server_url'`` entries are also used for the endpoint and
            submit calls.
    """
    launch_ids = launch_inference_endpoints(args_dict['model_name'], args_dict['server_url'])
    try:
        request = build_fuzzy_tests_request(**args_dict)
        submit_request(request, args_dict['server_url'])
    finally:
        # Stop the endpoints even when building or submitting fails, so a
        # failed run does not leave them running.
        stop_inference_endpoints(launch_ids, args_dict['server_url'])

In [None]:
def _read_jsonl(path):
    """Load a JSON Lines file (one record per line) into a DataFrame."""
    return pd.read_json(path, orient='records', lines=True)


# Benchmark datasets used by the experiment cells below.
transcoder_dataset = _read_jsonl('../datasets/transcoder_dataset_all.jsonl')
humanevalx_all = _read_jsonl('../datasets/humanevalx_dataset_all.jsonl')
humanevalx_subset = _read_jsonl('../datasets/humanevalx_dataset_subset.jsonl')
codenet_subset = _read_jsonl('../datasets/codenet_dataset_subset.jsonl')

## No Verify

In [None]:
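# Run the following command in a terminal outside of Jupyter to start the engine server
# (the requests below are sent to it):
#cd ../engine && go run intertrans.go runserver ../paper/notebooks/configs/config_transcoder_noverify.yaml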

### TransCoder Dataset

#### CodeLlama 13B

In [None]:
# TransCoder with CodeLlama 13B: the full dataset is both the set of problems
# to translate and the frame the unit tests are looked up in.
args_dict = dict(
    subset_df=transcoder_dataset,
    fullset_df=transcoder_dataset,
    template_name='prompt_transcoder',
    extraction_name='temperature',
    model_name="codellama/CodeLlama-13b-Instruct-hf",
    base_filename='codellama_13b_transcoder_results_all_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_unit_test_experiment(args_dict)

#### Magicoder

In [None]:
# TransCoder with Magicoder: the full dataset is both the set of problems to
# translate and the frame the unit tests are looked up in.
args_dict = dict(
    subset_df=transcoder_dataset,
    fullset_df=transcoder_dataset,
    template_name='prompt_transcoder',
    extraction_name='temperature',
    model_name="ise-uiuc/Magicoder-S-DS-6.7B",
    base_filename='magicoder_transcoder_results_all_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_unit_test_experiment(args_dict)

#### StarCoder 2

In [None]:
# TransCoder with StarCoder 2: the full dataset is both the set of problems to
# translate and the frame the unit tests are looked up in.
args_dict = dict(
    subset_df=transcoder_dataset,
    fullset_df=transcoder_dataset,
    template_name='prompt_transcoder',
    extraction_name='temperature',
    model_name="bigcode/starcoder2-15b-instruct-v0.1",
    base_filename='starcoder2_transcoder_results_all_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_unit_test_experiment(args_dict)

### HumanEval-X

In [None]:
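# Run the following command in a terminal outside of Jupyter to start the engine server
# (the requests below are sent to it).
# NOTE(review): judging by its name, this config also covers the CodeNet runs further down,
# which have no server-launch cell of their own — confirm.
#cd ../engine && go run intertrans.go runserver ../paper/notebooks/configs/config_codenet_humanevalx_noverify.yaml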

#### CodeLlama 13B

In [None]:
# HumanEval-X with CodeLlama 13B: problems come from the subset, unit tests are
# looked up in the full HumanEval-X frame.
# NOTE(review): base_filename ends in "_test", unlike every other run in this
# notebook — looks like a leftover from a trial run; confirm the intended name
# before relying on the output file.
args_dict = dict(
    subset_df=humanevalx_subset,
    fullset_df=humanevalx_all,
    template_name='prompt_humanevalx',
    extraction_name='temperature',
    model_name="codellama/CodeLlama-13b-Instruct-hf",
    base_filename='codellama_13b_humanevalx_results_sub_depth4_test',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_unit_test_experiment(args_dict)

#### Magicoder

In [None]:
# HumanEval-X with Magicoder: problems come from the subset, unit tests are
# looked up in the full HumanEval-X frame.
args_dict = dict(
    subset_df=humanevalx_subset,
    fullset_df=humanevalx_all,
    template_name='prompt_humanevalx',
    extraction_name='temperature',
    model_name="ise-uiuc/Magicoder-S-DS-6.7B",
    base_filename='magicoder_humanevalx_results_sub_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_unit_test_experiment(args_dict)

#### StarCoder 2

In [None]:
# HumanEval-X with StarCoder 2: problems come from the subset, unit tests are
# looked up in the full HumanEval-X frame.
args_dict = dict(
    subset_df=humanevalx_subset,
    fullset_df=humanevalx_all,
    template_name='prompt_humanevalx',
    extraction_name='temperature',
    model_name="bigcode/starcoder2-15b-instruct-v0.1",
    base_filename='starcoder2_humanevalx_results_sub_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_unit_test_experiment(args_dict)

### CodeNet

#### CodeLlama 13B

In [None]:
# CodeNet subset with CodeLlama 13B, evaluated with the fuzzy test cases
# stored on each row.
args_dict = dict(
    subset_df=codenet_subset,
    template_name='prompt_codenet',
    extraction_name='temperature',
    model_name="codellama/CodeLlama-13b-Instruct-hf",
    base_filename='codellama_13b_codenet_results_sub_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_fuzzy_test_experiment(args_dict)

In [None]:
# CodeNet subset with Magicoder, evaluated with the fuzzy test cases stored on
# each row.
args_dict = dict(
    subset_df=codenet_subset,
    template_name='prompt_codenet',
    extraction_name='temperature',
    model_name="ise-uiuc/Magicoder-S-DS-6.7B",
    base_filename='magicoder_codenet_results_sub_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_fuzzy_test_experiment(args_dict)

In [None]:
# CodeNet subset with StarCoder 2, evaluated with the fuzzy test cases stored
# on each row.
args_dict = dict(
    subset_df=codenet_subset,
    template_name='prompt_codenet',
    extraction_name='temperature',
    model_name="bigcode/starcoder2-15b-instruct-v0.1",
    base_filename='starcoder2_codenet_results_sub_depth4',
    base_path='../data/raw_outputs/engine',
    server_url='localhost:50051',
)

execute_fuzzy_test_experiment(args_dict)