# Install Requirements


In [None]:
!pip install -r requirements.txt

# Import Utility Functions

In [3]:
import openai

from utils import (
    apply_heuristics,
    apply_heuristic_in_file,
    heuristic_adjust_spaces,
    get_bleu_and_codebleu,
    get_env_variable,
    get_predictions_from_openai_and_write_to_file,
    read_raw_tufano_dataset_from_csv,
    write_list_to_file,
    read_dataset,
    get_EM_R4R,
    format_file,
    transfer_content_to_another_file,
    get_predictions_from_edit_api_and_write_to_file,
    get_few_shot_predictions_from_openai_and_write_to_file
)

## Set OpenAI API Key

In [None]:
# Prefer a key supplied through the OPENAI_API_KEY environment variable so the
# secret never has to be saved inside the notebook. When the variable is unset
# or empty, fall back to the placeholder, which must then be replaced by hand
# before any inference cell is run.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY") or "<YOUR_OPEN_API_KEY>"

# Zero-Shot Inference and Evaluation using GPT-3.5-Turbo Model



## Dataset Path Definition

In [None]:
# Directory used as the prefix of every zero-shot output file path below.
OUTPUT_DIRECTORY = "zero_shot_outputs"

# Review4Repair (R4R) test split: source and target text files.
R4R_SOURCE_FILE_PATH = "datasets/R4R/test_CC_src.txt"
R4R_TARGET_FILE_PATH = "datasets/R4R/test_CC_tgt.txt"

# Tufano test split: raw CSV plus the source and target text files.
TUFANO_RAW_DATASET_FILE_PATH = "datasets/tufano/raw_test.csv"
TUFANO_SOURCE_FILE_PATH = "datasets/tufano/test_CC_src.txt"
TUFANO_TARGET_FILE_PATH = "datasets/tufano/test_CC_tgt.txt"

## Inference and Evaluation on Review4Repair Dataset

### data load

In [4]:
# Read the R4R test files and unpack the result into the three names that the
# inference cell consumes.
r4r_test_split = read_dataset(
    dataset_name="R4R",
    source_file_path=R4R_SOURCE_FILE_PATH,
    target_file_path=R4R_TARGET_FILE_PATH,
)
code_reviews, buggy_codes, target_codes = r4r_test_split

NameError: ignored

### inference

In [None]:
# Zero-shot run on the loaded R4R samples. Both output paths live under
# OUTPUT_DIRECTORY; the helper receives them ahead of the three data lists.
predictions_path = f"{OUTPUT_DIRECTORY}/zero_shot_r4r_predictions_raw_no_heuristic.txt"
ground_truths_path = f"{OUTPUT_DIRECTORY}/zero_shot_r4r_ground_truths_raw_no_heuristic.txt"
get_predictions_from_openai_and_write_to_file(
    predictions_path, ground_truths_path, code_reviews, buggy_codes, target_codes
)

### Before applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

### After applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

## Inference and Evaluation on Tufano Dataset

### data load

In [None]:
# Read the raw Tufano test CSV and unpack it into the same three names used
# for R4R above (this overwrites the previously loaded R4R data).
tufano_test_split = read_raw_tufano_dataset_from_csv(TUFANO_RAW_DATASET_FILE_PATH)
code_reviews, buggy_codes, target_codes = tufano_test_split

### inference

In [None]:
# Zero-shot run on the loaded Tufano samples. Both output paths live under
# OUTPUT_DIRECTORY; the helper receives them ahead of the three data lists.
predictions_path = f"{OUTPUT_DIRECTORY}/zero_shot_tufano_predictions_raw_no_heuristic.txt"
ground_truths_path = f"{OUTPUT_DIRECTORY}/zero_shot_tufano_ground_truths_raw_no_heuristic.txt"
get_predictions_from_openai_and_write_to_file(
    predictions_path, ground_truths_path, code_reviews, buggy_codes, target_codes
)

### Before applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

### After applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

# Few Shot Inference and Evaluation using GPT-3.5-Turbo Model



## Dataset Path Definition

In [None]:
# Directory used as the prefix of every few-shot output file path below.
OUTPUT_DIRECTORY = "few_shot_outputs"

# Review4Repair (R4R): source/target text files for the train and test splits.
R4R_TRAIN_DATASET_SRC_FILE_PATH = "datasets/R4R/train_CC_src.txt"
R4R_TRAIN_DATASET_TGT_FILE_PATH = "datasets/R4R/train_CC_tgt.txt"
R4R_TEST_DATASET_SRC_FILE_PATH = "datasets/R4R/test_CC_src.txt"
R4R_TEST_DATASET_TGT_FILE_PATH = "datasets/R4R/test_CC_tgt.txt"

# Tufano: raw CSV files for the train and test splits.
TUFANO_RAW_TRAIN_DATASET_FILE_PATH = "datasets/tufano/raw_train.csv"
TUFANO_RAW_TEST_DATASET_FILE_PATH = "datasets/tufano/raw_test.csv"

## Inference and Evaluation on Review4Repair Dataset

### data load

In [None]:
# Read the R4R train split first, then the test split; each result is
# unpacked into a (reviews, buggy codes, target codes) triple of names.
r4r_train_split = read_dataset(
    dataset_name="R4R",
    source_file_path=R4R_TRAIN_DATASET_SRC_FILE_PATH,
    target_file_path=R4R_TRAIN_DATASET_TGT_FILE_PATH,
)
train_code_reviews, train_buggy_codes, train_target_codes = r4r_train_split

r4r_test_split = read_dataset(
    dataset_name="R4R",
    source_file_path=R4R_TEST_DATASET_SRC_FILE_PATH,
    target_file_path=R4R_TEST_DATASET_TGT_FILE_PATH,
)
test_code_reviews, test_buggy_codes, test_target_codes = r4r_test_split

### inference

In [None]:
# Few-shot run on R4R: the helper is given the train and test triples as
# tuples together with top_k=3, and two output paths under OUTPUT_DIRECTORY.
train_dataset = (train_code_reviews, train_buggy_codes, train_target_codes)
test_dataset = (test_code_reviews, test_buggy_codes, test_target_codes)
get_few_shot_predictions_from_openai_and_write_to_file(
    prediction_file_path=f"{OUTPUT_DIRECTORY}/few_shot_r4r_predictions_raw_no_heuristic.txt",
    ground_truth_path=f"{OUTPUT_DIRECTORY}/few_shot_r4r_ground_truths_raw_no_heuristic.txt",
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    top_k=3,
)

### Before applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

### After applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

## Inference and Evaluation on Tufano Dataset

### data load

In [None]:
# Read the raw Tufano CSVs (test split first, then train, as before) and
# unpack each result into its own triple of names. These overwrite the R4R
# train/test variables loaded earlier in this section.
tufano_test_split = read_raw_tufano_dataset_from_csv(TUFANO_RAW_TEST_DATASET_FILE_PATH)
test_code_reviews, test_buggy_codes, test_target_codes = tufano_test_split

tufano_train_split = read_raw_tufano_dataset_from_csv(TUFANO_RAW_TRAIN_DATASET_FILE_PATH)
train_code_reviews, train_buggy_codes, train_target_codes = tufano_train_split

### inference

In [None]:
# Few-shot run on Tufano: the helper is given the train and test triples as
# tuples together with top_k=3, and two output paths under OUTPUT_DIRECTORY.
train_dataset = (train_code_reviews, train_buggy_codes, train_target_codes)
test_dataset = (test_code_reviews, test_buggy_codes, test_target_codes)
get_few_shot_predictions_from_openai_and_write_to_file(
    prediction_file_path=f"{OUTPUT_DIRECTORY}/few_shot_tufano_predictions_raw_no_heuristic.txt",
    ground_truth_path=f"{OUTPUT_DIRECTORY}/few_shot_tufano_ground_truths_raw_no_heuristic.txt",
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    top_k=3,
)

### Before applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

### After applying heuristics

#### accuracy (EM) calculation

#### BLEU and CodeBLEU calculation

# Code-DaVinci-Edit-001 Inference and Evaluation



## Dataset Path Definition

In [None]:
# Directory used as the prefix of every edit-model output file path below.
OUTPUT_DIRECTORY = "outputs"

# Review4Repair (R4R) test split: source and target text files.
R4R_SOURCE_FILE_PATH = "datasets/R4R/test_CC_src.txt"
R4R_TARGET_FILE_PATH = "datasets/R4R/test_CC_tgt.txt"

# Tufano test split: raw CSV plus the source and target text files.
TUFANO_RAW_DATASET_FILE_PATH = "datasets/tufano/raw_test.csv"
TUFANO_SOURCE_FILE_PATH = "datasets/tufano/test_CC_src.txt"
TUFANO_TARGET_FILE_PATH = "datasets/tufano/test_CC_tgt.txt"

## Inference and Evaluation on Review4Repair Dataset

### data load

In [None]:
# Read the R4R test files and unpack the result into the three names that the
# edit-API inference cell consumes.
r4r_test_split = read_dataset(
    dataset_name="R4R",
    source_file_path=R4R_SOURCE_FILE_PATH,
    target_file_path=R4R_TARGET_FILE_PATH,
)
code_reviews, buggy_codes, target_codes = r4r_test_split

NameError: ignored

### inference

In [None]:
# Edit-API run on the loaded R4R samples. Both output paths live under
# OUTPUT_DIRECTORY; the helper receives them ahead of the three data lists.
predictions_path = f"{OUTPUT_DIRECTORY}/edit_r4r_predictions.txt"
ground_truths_path = f"{OUTPUT_DIRECTORY}/edit_r4r_ground_truths.txt"
get_predictions_from_edit_api_and_write_to_file(
    predictions_path, ground_truths_path, code_reviews, buggy_codes, target_codes
)

### accuracy (EM) calculation

### BLEU and CodeBLEU calculation

## Inference and Evaluation on Tufano Dataset

### data load

In [None]:
# Read the raw Tufano test CSV and unpack it into the same three names used
# for R4R above (this overwrites the previously loaded R4R data).
tufano_test_split = read_raw_tufano_dataset_from_csv(TUFANO_RAW_DATASET_FILE_PATH)
code_reviews, buggy_codes, target_codes = tufano_test_split

### inference

In [None]:
# Edit-API run on the loaded Tufano samples. Both output paths live under
# OUTPUT_DIRECTORY; the helper receives them ahead of the three data lists.
predictions_path = f"{OUTPUT_DIRECTORY}/edit_tufano_predictions.txt"
ground_truths_path = f"{OUTPUT_DIRECTORY}/edit_tufano_ground_truths.txt"
get_predictions_from_edit_api_and_write_to_file(
    predictions_path, ground_truths_path, code_reviews, buggy_codes, target_codes
)

### accuracy (EM) calculation

### BLEU and CodeBLEU calculation