In [1]:
import glob
import json
import os
import re

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Saving SnR Fixed Files as JSON

In [2]:
def extract_imports(java_code):
    # Regular expression to find import statements
    import_statements = re.findall(r"(import .*?;)", java_code)

    return import_statements

In [3]:
snr_fixed_path = "/home/azmain/snr_fixed/"
os.chdir(snr_fixed_path)

# snr_fixed_files = [f for f in os.listdir(snr_fixed_path) if f.endswith(".java")]
snr_fixed_files = []

numbers = re.compile(r'(\d+)')
def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

for file in sorted(glob.glob("*.java"), key=numericalSort):
    snr_fixed_files.append(file)

print(snr_fixed_files)
print()
snr_fixed_imports = []
for filename in snr_fixed_files:
    with open(os.path.join(snr_fixed_path, filename), 'r') as file:
        content = file.read()
        snr_fixed_imports.append(extract_imports(content))
print(snr_fixed_imports)

['Android01.java', 'Android02.java', 'Android03.java', 'Android04.java', 'Android05.java', 'Android06.java', 'Android07.java', 'Android08.java', 'Android09.java', 'Android10.java', 'Android11.java', 'Android12.java', 'Android13.java', 'Android14.java', 'Android15.java', 'Android16.java', 'Android17.java', 'Android18.java', 'Android19.java', 'Android20.java', 'Android21.java', 'Android22.java', 'Android23.java', 'Android24.java', 'Android25.java', 'Android26.java', 'Android27.java', 'Android28.java', 'Android29.java', 'Android30.java', 'Android31.java', 'Android32.java', 'Android33.java', 'Android34.java', 'Android35.java', 'Android36.java', 'Android37.java', 'Android38.java', 'Android39.java', 'Android40.java', 'Android41.java', 'Android42.java', 'Android43.java', 'Android44.java', 'Android45.java', 'Android46.java', 'Android47.java', 'Android48.java', 'Android49.java', 'Android50.java', 'Class_1.java', 'Class_2.java', 'Class_3.java', 'Class_4.java', 'Class_5.java', 'Class_6.java', 'Cl

In [4]:
def get_correct_imports(outputFiles):
    correct_outputs = []
    for output in outputFiles:
        correct_output_list = json.load(open(output))['total_imports']
        correct_output_list = ["import "+i+";" for i in correct_output_list]
        correct_outputs.append(correct_output_list)

    for import_lines in correct_outputs:
        if "import gen.R;" in import_lines:
            import_lines.remove("import gen.R;")

    return correct_outputs

In [5]:
os.chdir("/home/azmain/snr_all_json/")

outputFiles = []

numbers = re.compile(r'(\d+)')
def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

for file in sorted(glob.glob("*.benchmark_log.json"), key=numericalSort):
    outputFiles.append(file)

print(outputFiles)
print()

correct_imports = get_correct_imports(outputFiles)
print(correct_imports)

['Android01.java.json.benchmark_log.json', 'Android02.java.json.benchmark_log.json', 'Android03.java.json.benchmark_log.json', 'Android04.java.json.benchmark_log.json', 'Android05.java.json.benchmark_log.json', 'Android06.java.json.benchmark_log.json', 'Android07.java.json.benchmark_log.json', 'Android08.java.json.benchmark_log.json', 'Android09.java.json.benchmark_log.json', 'Android10.java.json.benchmark_log.json', 'Android11.java.json.benchmark_log.json', 'Android12.java.json.benchmark_log.json', 'Android13.java.json.benchmark_log.json', 'Android14.java.json.benchmark_log.json', 'Android15.java.json.benchmark_log.json', 'Android16.java.json.benchmark_log.json', 'Android17.java.json.benchmark_log.json', 'Android18.java.json.benchmark_log.json', 'Android19.java.json.benchmark_log.json', 'Android20.java.json.benchmark_log.json', 'Android21.java.json.benchmark_log.json', 'Android22.java.json.benchmark_log.json', 'Android23.java.json.benchmark_log.json', 'Android24.java.json.benchmark_lo

In [6]:
def store_result(save_directory, code_name, expected_list, predicted_list):

    # Validate lengths
    if len(expected_list) != len(predicted_list):
        raise ValueError("The length of expected_list and predicted_list lists must be equal")

    for index, (expected, predicted) in enumerate(zip(expected_list, predicted_list)):
        file_name = ""
        if code_name == "android":
            if index<9:
                file_name = f"Android0{index+1}"
            else:
                file_name = f"Android{index+1}"
        elif code_name == "jdk":
            file_name = f"Class_{index+1}" 
        elif code_name == "hibernate":
            file_name = f"hibernate_class_{index+1}"
        elif code_name == "jodatime":
            if index<9:
                file_name = f"JodaTime0{index+1}"
            else:
                file_name = f"JodaTime{index+1}"
        elif code_name == "gwt":
            file_name = f"gwt_class_{index+1}"
        elif code_name == "xstream":
            file_name = f"xstream_class_{index+1}"

        full_path = os.path.join(save_directory, file_name)
        data = {
            "filename": file_name,
            "expected_output": expected,
            "snr_fixed_output": predicted
        }
        
        with open(f"{full_path}.json", "w") as file:
            json.dump(data, file, indent=4)

    print("JSON files generated successfully!")

In [7]:
save_directory = "/home/azmain/snr_fixed_json/"
code_name = "android"
expected_imports = correct_imports[:50]
android_y_true = expected_imports
predicted_imports = snr_fixed_imports[:50]
android_y_pred = predicted_imports
store_result(save_directory, code_name, expected_imports, predicted_imports)

JSON files generated successfully!


In [8]:
save_directory = "/home/azmain/snr_fixed_json/"
code_name = "jdk"
expected_imports = correct_imports[50:73]
jdk_y_true = expected_imports
predicted_imports = snr_fixed_imports[50:73]
jdk_y_pred = predicted_imports
store_result(save_directory, code_name, expected_imports, predicted_imports)

JSON files generated successfully!


In [9]:
save_directory = "/home/azmain/snr_fixed_json/"
code_name = "hibernate"
expected_imports = correct_imports[173:223]
hibernate_y_true = expected_imports
predicted_imports = snr_fixed_imports[173:223]
hibernate_y_pred = predicted_imports
store_result(save_directory, code_name, expected_imports, predicted_imports)

JSON files generated successfully!


In [10]:
save_directory = "/home/azmain/snr_fixed_json/"
code_name = "jodatime"
expected_imports = correct_imports[73:123]
jodatime_y_true = expected_imports
predicted_imports = snr_fixed_imports[73:123]
jodatime_y_pred = predicted_imports
store_result(save_directory, code_name, expected_imports, predicted_imports)

JSON files generated successfully!


In [11]:
save_directory = "/home/azmain/snr_fixed_json/"
code_name = "gwt"
expected_imports = correct_imports[123:173]
gwt_y_true = expected_imports
predicted_imports = snr_fixed_imports[123:173]
gwt_y_pred = predicted_imports
store_result(save_directory, code_name, expected_imports, predicted_imports)

JSON files generated successfully!


In [12]:
save_directory = "/home/azmain/snr_fixed_json/"
code_name = "xstream"
expected_imports = correct_imports[223:267]
xstream_y_true = expected_imports
predicted_imports = snr_fixed_imports[223:267]
xstream_y_pred = predicted_imports
store_result(save_directory, code_name, expected_imports, predicted_imports)

JSON files generated successfully!


# Calculating SnR Benchmark

In [13]:
def pred_process(y_pred, y_true):
    y_pred_processed = []
    y_true_processed = []
    
    for pred, correct_imports in zip(y_pred, y_true):
        max_length = max(len(pred), len(correct_imports))
        correct_preds = list(set(pred).intersection(correct_imports))
        
        for i in range(0, max_length):
            if i<len(correct_preds):
                y_pred_processed.append(1)
                y_true_processed.append(1)
            else:
                if i<len(correct_imports):
                    y_pred_processed.append(0)
                    y_true_processed.append(1)
                else:
                    y_pred_processed.append(1)
                    y_true_processed.append(0)
            
    print(y_pred_processed)
    print()
    print(y_true_processed)
    print()
    return y_pred_processed, y_true_processed

In [14]:
def eval_performance(y_pred, y_true):   
    print(json.dumps({
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred)
    }, indent=2))

In [15]:
# Prediction for Android Classes

print("\nPrediction for Android Classes:\n")

y_pred_processed, y_true_processed = pred_process(android_y_pred, android_y_true)
eval_performance(y_pred_processed, y_true_processed)


Prediction for Android Classes:

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [16]:
# Prediction for JDK Classes

print("\nPrediction for JDK Classes:\n")

y_pred_processed, y_true_processed = pred_process(jdk_y_pred, jdk_y_true)
eval_performance(y_pred_processed, y_true_processed)


Prediction for JDK Classes:

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [17]:
# Prediction for Hibernate Classes

print("\nPrediction for Hibernate Classes:\n")

y_pred_processed, y_true_processed = pred_process(hibernate_y_pred, hibernate_y_true)
eval_performance(y_pred_processed, y_true_processed)


Prediction for Hibernate Classes:

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 

In [18]:
# Prediction for Joda-Time Classes

print("\nPrediction for Joda-Time Classes:\n")

y_pred_processed, y_true_processed = pred_process(jodatime_y_pred, jodatime_y_true)
eval_performance(y_pred_processed, y_true_processed)


Prediction for Joda-Time Classes:

[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [19]:
# Prediction for GWT Classes

print("\nPrediction for GWT Classes:\n")

y_pred_processed, y_true_processed = pred_process(gwt_y_pred, gwt_y_true)
eval_performance(y_pred_processed, y_true_processed)


Prediction for GWT Classes:

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[1, 1, 1, 1, 1, 1

In [20]:
# Prediction for XStream Classes

print("\nPrediction for XStream Classes:\n")

y_pred_processed, y_true_processed = pred_process(xstream_y_pred, xstream_y_true)
eval_performance(y_pred_processed, y_true_processed)


Prediction for XStream Classes:

[1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 