In [1]:
import subprocess
import pandas as pd
import os
import joblib
import shutil

In [2]:
def run_test(
    codesmell,
    data_root="C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data",
):
    """Evaluate the saved model for one code smell on the Java test set.

    The function runs the CK metrics tool on ``<data_root>/java_test``, loads
    the preprocessing pipeline, selected feature list and model saved under
    ``output/<codesmell>/``, predicts a label for every row of CK's
    ``class.csv`` and prints how many predictions match the labels in
    ``<data_root>/dataset/<codesmell>/<codesmell>_labels.csv``.

    Args:
        codesmell: Name of the code smell; used to build the labels CSV path
            and the paths of the saved ``.joblib`` artifacts.
        data_root: Directory that contains ``dataset/``, ``java_test/`` and
            ``java_train/``. Defaults to the location that was previously
            hard-coded, so existing calls behave as before.

    Returns:
        None. Progress, results and errors are printed; failures are reported
        and the function returns early instead of raising.

    Side effects:
        Creates a temporary ``ck_output`` directory in the current working
        directory and always removes it before returning.
    """
    java_code_labels = f"{data_root}/dataset/{codesmell}/{codesmell}_labels.csv"
    java_code_folder = f"{data_root}/java_test"
    java_code_train = f"{data_root}/java_train"

    ck_jar_path = 'ck-0.7.1.jar'
    ck_output_folder = 'ck_output'

    preprocessing_pipeline_path = f'output/{codesmell}/preprocessing_pipeline.joblib'
    selected_features_path = f'output/{codesmell}/selected_features.joblib'
    final_model_path = f'output/{codesmell}/final_model.joblib'

    print("Running CK tool to extract metrics...")
    os.makedirs(ck_output_folder, exist_ok=True)

    # CK is launched with cwd=ck_output_folder (class.csv is read from there
    # below), so the jar is addressed one level up from that folder.
    # NOTE(review): the trailing 'false', '0', 'false' are presumably CK's
    # use-jars / max-files-per-partition / variables-and-fields switches --
    # confirm against the CK 0.7.1 README.
    ck_command = ['java', '-jar', os.path.join("..", ck_jar_path), java_code_folder, 'false', '0', 'false']

    try:
        try:
            subprocess.run(ck_command, check=True, capture_output=True, text=True, cwd=ck_output_folder)
        except subprocess.CalledProcessError as e:
            print(f"Error running CK tool: {e}")
            print(f"Stdout: {e.stdout}")
            print(f"Stderr: {e.stderr}")
            # Without CK output there is nothing to predict on; stop here
            # instead of failing later on a missing class.csv.
            return
        except OSError as e:
            # Raised when the process cannot be started at all, e.g. `java`
            # is not on PATH.
            print(f"Error running CK tool: {e}")
            return
        print(f"CK tool finished. Output files saved in: {ck_output_folder}")

        # Ground truth keyed by (path relative to java_train, class name).
        # Test files are keyed relative to java_test below, so both sides are
        # expected to share the same relative layout.
        code_labels_df = pd.read_csv(java_code_labels)
        code_labels = {
            (os.path.relpath(file_path, java_code_train), class_name): label
            for file_path, class_name, label in zip(
                code_labels_df['file'], code_labels_df['class'], code_labels_df['label']
            )
        }

        print(f"Running Code Smell Detection ({codesmell})...", end="\n\n")

        # NOTE(review): joblib.load unpickles arbitrary objects; only load
        # artifacts produced by this project.
        loaded_pipeline = joblib.load(preprocessing_pipeline_path)
        selected_features = joblib.load(selected_features_path)
        loaded_model = joblib.load(final_model_path)

        new_data = pd.read_csv(os.path.join(ck_output_folder, "class.csv"))

        missing_features = [feature for feature in selected_features if feature not in new_data.columns]
        if missing_features:
            print(f"Error: The following selected features are missing in the new data: {missing_features}")
            print("Please ensure the CK output contains the necessary metrics.")
            return

        # NOTE(review): this feeds every numeric CK column to the pipeline and
        # assumes that matches the columns (and order) it was fitted on --
        # confirm against the training notebook.
        numerical_cols = new_data.select_dtypes(include=['number']).columns.tolist()
        new_data[numerical_cols] = loaded_pipeline.transform(new_data[numerical_cols])
        predictions = loaded_model.predict(new_data[selected_features])

        print(f"Code Smell ({codesmell}) Predictions for the Java classes ({java_code_folder}):")

        correct, total = 0, 0
        unlabelled = []
        for file_path, class_name, prediction in zip(new_data['file'], new_data['class'], predictions):
            key = (os.path.relpath(file_path, java_code_folder), class_name)
            if key not in code_labels:
                # A class CK reported but the labels CSV does not cover;
                # skip it rather than aborting the whole evaluation.
                unlabelled.append(key)
                continue
            total += 1
            if prediction == code_labels[key]:
                correct += 1

        if unlabelled:
            print(f"Warning: skipped {len(unlabelled)} class(es) with no entry in {java_code_labels}: {unlabelled}")
        if total:
            print(f"Correct {correct}/{total} = {int(correct/total*100)}%")
        else:
            print("No labelled classes found in the CK output; nothing to evaluate.")

    except FileNotFoundError as e:
        # May be the labels CSV, a saved .joblib artifact, or CK's class.csv.
        print(f"Error: Could not find a required input file: {e.filename}")
    except Exception as e:
        print(f"An error occurred during prediction: {e}")
    finally:
        # Best-effort cleanup of the temporary CK output on every exit path.
        shutil.rmtree(ck_output_folder, ignore_errors=True)

In [3]:
# Score the saved 'featureenvy' model against the labelled Java test classes.
run_test(codesmell='featureenvy')

Running CK tool to extract metrics...
CK tool finished. Output files saved in: ck_output
Running Code Smell Detection (featureenvy)...

Code Smell (featureenvy) Predictions for the Java classes (C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data/java_test):
Correct 15/15 = 100%


In [4]:
# Score the saved 'godclass' model against the labelled Java test classes.
run_test(codesmell='godclass')

Running CK tool to extract metrics...
CK tool finished. Output files saved in: ck_output
Running Code Smell Detection (godclass)...

Code Smell (godclass) Predictions for the Java classes (C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data/java_test):
Correct 14/15 = 93%


In [5]:
# Score the saved 'longmethod' model against the labelled Java test classes.
run_test(codesmell='longmethod')

Running CK tool to extract metrics...
CK tool finished. Output files saved in: ck_output
Running Code Smell Detection (longmethod)...

Code Smell (longmethod) Predictions for the Java classes (C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data/java_test):
Correct 15/15 = 100%


In [6]:
# Score the saved 'longparameter' model against the labelled Java test classes.
run_test(codesmell='longparameter')

Running CK tool to extract metrics...
CK tool finished. Output files saved in: ck_output
Running Code Smell Detection (longparameter)...

Code Smell (longparameter) Predictions for the Java classes (C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data/java_test):
Correct 15/15 = 100%


In [7]:
# Score the saved 'refusedbequest' model against the labelled Java test classes.
run_test(codesmell='refusedbequest')

Running CK tool to extract metrics...
CK tool finished. Output files saved in: ck_output
Running Code Smell Detection (refusedbequest)...

Code Smell (refusedbequest) Predictions for the Java classes (C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data/java_test):
Correct 15/15 = 100%


In [8]:
# Score the saved 'shotgunsurgery' model against the labelled Java test classes.
run_test(codesmell='shotgunsurgery')

Running CK tool to extract metrics...
CK tool finished. Output files saved in: ck_output
Running Code Smell Detection (shotgunsurgery)...

Code Smell (shotgunsurgery) Predictions for the Java classes (C:/Users/rachi/OneDrive/Desktop/MyFolder/CodeSmell - 3/data/java_test):
Correct 15/15 = 100%
