Model Evaluation

In [407]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore', category = FutureWarning)

In [None]:
# Import pandas libraries
import pandas as pd

# Load the dataset
data = pd.read_csv("Data/zookeeper_result.csv", delimiter=',')

# Explore the dataset
print(data.head()) 



                                         Method name  C20  C3  C4  C1  C5  C6  \
0        org.apache.zookeeper.graph.Log4JSource.init    8  52   5  42  10  33   
1        org.apache.zookeeper.graph.Log4JSource.main    3  48   7  37  14  31   
2  org.apache.zookeeper.graph.RandomAccessFileRea...    2  30   1  29   5  23   
3  org.apache.zookeeper.graph.servlets.NumEvents....    3  35   6  29   8  24   
4  org.apache.zookeeper.graph.servlets.StaticCont...    4  20   1  18   3  12   

   C2  C21  C18  ...  H4  H1  H2  H3       H12  H13  H14  H15  H5  \
0   5   19   32  ...   1   1   0   1  0.019231  0.0  0.0  1.0   1   
1   4    7    2  ...   1   2   0   2  0.041667  0.0  0.0  2.0   1   
2   0    7   16  ...   1   1   0   1  0.033333  0.0  0.0  1.0   1   
3   0   11   12  ...   1   1   0   1  0.028571  0.0  0.0  1.0   1   
4   1   12    3  ...   1   3   0   3  0.150000  0.0  0.0  3.0   1   

       bug-prone  
0  not bug-prone  
1      bug-prone  
2      bug-prone  
3      bug-prone  
4  

In [409]:
# copy data
data_transform = data.copy()

In [410]:

# Convert 'bug-prone' column to 0 and 1
data_transform['bug-prone'] = data_transform['bug-prone'].apply(lambda x: 1 if x.strip() == 'bug-prone' else 0)

# Display the updated DataFrame
print(data_transform.head()) 


                                         Method name  C20  C3  C4  C1  C5  C6  \
0        org.apache.zookeeper.graph.Log4JSource.init    8  52   5  42  10  33   
1        org.apache.zookeeper.graph.Log4JSource.main    3  48   7  37  14  31   
2  org.apache.zookeeper.graph.RandomAccessFileRea...    2  30   1  29   5  23   
3  org.apache.zookeeper.graph.servlets.NumEvents....    3  35   6  29   8  24   
4  org.apache.zookeeper.graph.servlets.StaticCont...    4  20   1  18   3  12   

   C2  C21  C18  ...  H4  H1  H2  H3       H12  H13  H14  H15  H5  bug-prone  
0   5   19   32  ...   1   1   0   1  0.019231  0.0  0.0  1.0   1          0  
1   4    7    2  ...   1   2   0   2  0.041667  0.0  0.0  2.0   1          1  
2   0    7   16  ...   1   1   0   1  0.033333  0.0  0.0  1.0   1          1  
3   0   11   12  ...   1   1   0   1  0.028571  0.0  0.0  1.0   1          1  
4   1   12    3  ...   1   3   0   3  0.150000  0.0  0.0  3.0   1          1  

[5 rows x 42 columns]


In [411]:
print(data_transform.shape)

(710, 42)


In [412]:

# split feature data and target data
feature_X = data_transform.drop(columns=['Method name','bug-prone'])
y = data_transform['bug-prone']



In [None]:
from imblearn.combine import SMOTETomek
from sklearn.preprocessing import StandardScaler

# Apply SMOTETomek
smote = SMOTETomek(random_state=42)
X_resampled, y_resampled = smote.fit_resample(feature_X, y)

# Apply StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)



In [414]:
X = X_scaled
y = y_resampled

In [None]:
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import make_scorer, precision_score, recall_score, f1_score, accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from catboost import CatBoostClassifier



rf_classifier = RandomForestClassifier(n_estimators=500, max_depth=None, min_samples_split=5, 
                                       min_samples_leaf=2, class_weight='balanced', random_state=42)
ada_classifier = AdaBoostClassifier(n_estimators=100, learning_rate=0.8, random_state=42)
bagging_classifier = BaggingClassifier(n_estimators=100, max_samples=0.8, random_state=42)
knn_classifier = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan')
mlp_classifier = MLPClassifier(activation='relu', hidden_layer_sizes=(200,100), max_iter=2000, 
                            learning_rate='adaptive', random_state=42)
hgb_classifier = HistGradientBoostingClassifier(random_state=42)  
dt_classifier = DecisionTreeClassifier(random_state=42)
svm_classifier = SVC(random_state=42, probability=True, C=10, kernel='poly', gamma='scale')
xgb_classifier = XGBClassifier(n_estimators=200, learning_rate=0.05, max_depth=4, random_state=42)
cb_classifier = CatBoostClassifier(iterations=200, learning_rate=0.05, depth=4, verbose=0, random_state=42)



In [None]:
from sklearn.metrics import (
    make_scorer, precision_score, recall_score, f1_score, 
    matthews_corrcoef, roc_auc_score
)

# Define custom scorers for cross-validation
scoring = {
    'accuracy': 'accuracy',
    'precision': make_scorer(precision_score, average='binary'),
    'recall': make_scorer(recall_score, average='binary'),
    'f1': make_scorer(f1_score, average='binary'),
    'mcc': make_scorer(matthews_corrcoef),
    'auc': make_scorer(roc_auc_score)
}

# Helper function to display results
def print_cv_results(results):
    for metric in scoring.keys():
        mean = results[f'test_{metric}'].mean()
        std = results[f'test_{metric}'].std()
        print(f"{metric.capitalize()}: {mean:.4f}")

# function to perform cross validation
def crossvalidate_fun(classifier, X_train, y_train):
    cv_results = cross_validate(classifier, X_train, y_train, cv=10, scoring=scoring)
    print_cv_results(cv_results)



In [417]:
import pandas as pd
import openpyxl

def write_cv_results_to_excel(classifier, column_letter):
    file_path = "test.xlsx"
    
    # Load the Excel file using openpyxl (preserves formatting)
    wb = openpyxl.load_workbook(file_path)
    sheet = wb.active  # Get the first sheet
    
    # Define the mapping of metrics to row numbers
    row_mapping = {
        "accuracy": 2,
        "precision": 3,
        "recall": 4,
        "f1": 5,
        "mcc": 6,
        "auc": 7
    }
    
    # Perform cross-validation
    cv_results = cross_validate(classifier, X, y, cv=10, scoring=scoring)
    
    # Write results to the specified column
    for metric, row in row_mapping.items():
        mean_value = round(cv_results[f'test_{metric}'].mean(), 4)
        sheet[f"{column_letter}{row}"] = mean_value  # Write to the correct cell
    
    # Save the updated Excel file
    wb.save(file_path)
    print_cv_results(cv_results)
    


In [418]:
# crossvalidate_fun(rf_classifier,X, y)
write_cv_results_to_excel(rf_classifier, "B")


Accuracy: 0.6918
Precision: 0.7069
Recall: 0.6917
F1: 0.6883
Mcc: 0.3943
Auc: 0.6917


In [419]:
# crossvalidate_fun(ada_classifier,X, y)
write_cv_results_to_excel(ada_classifier, "C")

Accuracy: 0.6871
Precision: 0.7058
Recall: 0.6576
F1: 0.6743
Mcc: 0.3823
Auc: 0.6872


In [420]:
# crossvalidate_fun(bagging_classifier,X, y)
write_cv_results_to_excel(bagging_classifier, "D")

Accuracy: 0.6919
Precision: 0.7104
Recall: 0.6825
F1: 0.6836
Mcc: 0.3960
Auc: 0.6917


In [421]:
# crossvalidate_fun(knn_classifier,X, y)
write_cv_results_to_excel(knn_classifier, "E")

Accuracy: 0.6590
Precision: 0.6887
Recall: 0.5982
F1: 0.6302
Mcc: 0.3281
Auc: 0.6589


In [422]:
# crossvalidate_fun(mlp_classifier,X, y)
write_cv_results_to_excel(mlp_classifier, "F")

Accuracy: 0.6640
Precision: 0.6784
Recall: 0.6454
F1: 0.6465
Mcc: 0.3398
Auc: 0.6638


In [423]:
# crossvalidate_fun(hgb_classifier,X, y)
write_cv_results_to_excel(hgb_classifier, "G")

Accuracy: 0.6793
Precision: 0.6933
Recall: 0.6699
F1: 0.6740
Mcc: 0.3654
Auc: 0.6793


In [424]:
# crossvalidate_fun(dt_classifier,X, y)
write_cv_results_to_excel(dt_classifier, "H")

Accuracy: 0.6387
Precision: 0.6491
Recall: 0.6476
F1: 0.6410
Mcc: 0.2843
Auc: 0.6388


In [425]:
# crossvalidate_fun(svm_classifier,X, y)
write_cv_results_to_excel(svm_classifier, "I")

Accuracy: 0.6638
Precision: 0.8467
Recall: 0.4151
F1: 0.5303
Mcc: 0.3861
Auc: 0.6638


In [426]:
# crossvalidate_fun(xgb_classifier,X, y)
write_cv_results_to_excel(xgb_classifier, "J")

Accuracy: 0.6684
Precision: 0.6869
Recall: 0.6481
F1: 0.6557
Mcc: 0.3480
Auc: 0.6684


In [427]:
# crossvalidate_fun(cb_classifier,X, y)
write_cv_results_to_excel(cb_classifier, "K")

Accuracy: 0.6965
Precision: 0.7144
Recall: 0.6668
F1: 0.6813
Mcc: 0.4021
Auc: 0.6964


In [428]:
models = {
    
    "RandomForest" :  RandomForestClassifier(n_estimators=500, max_depth=None, min_samples_split=5, 
                                       min_samples_leaf=2, class_weight='balanced', random_state=42),
    "AdaBoostClassifier" : AdaBoostClassifier(n_estimators=100, learning_rate=0.8, random_state=42),
    "BaggingClassifier" : BaggingClassifier(n_estimators=100, max_samples=0.8, random_state=42),
    "KNeighborsClassifier" : KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan'),
    "MLPClassifier" : MLPClassifier(activation='relu', hidden_layer_sizes=(200,100), max_iter=2000, 
                               learning_rate='adaptive', random_state=42),
    "HistGradientBoostingClassifier" : HistGradientBoostingClassifier(random_state=42),   
    "DecisionTreeClassifier" : DecisionTreeClassifier(random_state=42),
    "SVC" : SVC(random_state=42, probability=True, C=10, kernel='poly', gamma='scale'),
    "XGBoost": XGBClassifier(n_estimators=200, learning_rate=0.05, max_depth=4, random_state=42),
    "CatBoost": CatBoostClassifier(iterations=200, learning_rate=0.05, depth=4, verbose=0, random_state=42)
}

In [None]:
import numpy as np
from sklearn.model_selection import StratifiedKFold

# Initialize cross-validation
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Store metrics
accuracy_scores, precision_scores, recall_scores, f1_scores, mcc_scores, auc_scores = [], [], [], [], [], []

# Perform 10-Fold Cross-Validation
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    probabilities = []
    max_scores = []
    
    # Train all models and get probabilities
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probability for class 1
        probabilities.append(y_pred_proba)
        max_scores.append(roc_auc_score(y_test, y_pred_proba))
    
    # Normalize AUC scores to get weights
    weights = [score / sum(max_scores) for score in max_scores]

    combined_probs = np.sum([weight * probs for weight, probs in zip(weights, probabilities)], axis=0)

    # Determine best threshold using precision-recall
    from sklearn.metrics import precision_recall_curve

    def get_best_threshold_precision_recall(y_true, y_proba):
        precision, recall, thresholds = precision_recall_curve(y_true, y_proba)
        f1_scores = np.where((precision + recall) == 0, 0, 2 * (precision * recall) / (precision + recall))
        best_threshold = thresholds[np.argmax(f1_scores[:-1])]
        return best_threshold

    best_threshold_pr = get_best_threshold_precision_recall(y_test, combined_probs)

    # Convert probabilities to binary predictions using threshold
    predictions = (combined_probs > best_threshold_pr).astype(int)

    # Compute performance metrics
    accuracy_scores.append(accuracy_score(y_test, predictions))
    precision_scores.append(precision_score(y_test, predictions))
    recall_scores.append(recall_score(y_test, predictions))
    f1_scores.append(f1_score(y_test, predictions))
    mcc_scores.append(matthews_corrcoef(y_test, predictions))
    auc_scores.append(roc_auc_score(y_test, combined_probs))

# Print final results (average over 10 folds)
print(f"Final 10-Fold CV Results:")
print(f"Accuracy: {np.mean(accuracy_scores):.4f}")
print(f"Precision: {np.mean(precision_scores):.4f}")
print(f"Recall: {np.mean(recall_scores):.4f}")
print(f"F1-Score: {np.mean(f1_scores):.4f}")
print(f"MCC: {np.mean(mcc_scores):.4f}")
print(f"AUC: {np.mean(auc_scores):.4f}")



Final 10-Fold CV Results:
Accuracy: 0.7789
Precision: 0.7451
Recall: 0.8877
F1-Score: 0.8044
MCC: 0.5781
AUC: 0.8532
