In [1]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_validate, train_test_split

In [2]:
# Locations of the saved classifiers, one joblib artifact per model.
random_forest_path = (
    "../../Models/requirement_relevancy_experiment/classifier_models/distilbert_random_forest_classifier.joblib"
)
gradient_boosting_path = (
    "../../Models/requirement_relevancy_experiment/classifier_models/distilbert_gradient_boost_classifier.joblib"
)
ada_boost_path = (
    "../../Models/requirement_relevancy_experiment/classifier_models/distilbert_adaboost_classifier.joblib"
)
xgboost_path = (
    "../../Models/requirement_relevancy_experiment/classifier_models/distilbert_xgboost_classifier.joblib"
)

# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
random_forest = joblib.load(random_forest_path)
gradient_boosting = joblib.load(gradient_boosting_path)
ada_boost = joblib.load(ada_boost_path)
xgboost = joblib.load(xgboost_path)

In [3]:
# Read the resampled features and labels from their comma-separated exports.
X_resampled_path = "../../Datasets/irrelevant_requirements_dataset/distilbert_X_resampled.csv"
y_resampled_path = "../../Datasets/irrelevant_requirements_dataset/distilbert_y_resampled.csv"

X_resampled = np.loadtxt(X_resampled_path, delimiter=",")
y_resampled = np.loadtxt(y_resampled_path, delimiter=",")

In [4]:
# Sanity check: the feature matrix and the label vector should have the same number of rows.
X_resampled.shape, y_resampled.shape

((1114, 49152), (1114,))

In [5]:
# Hold out a fixed fraction of the samples; the seed keeps the partition reproducible.
holdout_fraction = 0.2
split_seed = 42

X_train, X_test, y_train, y_test = train_test_split(
    X_resampled,
    y_resampled,
    test_size=holdout_fraction,
    random_state=split_seed,
)

In [6]:
# Number of cross-validation folds; passed as `cv` to every cross_validate call below.
validation_fold = 5

In [13]:
# Column-oriented accumulator: one independent list per reported quantity,
# receiving one entry per evaluated model.
cross_validation_result_dict = {
    column: []
    for column in (
        "model_name",
        "accuracy",
        "precision",
        "recall",
        "f1",
        "fit_time",
        "score_time",
    )
}


def append_scores(
    cross_validation_result_dict,
    model_name: str,
    accuracy: float,
    precision: float,
    recall: float,
    f1: float,
    fit_time: float,
    score_time: float,
):
    """Record one model's cross-validation summary in the results accumulator.

    The accumulator is a dict of parallel lists keyed by "model_name",
    "accuracy", "precision", "recall", "f1", "fit_time" and "score_time".
    It is modified in place.

    If ``model_name`` is already present, its existing row is overwritten
    instead of a second row being appended. This keeps the call idempotent,
    so re-running a notebook cell does not duplicate a model in the results.

    Args:
        cross_validation_result_dict: Accumulator holding one list per column.
        model_name: Label identifying the evaluated model.
        accuracy: Accuracy value to store for the model.
        precision: Precision value to store for the model.
        recall: Recall value to store for the model.
        f1: F1 value to store for the model.
        fit_time: Fit time value to store for the model.
        score_time: Score time value to store for the model.

    Returns:
        The same ``cross_validation_result_dict`` object, after the update.
    """
    row = {
        "model_name": model_name,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "fit_time": fit_time,
        "score_time": score_time,
    }

    recorded_names = cross_validation_result_dict["model_name"]
    if model_name in recorded_names:
        # Re-run of an already recorded model: replace its row in place.
        position = recorded_names.index(model_name)
        for column, value in row.items():
            cross_validation_result_dict[column][position] = value
    else:
        for column, value in row.items():
            cross_validation_result_dict[column].append(value)

    return cross_validation_result_dict

## Cross Validation

Cross-validation is a technique for assessing how the results of a statistical analysis will generalize to an independent data set. It is mainly used in settings where the goal is prediction, and one wants to estimate how accurately a predictive model will perform in practice. One round of cross-validation involves partitioning a sample of data into complementary subsets, performing the analysis on one subset (called the training set), and validating the analysis on the other subset (called the validation set or testing set). To reduce variability, multiple rounds of cross-validation are performed using different partitions, and the validation results are averaged over the rounds. In this notebook, each classifier is cross-validated on the training split, and the per-fold scores and timings are averaged into a single row per model.

### Random Forest Cross Validation

In [8]:
# k-fold cross-validation of the random forest on the training split
# (k = validation_fold), with folds dispatched across all available cores.
random_forest_scores = cross_validate(
    estimator=random_forest,
    X=X_train,
    y=y_train,
    scoring=["f1", "accuracy", "precision", "recall"],
    cv=validation_fold,
    n_jobs=-1,
)

In [9]:
# Per-fold timings and test scores returned by cross_validate for the random forest.
random_forest_scores

{'fit_time': array([21.36870289, 21.36269975, 21.46144104, 21.08267117, 20.98727417]),
 'score_time': array([0.04220343, 0.0392158 , 0.04178834, 0.0438571 , 0.03693438]),
 'test_f1': array([0.98850575, 0.99428571, 0.99428571, 0.97701149, 0.97206704]),
 'test_accuracy': array([0.98882682, 0.99438202, 0.99438202, 0.97752809, 0.97191011]),
 'test_precision': array([1.        , 0.98863636, 0.98863636, 0.97701149, 0.94565217]),
 'test_recall': array([0.97727273, 1.        , 1.        , 0.97701149, 1.        ])}

In [None]:
# Average every per-fold array and store the means as the "Random Forest" row.
append_scores(
    cross_validation_result_dict,
    model_name="Random Forest",
    accuracy=np.mean(random_forest_scores["test_accuracy"]),
    precision=np.mean(random_forest_scores["test_precision"]),
    recall=np.mean(random_forest_scores["test_recall"]),
    f1=np.mean(random_forest_scores["test_f1"]),
    fit_time=np.mean(random_forest_scores["fit_time"]),
    score_time=np.mean(random_forest_scores["score_time"]),
)

### AdaBoost Cross Validation

In [15]:
# k-fold cross-validation of AdaBoost on the training split
# (k = validation_fold), with folds dispatched across all available cores.
adaboost_cross_validation_scores = cross_validate(
    estimator=ada_boost,
    X=X_train,
    y=y_train,
    scoring=["f1", "accuracy", "precision", "recall"],
    cv=validation_fold,
    n_jobs=-1,
)

# Show the per-fold timings and test scores.
adaboost_cross_validation_scores

{'fit_time': array([486.62333131, 489.76314998, 494.57358479, 487.87958884,
        489.88954329]),
 'score_time': array([1.06545687, 1.02035284, 0.79375386, 0.88425612, 1.01422167]),
 'test_f1': array([0.93023256, 0.94674556, 0.93413174, 0.92682927, 0.92941176]),
 'test_accuracy': array([0.93296089, 0.9494382 , 0.93820225, 0.93258427, 0.93258427]),
 'test_precision': array([0.95238095, 0.97560976, 0.975     , 0.98701299, 0.95180723]),
 'test_recall': array([0.90909091, 0.91954023, 0.89655172, 0.87356322, 0.90804598])}

In [16]:
# Average every per-fold array and store the means as the "AdaBoost" row.
append_scores(
    cross_validation_result_dict,
    model_name="AdaBoost",
    accuracy=np.mean(adaboost_cross_validation_scores["test_accuracy"]),
    precision=np.mean(adaboost_cross_validation_scores["test_precision"]),
    recall=np.mean(adaboost_cross_validation_scores["test_recall"]),
    f1=np.mean(adaboost_cross_validation_scores["test_f1"]),
    fit_time=np.mean(adaboost_cross_validation_scores["fit_time"]),
    score_time=np.mean(adaboost_cross_validation_scores["score_time"]),
)

{'model_name': ['Random Forest', 'AdaBoost'],
 'accuracy': [0.9854058125666938, 0.9371539765237588],
 'precision': [0.9799872790877288, 0.9683621848814326],
 'recall': [0.9908568443051202, 0.9013584117032393],
 'f1': [0.9852311418113768, 0.9334701779590446],
 'fit_time': [21.252557802200318, 489.7458396434784],
 'score_time': [0.04079980850219726, 0.9556082725524903]}

### XGBoost Cross Validation

In [17]:
# k-fold cross-validation of XGBoost on the training split (k = validation_fold).
# NOTE(review): unlike the other three models, no n_jobs is passed here, so
# cross_validate uses its default — confirm this is intentional, since it makes
# the timing columns not directly comparable across models.
xgboost_scores = cross_validate(
    estimator=xgboost,
    X=X_train,
    y=y_train,
    scoring=["f1", "accuracy", "precision", "recall"],
    cv=validation_fold,
)

# Show the per-fold timings and test scores.
xgboost_scores

{'fit_time': array([ 82.7177043 ,  82.91661596, 102.46022224,  89.85641599,
        100.41938782]),
 'score_time': array([0.17369747, 0.14978862, 0.20126796, 0.14233232, 0.17237663]),
 'test_f1': array([0.95906433, 0.98285714, 1.        , 0.95857988, 0.96629213]),
 'test_accuracy': array([0.96089385, 0.98314607, 1.        , 0.96067416, 0.96629213]),
 'test_precision': array([0.98795181, 0.97727273, 1.        , 0.98780488, 0.94505495]),
 'test_recall': array([0.93181818, 0.98850575, 1.        , 0.93103448, 0.98850575])}

In [18]:
# Average every per-fold array and store the means as the "XGBoost" row.
append_scores(
    cross_validation_result_dict,
    model_name="XGBoost",
    accuracy=np.mean(xgboost_scores["test_accuracy"]),
    precision=np.mean(xgboost_scores["test_precision"]),
    recall=np.mean(xgboost_scores["test_recall"]),
    f1=np.mean(xgboost_scores["test_f1"]),
    fit_time=np.mean(xgboost_scores["fit_time"]),
    score_time=np.mean(xgboost_scores["score_time"]),
)

{'model_name': ['Random Forest', 'AdaBoost', 'XGBoost'],
 'accuracy': [0.9854058125666938, 0.9371539765237588, 0.9742012428598329],
 'precision': [0.9799872790877288, 0.9683621848814326, 0.9796168715210737],
 'recall': [0.9908568443051202, 0.9013584117032393, 0.9679728317659352],
 'f1': [0.9852311418113768, 0.9334701779590446, 0.9733586973661577],
 'fit_time': [21.252557802200318, 489.7458396434784, 91.6740692615509],
 'score_time': [0.04079980850219726, 0.9556082725524903, 0.16789259910583496]}

### Gradient Boosting Cross Validation


In [19]:
# k-fold cross-validation of gradient boosting on the training split
# (k = validation_fold), with folds dispatched across all available cores.
gradient_boosting_cross_validation_scores = cross_validate(
    estimator=gradient_boosting,
    X=X_train,
    y=y_train,
    scoring=["f1", "accuracy", "precision", "recall"],
    cv=validation_fold,
    n_jobs=-1,
)

In [20]:
# Average every per-fold array and store the means as the "Gradient Boosting" row.
append_scores(
    cross_validation_result_dict,
    model_name="Gradient Boosting",
    accuracy=np.mean(gradient_boosting_cross_validation_scores["test_accuracy"]),
    precision=np.mean(gradient_boosting_cross_validation_scores["test_precision"]),
    recall=np.mean(gradient_boosting_cross_validation_scores["test_recall"]),
    f1=np.mean(gradient_boosting_cross_validation_scores["test_f1"]),
    fit_time=np.mean(gradient_boosting_cross_validation_scores["fit_time"]),
    score_time=np.mean(gradient_boosting_cross_validation_scores["score_time"]),
)

{'model_name': ['Random Forest', 'AdaBoost', 'XGBoost', 'Gradient Boosting'],
 'accuracy': [0.9854058125666938,
  0.9371539765237588,
  0.9742012428598329,
  0.9786830707425773],
 'precision': [0.9799872790877288,
  0.9683621848814326,
  0.9796168715210737,
  0.9864045773348099],
 'recall': [0.9908568443051202,
  0.9013584117032393,
  0.9679728317659352,
  0.9702455590386625],
 'f1': [0.9852311418113768,
  0.9334701779590446,
  0.9733586973661577,
  0.9780300985964798],
 'fit_time': [21.252557802200318,
  489.7458396434784,
  91.6740692615509,
  2859.247578382492],
 'score_time': [0.04079980850219726,
  0.9556082725524903,
  0.16789259910583496,
  0.033298921585083005]}

## Result Export

In [21]:
# Inspect the aggregated per-model means before exporting them.
cross_validation_result_dict

{'model_name': ['Random Forest', 'AdaBoost', 'XGBoost', 'Gradient Boosting'],
 'accuracy': [0.9854058125666938,
  0.9371539765237588,
  0.9742012428598329,
  0.9786830707425773],
 'precision': [0.9799872790877288,
  0.9683621848814326,
  0.9796168715210737,
  0.9864045773348099],
 'recall': [0.9908568443051202,
  0.9013584117032393,
  0.9679728317659352,
  0.9702455590386625],
 'f1': [0.9852311418113768,
  0.9334701779590446,
  0.9733586973661577,
  0.9780300985964798],
 'fit_time': [21.252557802200318,
  489.7458396434784,
  91.6740692615509,
  2859.247578382492],
 'score_time': [0.04079980850219726,
  0.9556082725524903,
  0.16789259910583496,
  0.033298921585083005]}

In [22]:
# Destination of the aggregated cross-validation table.
results_csv_path = Path(
    "../../Results/irrelevant_requirements_experiment/classifiers_with_distilbert_cross_validation_results.csv"
)

# Create the output folder up front: the cross-validation runs above are slow,
# and a missing directory would otherwise make the export fail after all that work.
results_csv_path.parent.mkdir(parents=True, exist_ok=True)

# One row per model, one column per accumulator key; the index is not written.
cross_validation_result_df = pd.DataFrame(cross_validation_result_dict)
cross_validation_result_df.to_csv(results_csv_path, index=False)

In [23]:
# Display the table that was written to the results CSV.
cross_validation_result_df

Unnamed: 0,model_name,accuracy,precision,recall,f1,fit_time,score_time
0,Random Forest,0.985406,0.979987,0.990857,0.985231,21.252558,0.0408
1,AdaBoost,0.937154,0.968362,0.901358,0.93347,489.74584,0.955608
2,XGBoost,0.974201,0.979617,0.967973,0.973359,91.674069,0.167893
3,Gradient Boosting,0.978683,0.986405,0.970246,0.97803,2859.247578,0.033299
