In [26]:
import pandas as pd
import os

In [27]:
os.chdir(r'C:\SML_Projects\SML_CVE_type_cwe_predict')

In [28]:
df = pd.read_csv("data/preprocessed/preprocessed_dataset.csv")

In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102963 entries, 0 to 102962
Data columns (total 29 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   cve_id                      102963 non-null  float64
 1   description                 102963 non-null  float64
 2   cvss_score                  102963 non-null  float64
 3   cwe                         102963 non-null  float64
 4   vendor                      102963 non-null  float64
 5   product                     102963 non-null  float64
 6   publish_date                102963 non-null  float64
 7   type                        102963 non-null  float64
 8   vendor_freq                 102963 non-null  float64
 9   product_freq                102963 non-null  float64
 10  desc_len                    102963 non-null  float64
 11  desc_word_count             102963 non-null  float64
 12  desc_num_count              102963 non-null  float64
 13  desc_upper_rat

In [30]:
df = df.sample(frac=0.1, random_state=42).reset_index(drop=True)

In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10296 entries, 0 to 10295
Data columns (total 29 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   cve_id                      10296 non-null  float64
 1   description                 10296 non-null  float64
 2   cvss_score                  10296 non-null  float64
 3   cwe                         10296 non-null  float64
 4   vendor                      10296 non-null  float64
 5   product                     10296 non-null  float64
 6   publish_date                10296 non-null  float64
 7   type                        10296 non-null  float64
 8   vendor_freq                 10296 non-null  float64
 9   product_freq                10296 non-null  float64
 10  desc_len                    10296 non-null  float64
 11  desc_word_count             10296 non-null  float64
 12  desc_num_count              10296 non-null  float64
 13  desc_upper_ratio            102

In [32]:
selected_features = []

for target in ['type', 'cvss_score']:
    corr = df.corr()[target].abs()
    features = corr[corr >= 0.04].index.tolist()
    
    if target in features:
        features.remove(target)
    
    selected_features += features 

selected_features = list(set(selected_features) - set(['type', 'cvss_score']))

print("Selected features:", selected_features)

Selected features: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier

In [34]:
x = df.drop(['type', 'cvss_score'], axis=1)   
y = df[['type', 'cvss_score']] 

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [35]:
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Logistic Regression

In [36]:
lr = LogisticRegression()
multi_lr = MultiOutputClassifier(lr)

multi_lr.fit(x_train[selected_features], y_train)
y_pred = multi_lr.predict(x_test[selected_features])

lr_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
lr_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

lr_scores_type = cross_val_score(lr, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
lr_scores_cvss_score  = cross_val_score(lr, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Logistic Regression Accuracy for 'type':", lr_accuracy_type)
print("Logistic Regression Accuracy for 'cvss_score' :", lr_accuracy_cvss_score)

print("K-Fold mean F1 (type):", lr_scores_type.mean())
print("K-Fold std  F1 (type):", lr_scores_type.std())

print("K-Fold mean F1 (cvss_score):", lr_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", lr_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Logistic Regression Accuracy for 'type': 0.7592233009708738
Logistic Regression Accuracy for 'cvss_score' : 0.5951456310679611
K-Fold mean F1 (type): 0.5923091354855772
K-Fold std  F1 (type): 0.006250180014671314
K-Fold mean F1 (cvss_score): 0.30486384441531506
K-Fold std  F1 (cvss_score): 0.008224601121231233
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        28
         1.0       0.97      0.78      0.87        50
         2.0       0.80      0.95      0.87       103
         3.0       0.22 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


# Decision Tree

In [59]:
dt = DecisionTreeClassifier(random_state=42)
multi_dt = MultiOutputClassifier(dt)

multi_dt.fit(x_train[selected_features], y_train)
y_pred = multi_dt.predict(x_test[selected_features])

dt_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
dt_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

dt_scores_type = cross_val_score(dt, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
dt_scores_cvss_score  = cross_val_score(dt, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Decision Tree Accuracy for 'type':", dt_accuracy_type)
print("Decision Tree Accuracy for 'cvss_score' :", dt_accuracy_cvss_score)

print("K-Fold mean F1 (type):", dt_scores_type.mean())
print("K-Fold std  F1 (type):", dt_scores_type.std())

print("K-Fold mean F1 (cvss_score):", dt_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", dt_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Decision Tree Accuracy for 'type': 0.9349514563106797
Decision Tree Accuracy for 'cvss_score' : 0.6019417475728155
K-Fold mean F1 (type): 0.8843243019970249
K-Fold std  F1 (type): 0.012501681914811477
K-Fold mean F1 (cvss_score): 0.4789049058718095
K-Fold std  F1 (cvss_score): 0.008466421661457642
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.88      0.79      0.83        28
         1.0       1.00      0.94      0.97        50
         2.0       0.85      0.90      0.88       103
         3.0       0.95      0.93    

# RandomForest

In [60]:
rf = RandomForestClassifier(random_state=42)
multi_rf = MultiOutputClassifier(rf)

multi_rf.fit(x_train[selected_features], y_train)
y_pred = multi_rf.predict(x_test[selected_features])

rf_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
rf_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

rf_scores_type = cross_val_score(rf, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
rf_scores_cvss_score  = cross_val_score(rf, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Random Forest Accuracy for 'type':", rf_accuracy_type)
print("Random Forest Accuracy for 'cvss_score' :", rf_accuracy_cvss_score)

print("K-Fold mean F1 (type):", rf_scores_type.mean())
print("K-Fold std  F1 (type):", rf_scores_type.std())

print("K-Fold mean F1 (cvss_score):", rf_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", rf_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Random Forest Accuracy for 'type': 0.9121359223300971
Random Forest Accuracy for 'cvss_score' : 0.6868932038834952
K-Fold mean F1 (type): 0.8280016755912566
K-Fold std  F1 (type): 0.007858586242875868
K-Fold mean F1 (cvss_score): 0.5528414174057499
K-Fold std  F1 (cvss_score): 0.011332850441586832
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       1.00      0.18      0.30        28
         1.0       0.98      0.88      0.93        50
         2.0       0.83      0.97      0.90       103
         3.0       0.78      0.88    

# Gradient Boosting

In [39]:
gb = GradientBoostingClassifier(random_state=42)
multi_gbr = MultiOutputClassifier(gb)

multi_gbr.fit(x_train[selected_features], y_train)
y_pred = multi_gbr.predict(x_test[selected_features])

gb_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
gb_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

gb_scores_type = cross_val_score(gb, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
gb_scores_cvss_score  = cross_val_score(gb, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Gradient Boosting Accuracy for 'type':", gb_accuracy_type)
print("Gradient Boosting Accuracy for 'cvss_score' :", gb_accuracy_cvss_score)

print("K-Fold mean F1 (type):", gb_scores_type.mean())
print("K-Fold std  F1 (type):", gb_scores_type.std())

print("K-Fold mean F1 (cvss_score):", gb_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", gb_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Gradient Boosting Accuracy for 'type': 0.9436893203883495
Gradient Boosting Accuracy for 'cvss_score' : 0.6524271844660194
K-Fold mean F1 (type): 0.9275058485221206
K-Fold std  F1 (type): 0.0012750347146695893
K-Fold mean F1 (cvss_score): 0.5092599386132255
K-Fold std  F1 (cvss_score): 0.010106767892958491
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       1.00      0.86      0.92        28
         1.0       1.00      0.92      0.96        50
         2.0       0.88      0.95      0.91       103
         3.0       0.95     

# Extra Tree

In [40]:
et = ExtraTreesClassifier(random_state=42)
multi_et = MultiOutputClassifier(et)

multi_et.fit(x_train[selected_features], y_train)
y_pred = multi_et.predict(x_test[selected_features])

et_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
et_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

et_scores_type = cross_val_score(et, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
et_scores_cvss_score  = cross_val_score(et, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Extra Trees Accuracy for 'type':", et_accuracy_type)
print("Extra Trees Accuracy for 'cvss_score' :", et_accuracy_cvss_score)

print("K-Fold mean F1 (type):", et_scores_type.mean())
print("K-Fold std  F1 (type):", et_scores_type.std())

print("K-Fold mean F1 (cvss_score):", et_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", et_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Extra Trees Accuracy for 'type': 0.9014563106796116
Extra Trees Accuracy for 'cvss_score' : 0.6825242718446602
K-Fold mean F1 (type): 0.824293133959998
K-Fold std  F1 (type): 0.015030205397220815
K-Fold mean F1 (cvss_score): 0.5575898042311854
K-Fold std  F1 (cvss_score): 0.015077108240897977
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.82      0.32      0.46        28
         1.0       0.98      0.90      0.94        50
         2.0       0.83      0.97      0.89       103
         3.0       0.78      0.80      0.7

# Hist Gradient Boosting

In [41]:
hgb = HistGradientBoostingClassifier(max_iter=220, max_depth=5, random_state=42)
multi_hgb = MultiOutputClassifier(hgb)

multi_hgb.fit(x_train[selected_features], y_train)
y_pred = multi_hgb.predict(x_test[selected_features])

hgb_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
hgb_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

hgb_scores_type = cross_val_score(hgb, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
hgb_scores_cvss_score  = cross_val_score(hgb, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("HistGradientBoosting Accuracy for 'type':", hgb_accuracy_type)
print("HistGradientBoosting Accuracy for 'cvss_score' :", hgb_accuracy_cvss_score)

print("K-Fold mean F1 (type):", hgb_scores_type.mean())
print("K-Fold std  F1 (type):", hgb_scores_type.std())

print("K-Fold mean F1 (cvss_score):", hgb_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", hgb_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

HistGradientBoosting Accuracy for 'type': 0.945631067961165
HistGradientBoosting Accuracy for 'cvss_score' : 0.6757281553398058
K-Fold mean F1 (type): 0.9190522940831265
K-Fold std  F1 (type): 0.00898224320725334
K-Fold mean F1 (cvss_score): 0.5557306124460768
K-Fold std  F1 (cvss_score): 0.013636690580560558
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.96      0.86      0.91        28
         1.0       0.98      0.92      0.95        50
         2.0       0.83      0.92      0.87       103
         3.0       0.95  

# KNN

In [42]:
knn = KNeighborsClassifier()
multi_knn = MultiOutputClassifier(knn)

multi_knn.fit(x_train[selected_features], y_train)
y_pred = multi_knn.predict(x_test[selected_features])

knn_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
knn_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

knn_scores_type = cross_val_score(knn, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
knn_scores_cvss_score  = cross_val_score(knn, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("KNN Accuracy for 'type':", knn_accuracy_type)
print("KNN Accuracy for 'cvss_score' :", knn_accuracy_cvss_score)

print("K-Fold mean F1 (type):", knn_scores_type.mean())
print("K-Fold std  F1 (type):", knn_scores_type.std())

print("K-Fold mean F1 (cvss_score):", knn_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", knn_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

KNN Accuracy for 'type': 0.8145631067961165
KNN Accuracy for 'cvss_score' : 0.6174757281553398
K-Fold mean F1 (type): 0.6516879574336875
K-Fold std  F1 (type): 0.0016867480142852942
K-Fold mean F1 (cvss_score): 0.49481467596056317
K-Fold std  F1 (cvss_score): 0.013803300162317199
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.33      0.21      0.26        28
         1.0       0.91      0.82      0.86        50
         2.0       0.77      0.77      0.77       103
         3.0       0.55      0.66      0.60       200
 

# Adaboost

In [43]:
ab = AdaBoostClassifier(n_estimators=220, random_state=42)
multi_ada = MultiOutputClassifier(ab)

multi_ada.fit(x_train[selected_features], y_train)
y_pred = multi_ada.predict(x_test[selected_features])

ab_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
ab_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

ab_scores_type = cross_val_score(ab, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
ab_scores_cvss_score  = cross_val_score(ab, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("AdaBoost Accuracy for 'type':", ab_accuracy_type)
print("AdaBoost Accuracy for 'cvss_score' :", ab_accuracy_cvss_score)

print("K-Fold mean F1 (type):", ab_scores_type.mean())
print("K-Fold std  F1 (type):", ab_scores_type.std())

print("K-Fold mean F1 (cvss_score):", ab_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", ab_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

AdaBoost Accuracy for 'type': 0.7961165048543689
AdaBoost Accuracy for 'cvss_score' : 0.5844660194174758
K-Fold mean F1 (type): 0.5891460853764707
K-Fold std  F1 (type): 0.01821092970752853
K-Fold mean F1 (cvss_score): 0.315988818214427
K-Fold std  F1 (cvss_score): 0.005067793735139676
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       1.00      0.25      0.40        28
         1.0       0.93      0.80      0.86        50
         2.0       0.69      0.92      0.79       103
         3.0       0.68      0.23      0.35      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


# Bagging

In [44]:
from sklearn.ensemble import BaggingClassifier

bagging = BaggingClassifier(n_estimators=100, random_state=42)
multi_bagging = MultiOutputClassifier(bagging)

multi_bagging.fit(x_train[selected_features], y_train)
y_pred = multi_bagging.predict(x_test[selected_features])

bag_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
bag_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

bag_scores_type = cross_val_score(bagging, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
bag_scores_cvss_score  = cross_val_score(bagging, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Bagging Accuracy for 'type':", bag_accuracy_type)
print("Bagging Accuracy for 'cvss_score' :", bag_accuracy_cvss_score)

print("K-Fold mean F1 (type):", bag_scores_type.mean())
print("K-Fold std  F1 (type):", bag_scores_type.std())

print("K-Fold mean F1 (cvss_score):", bag_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", bag_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Bagging Accuracy for 'type': 0.9466019417475728
Bagging Accuracy for 'cvss_score' : 0.6786407766990291
K-Fold mean F1 (type): 0.9204254492751835
K-Fold std  F1 (type): 0.005555509703043668
K-Fold mean F1 (cvss_score): 0.5619268596334673
K-Fold std  F1 (cvss_score): 0.012366686035912153
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       1.00      0.89      0.94        28
         1.0       1.00      0.94      0.97        50
         2.0       0.83      0.90      0.87       103
         3.0       0.97      0.94      0.95      

# SVC

In [45]:
from sklearn.svm import SVC

svc = SVC(kernel='rbf', C=5, probability=True)
multi_svc = MultiOutputClassifier(svc)

multi_svc.fit(x_train[selected_features], y_train)
y_pred = multi_svc.predict(x_test[selected_features])

svc_accuracy_type = accuracy_score(y_test['type'], y_pred[:, 0])
svc_accuracy_cvss_score  = accuracy_score(y_test['cvss_score'], y_pred[:, 1])

svc_scores_type = cross_val_score(svc, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
svc_scores_cvss_score  = cross_val_score(svc, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("SVC Accuracy for 'type':", svc_accuracy_type)
print("SVC Accuracy for 'cvss_score' :", svc_accuracy_cvss_score)

print("K-Fold mean F1 (type):", svc_scores_type.mean())
print("K-Fold std  F1 (type):", svc_scores_type.std())

print("K-Fold mean F1 (cvss_score):", svc_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", svc_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

SVC Accuracy for 'type': 0.833495145631068
SVC Accuracy for 'cvss_score' : 0.6320388349514563
K-Fold mean F1 (type): 0.6754804633835881
K-Fold std  F1 (type): 0.009286583464018684
K-Fold mean F1 (cvss_score): 0.3448318952318084
K-Fold std  F1 (cvss_score): 0.0009289205391715102
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.67      0.07      0.13        28
         1.0       0.98      0.80      0.88        50
         2.0       0.78      0.95      0.86       103
         3.0       0.55      0.68      0.60       200
   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


# Hard Voting

In [53]:
from sklearn.ensemble import VotingClassifier

model1 = RandomForestClassifier(n_estimators=200, random_state=42)
model2 = ExtraTreesClassifier(n_estimators=200, random_state=42)
model3 = LogisticRegression(max_iter=500)

voting_hard = VotingClassifier(
    estimators=[
        ('rf', model1),
        ('et', model2),
        ('lr', model3)
    ],
    voting='hard'
)

multi_voting_hard = MultiOutputClassifier(voting_hard)

multi_voting_hard.fit(x_train[selected_features], y_train)
y_pred = multi_voting_hard.predict(x_test[selected_features])

hard_acc_type = accuracy_score(y_test["type"], y_pred[:, 0])
hard_acc_cvss_score  = accuracy_score(y_test["cvss_score"],  y_pred[:, 1])

hard_scores_type = cross_val_score(voting_hard, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
hard_scores_cvss_score  = cross_val_score(voting_hard, x[selected_features], y['cvss_score'],  cv=kf, scoring='f1_macro')

print("Hard Voting Accuracy for type:", hard_acc_type)
print("Hard Voting Accuracy for cvss_score :", hard_acc_cvss_score)

print("K-fold F1 mean (type):", hard_scores_type.mean())
print("K-fold F1 std  (type):", hard_scores_type.std())

print("K-fold F1 mean (cvss_score) :", hard_scores_cvss_score.mean())
print("K-fold F1 std  (cvss_score) :", hard_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Hard Voting Accuracy for type: 0.9053398058252428
Hard Voting Accuracy for cvss_score : 0.6868932038834952
K-fold F1 mean (type): 0.8212175810473052
K-fold F1 std  (type): 0.005298101155705767
K-fold F1 mean (cvss_score) : 0.5473071467489796
K-fold F1 std  (cvss_score) : 0.013028364165624497
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.80      0.29      0.42        28
         1.0       0.98      0.88      0.93        50
         2.0       0.83      0.97      0.89       103
         3.0       0.80      0.82      0.81

# Soft Voting

In [54]:
model1 = RandomForestClassifier(n_estimators=200, random_state=42)
model2 = ExtraTreesClassifier(n_estimators=200, random_state=42)
model3 = LogisticRegression(max_iter=500)

voting_hard = VotingClassifier(
    estimators=[
        ('rf', model1),
        ('et', model2),
        ('lr', model3)
    ],
    voting='soft'
)

multi_voting_hard = MultiOutputClassifier(voting_hard)

multi_voting_hard.fit(x_train[selected_features], y_train)
y_pred = multi_voting_hard.predict(x_test[selected_features])

soft_acc_type = accuracy_score(y_test["type"], y_pred[:, 0])
soft_acc_cvss_score  = accuracy_score(y_test["cvss_score"],  y_pred[:, 1])

soft_scores_type = cross_val_score(voting_hard, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
soft_scores_cvss_score  = cross_val_score(voting_hard, x[selected_features], y['cvss_score'],  cv=kf, scoring='f1_macro')


print("Hard Voting Accuracy for type:", soft_acc_type)
print("Hard Voting Accuracy for cvss_score :", soft_acc_cvss_score)

print("K-fold F1 mean (type):", soft_scores_type.mean())
print("K-fold F1 std  (type):", soft_scores_type.std())

print("K-fold F1 mean (cvss_score) :", soft_scores_cvss_score.mean())
print("K-fold F1 std  (cvss_score) :", soft_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Hard Voting Accuracy for type: 0.9029126213592233
Hard Voting Accuracy for cvss_score : 0.6800970873786408
K-fold F1 mean (type): 0.8056456943222755
K-fold F1 std  (type): 0.007718480421667791
K-fold F1 mean (cvss_score) : 0.531684979950014
K-fold F1 std  (cvss_score) : 0.002339979612694998
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       1.00      0.21      0.35        28
         1.0       0.98      0.88      0.93        50
         2.0       0.83      0.98      0.90       103
         3.0       0.81      0.82      0.81 

# Stacking

In [55]:
from sklearn.ensemble import StackingClassifier

base1 = RandomForestClassifier(n_estimators=200, random_state=42)
base2 = ExtraTreesClassifier(n_estimators=200, random_state=42)
base3 = LogisticRegression(max_iter=500)

stacking = StackingClassifier(
    estimators=[
        ('rf', base1),
        ('et', base2),
        ('lr', base3)
    ],
    final_estimator=LogisticRegression(max_iter=500)
)

multi_stacking = MultiOutputClassifier(stacking)

multi_stacking.fit(x_train[selected_features], y_train)
y_pred = multi_stacking.predict(x_test[selected_features])

stacking_acc_type = accuracy_score(y_test["type"], y_pred[:, 0])
stacking_acc_cvss_score  = accuracy_score(y_test["cvss_score"],  y_pred[:, 1])

stacking_scores_type = cross_val_score(stacking, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
stacking_scores_cvss_score  = cross_val_score(stacking, x[selected_features], y['cvss_score'],  cv=kf, scoring='f1_macro')

print("Stacking Accuracy for type:", stacking_acc_type)
print("Stacking Accuracy for cvss_score :", stacking_acc_cvss_score)

print("K-fold F1 mean (type):", stacking_scores_type.mean())
print("K-fold F1 std  (type):", stacking_scores_type.std())

print("K-fold F1 mean (cvss_score):", stacking_scores_cvss_score.mean())
print("K-fold F1 std  (cvss_score):", stacking_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Stacking Accuracy for type: 0.9257281553398058
Stacking Accuracy for cvss_score : 0.6932038834951456
K-fold F1 mean (type): 0.8531762486172433
K-fold F1 std  (type): 0.007481164801061603
K-fold F1 mean (cvss_score): 0.5655048145801237
K-fold F1 std  (cvss_score): 0.015621655104252544
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.83      0.36      0.50        28
         1.0       0.98      0.90      0.94        50
         2.0       0.85      0.96      0.90       103
         3.0       0.81      0.90      0.85       2

# Bagged KNN

In [49]:
knn = KNeighborsClassifier()
bag_knn = BaggingClassifier(estimator=knn, n_estimators=100, random_state=42)
multi_bag_knn = MultiOutputClassifier(bag_knn)

multi_bag_knn.fit(x_train[selected_features], y_train)
y_pred = multi_bag_knn.predict(x_test[selected_features])

bag_knn_accuracy_type = accuracy_score(y_test['type'], y_pred[:,0])
bag_knn_accuracy_cvss_score = accuracy_score(y_test['cvss_score'], y_pred[:,1])

bag_knn_scores_type = cross_val_score(bag_knn, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
bag_knn_scores_cvss_score = cross_val_score(bag_knn, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Bagging KNN Accuracy for 'type':", bag_knn_accuracy_type)
print("Bagging KNN Accuracy for 'cvss_score':", bag_knn_accuracy_cvss_score)

print("K-Fold mean F1 (type):", bag_knn_scores_type.mean())
print("K-Fold std  F1 (type):", bag_knn_scores_type.std())

print("K-Fold mean F1 (cvss_score):", bag_knn_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", bag_knn_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Bagging KNN Accuracy for 'type': 0.8218446601941748
Bagging KNN Accuracy for 'cvss_score': 0.6339805825242718
K-Fold mean F1 (type): 0.6657400251795847
K-Fold std  F1 (type): 0.010135535242582839
K-Fold mean F1 (cvss_score): 0.49651705047736366
K-Fold std  F1 (cvss_score): 0.014166032499094665
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       0.56      0.18      0.27        28
         1.0       0.91      0.84      0.88        50
         2.0       0.82      0.77      0.79       103
         3.0       0.58      0.66      0.

# Bagged DecisionTreeClassifier

In [50]:
dt = DecisionTreeClassifier(random_state=42)
bag_dt = BaggingClassifier(estimator=dt, n_estimators=100, random_state=42)
multi_bag_dt = MultiOutputClassifier(bag_dt)

multi_bag_dt.fit(x_train[selected_features], y_train)
y_pred = multi_bag_dt.predict(x_test[selected_features])

bag_dt_accuracy_type = accuracy_score(y_test['type'], y_pred[:,0])
bag_dt_accuracy_cvss_score = accuracy_score(y_test['cvss_score'], y_pred[:,1])

bag_dt_scores_type = cross_val_score(bag_dt, x[selected_features], y['type'], cv=kf, scoring='f1_macro')
bag_dt_scores_cvss_score = cross_val_score(bag_dt, x[selected_features], y['cvss_score'], cv=kf, scoring='f1_macro')

print("Bagging Decision Tree Accuracy for 'type':", bag_dt_accuracy_type)
print("Bagging Decision Tree Accuracy for 'cvss_score':", bag_dt_accuracy_cvss_score)

print("K-Fold mean F1 (type):", bag_dt_scores_type.mean())
print("K-Fold std  F1 (type):", bag_dt_scores_type.std())

print("K-Fold mean F1 (cvss_score):", bag_dt_scores_cvss_score.mean())
print("K-Fold std  F1 (cvss_score):", bag_dt_scores_cvss_score.std())

print("Selected features used:", list(selected_features))

print("\nClassification Report for 'type':\n", classification_report(y_test['type'], y_pred[:,0]))
print("\nClassification Report for 'cvss_score':\n", classification_report(y_test['cvss_score'], y_pred[:,1]))

Bagging Decision Tree Accuracy for 'type': 0.9466019417475728
Bagging Decision Tree Accuracy for 'cvss_score': 0.6786407766990291
K-Fold mean F1 (type): 0.9204254492751835
K-Fold std  F1 (type): 0.005555509703043668
K-Fold mean F1 (cvss_score): 0.5619268596334673
K-Fold std  F1 (cvss_score): 0.012366686035912153
Selected features used: ['PrivEsc_score', 'publish_date', 'vendor_product_interaction', 'RCE_score', 'desc_question', 'vendor_freq', 'desc_num_count', 'cwe', 'desc_upper_ratio', 'XSS_score', 'desc_len', 'Other_score', 'product', 'cvss_keywords_score', 'InfoDisclosure_score', 'product_freq', 'SSRF_score', 'description', 'DoS_score', 'cve_id', 'AuthBypass_score', 'SQLi_score', 'desc_word_count', 'CSRF_score']

Classification Report for 'type':
               precision    recall  f1-score   support

         0.0       1.00      0.89      0.94        28
         1.0       1.00      0.94      0.97        50
         2.0       0.83      0.90      0.87       103
         3.0       0.9

In [56]:
from rich.table import Table
from rich.console import Console

console = Console()

results = [
    ['LogisticRegression', lr_accuracy_type, lr_scores_type.mean(), lr_scores_type.std(), lr_accuracy_cvss_score, lr_scores_cvss_score.mean(), lr_scores_cvss_score.std()],
    ['DecisionTree', dt_accuracy_type, dt_scores_type.mean(), dt_scores_type.std(), dt_accuracy_cvss_score, dt_scores_cvss_score.mean(), dt_scores_cvss_score.std()],
    ['RandomForest', rf_accuracy_type, rf_scores_type.mean(), rf_scores_type.std(), rf_accuracy_cvss_score, rf_scores_cvss_score.mean(), rf_scores_cvss_score.std()],
    ['ExtraTrees', et_accuracy_type, et_scores_type.mean(), et_scores_type.std(), et_accuracy_cvss_score, et_scores_cvss_score.mean(), et_scores_cvss_score.std()],
    ['GradientBoosting', gb_accuracy_type, gb_scores_type.mean(), gb_scores_type.std(), gb_accuracy_cvss_score, gb_scores_cvss_score.mean(), gb_scores_cvss_score.std()],
    ['HistGradientBoosting', hgb_accuracy_type, hgb_scores_type.mean(), hgb_scores_type.std(), hgb_accuracy_cvss_score, hgb_scores_cvss_score.mean(), hgb_scores_cvss_score.std()],
    ['KNN', knn_accuracy_type, knn_scores_type.mean(), knn_scores_type.std(), knn_accuracy_cvss_score, knn_scores_cvss_score.mean(), knn_scores_cvss_score.std()],
    ['AdaBoost', ab_accuracy_type, ab_scores_type.mean(), ab_scores_type.std(), ab_accuracy_cvss_score, ab_scores_cvss_score.mean(), ab_scores_cvss_score.std()],
    ['Bagging', bag_accuracy_type, bag_scores_type.mean(), bag_scores_type.std(), bag_accuracy_cvss_score, bag_scores_cvss_score.mean(), bag_scores_cvss_score.std()],
    ['Hard Voting', hard_acc_type, hard_scores_type.mean(), hard_scores_type.std(), hard_acc_cvss_score, hard_scores_cvss_score.mean(), hard_scores_cvss_score.std()],
    ['Soft Voting', soft_acc_type, soft_scores_type.mean(), soft_scores_type.std(), soft_acc_cvss_score, soft_scores_cvss_score.mean(), soft_scores_cvss_score.std()],
    ['Stacking', stacking_acc_type, stacking_scores_type.mean(), stacking_scores_type.std(), stacking_acc_cvss_score, stacking_scores_cvss_score.mean(), stacking_scores_cvss_score.std()],
    ['SVM', svc_accuracy_type, svc_scores_type.mean(), svc_scores_type.std(), svc_accuracy_cvss_score, svc_scores_cvss_score.mean(), svc_scores_cvss_score.std()],
    ['Bagged KNN', bag_knn_accuracy_type, bag_knn_scores_type.mean(), bag_knn_scores_type.std(), bag_knn_accuracy_cvss_score, bag_knn_scores_cvss_score.mean(), bag_knn_scores_cvss_score.std()],
    ['Bagged DT', bag_dt_accuracy_type, bag_dt_scores_type.mean(), bag_dt_scores_type.std(), bag_dt_accuracy_cvss_score, bag_dt_scores_cvss_score.mean(), bag_dt_scores_cvss_score.std()],
]


for row in results:
    type_acc = row[1]
    cvss_score_acc = row[4]
    combined = (type_acc + cvss_score_acc) / 2
    row.append(combined)

result_sorted = sorted(results, key=lambda i: i[-1], reverse=True)

best_model = max(results, key=lambda x: x[-1])
worst_model = min(results, key=lambda x: x[-1])

table = Table(title="Filter Method Comparison", show_lines=True)
table.add_column("Algorithm")
table.add_column("Type Acc")
table.add_column("K-Fold Mean")
table.add_column("K-Fold Std")
table.add_column("cvss_score Acc")
table.add_column("K-Fold Mean")
table.add_column("K-Fold Std")
table.add_column("Combined", justify="right")

for row in result_sorted:
    algo, type_acc, kmean_type, kstd_type, cvss_score_acc, kmean_cvss_score, kstd_cvss_score, combined = row

    if row == best_model:
        table.add_row(
            f"[bold green]{algo}[/bold green]",
            f"[bold green]{type_acc:.2f}[/bold green]",
            f"[bold green]{kmean_type:.2f}[/bold green]",
            f"[bold green]{kstd_type:.2f}[/bold green]",
            f"[bold green]{cvss_score_acc:.2f}[/bold green]",
            f"[bold green]{kmean_cvss_score:.2f}[/bold green]",
            f"[bold green]{kstd_cvss_score:.2f}[/bold green]",
            f"[bold green]{combined:.2f}[/bold green]",
        )
    elif row == worst_model:
        table.add_row(
            f"[bold red]{algo}[/bold red]",
            f"[bold red]{type_acc:.2f}[/bold red]",
            f"[bold red]{kmean_type:.2f}[/bold red]",
            f"[bold red]{kstd_type:.2f}[/bold red]",
            f"[bold red]{cvss_score_acc:.2f}[/bold red]",
            f"[bold red]{kmean_cvss_score:.2f}[/bold red]",
            f"[bold red]{kstd_cvss_score:.2f}[/bold red]",
            f"[bold red]{combined:.2f}[/bold red]",
        )
    else:
        table.add_row(
            algo, f"{type_acc:.2f}", f"{kmean_type:.2f}", f"{kstd_type:.2f}",
            f"{cvss_score_acc:.2f}", f"{kmean_cvss_score:.2f}", f"{kstd_cvss_score:.2f}", f"{combined:.2f}"
        )

console.print(table)


In [58]:
os.makedirs('results', exist_ok=True)

temp_console = Console(record=True)
temp_console.print(table)
text = temp_console.export_text()
with open('results/feature_selection_compare.txt', 'a', encoding='utf-8') as f:
    f.write(text)