In [None]:
import pandas as pd 
import numpy as np
import os
from joblib import dump
import pickle as pkl
import pickle
import itertools

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches


from sklearn.metrics import make_scorer, fbeta_score,  mean_squared_error, r2_score, f1_score
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             roc_auc_score, precision_recall_curve, classification_report, confusion_matrix)
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, GridSearchCV, train_test_split, StratifiedShuffleSplit
from sklearn import datasets
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.svm import SVC
from sklearn.inspection import permutation_importance

import xgboost as xgb
from xgboost import XGBClassifier, plot_importance

In [None]:
# Load the pre-split training, evaluation and test sets from disk.
train_df = pd.read_csv(
    '/work/SarahHvidAndersen#6681/DataScience_project/data/train_df.csv'
)
eval_df = pd.read_csv(
    '/work/SarahHvidAndersen#6681/DataScience_project/data/eval_df.csv'
)
test_df = pd.read_csv(
    '/work/SarahHvidAndersen#6681/DataScience_project/data/test_df.csv'
)

# Disabled alternative: a reduced feature list, kept inside a bare string so it
# is never assigned. To use it, turn the string into real code and index each
# split with `selected_features` instead of dropping columns below.
"""
selected_features = ['Fwd IAT Total', 'Fwd Packet Length Max',
        'Bwd Packet Length Mean', 'Bwd Packet Length Std',
        'Bwd Packets Length Total', 'Bwd Packet Length Max',
        'Packet Length Max', 'Packet Length Mean', 'Packet Length Std',
        'Packet Length Variance', 'Avg Packet Size', 'Fwd Header Length',
        'Avg Fwd Segment Size', 'Avg Bwd Segment Size']
"""

# Targets: the 'Attack' column of each split.
y_train = train_df['Attack']
y_eval = eval_df['Attack']
y_test = test_df['Attack']

# Features: every column except 'Label' and 'Attack'.
X_train = train_df.drop(columns=['Label', 'Attack'])
X_eval = eval_df.drop(columns=['Label', 'Attack'])
X_test = test_df.drop(columns=['Label', 'Attack'])

In [None]:
# Standardise the feature frames with one shared StandardScaler.
scaler = StandardScaler()

# The scaler is fitted on the training features only; the scaled values
# overwrite the original columns of X_train.
X_train[X_train.columns] = scaler.fit_transform(X_train)

# Evaluation and test features are transformed with the already-fitted scaler
# (transform only, no re-fitting).
X_eval[X_eval.columns] = scaler.transform(X_eval)
X_test[X_test.columns] = scaler.transform(X_test)

In [None]:
# Columns kept for the failure analysis: the attack-type label, the 'Attack'
# target and one prediction column per model.
relevant_columns = [
    'Label',
    'Attack',
    'Baseline_RF_Absolute_Features_Predictions',
    'Baseline_XGB_Absolute_Features_Predictions',
    'Baseline_RF_All_Features_Predictions',
    'Tuned_RF_All_Features_Predictions',
    'Tuned_RF_Permutation_Features_Predictions',
    'Baseline_XGB_All_Features_Predictions',
    'Tuned_XGB_All_Features_Predictions',
    'Tuned_XGB_Permutation_Features_Predictions',
    'Tuned_XGB_RFECV_Features_Predictions',
]

# Read the test set with stored predictions and keep only the columns above,
# in the order listed.
df = pd.read_csv('/work/SarahHvidAndersen#6681/DataScience_project/output/test_df_with_predictions.csv')[relevant_columns]

In [None]:
# Among the rows with Attack == 1, find those where every model's prediction
# column is 0, i.e. no model flagged the attack.
prediction_columns = [col for col in df.columns if col.startswith(('Baseline_', 'Tuned_'))]
actual_attacks = df[df['Attack'] == 1]

failed_predictions = actual_attacks.loc[actual_attacks[prediction_columns].eq(0).all(axis=1)]
failed_predictions_with_labels = failed_predictions[['Label'] + prediction_columns]

print(f"Number of cases where all models failed to predict an attack: {len(failed_predictions_with_labels)}")
# Output noted by the author from an earlier run:
# Number of cases where all models failed to predict an attack: 52


In [None]:
# Same check restricted to the prediction columns whose names start with
# 'Baseline_RF_A' or 'Tuned_RF_A' (described by the author as the best
# recall / F2 models): attacks where all of these columns are 0.
prediction_columns = [col for col in df.columns if col.startswith(('Baseline_RF_A', 'Tuned_RF_A'))]
actual_attacks = df[df['Attack'] == 1]

failed_predictions = actual_attacks.loc[actual_attacks[prediction_columns].eq(0).all(axis=1)]
failed_predictions_with_labels = failed_predictions[['Label'] + prediction_columns]

print(f"Number of cases where best RF models failed to predict an attack: {len(failed_predictions_with_labels)}")

In [None]:
# Identify the prediction columns: all models, then the RF and XGB subsets
# (selected by the '_RF_' / '_XGB_' fragment in the column name).
all_model_columns = [col for col in df.columns if col.startswith(('Baseline_', 'Tuned_'))]
rf_model_columns = [col for col in all_model_columns if '_RF_' in col]
xgb_model_columns = [col for col in all_model_columns if '_XGB_' in col]

# Per-row count of models that predicted 0, for each group of models.
# Vectorised (.eq(0).sum(axis=1)) instead of a row-wise Python lambda: same
# integer counts, much faster, and it still yields a Series of zeros when a
# column group is empty.
# NOTE(review): the count runs over every row of df, including rows whose
# 'Attack' is not 1, where a 0 prediction is not a miss -- confirm whether the
# counters should be restricted to actual attacks.
df['Total_Model_Failures'] = df[all_model_columns].eq(0).sum(axis=1)
df['RF_Model_Failures'] = df[rf_model_columns].eq(0).sum(axis=1)
df['XGB_Model_Failures'] = df[xgb_model_columns].eq(0).sum(axis=1)

df.head(4)

### Create a heatmap of failed predictions

In [None]:
# Build the instance-level failure matrix used by the heatmap:
# one row per model, one column per attack that at least one model missed,
# with columns grouped by attack type ('Label').

# Prediction columns and the subset of rows that are actual attacks.
model_columns = [col for col in df.columns if col.startswith(('Baseline_', 'Tuned_'))]
actual_attacks = df[df['Attack'] == 1]

# Keep only attacks where at least one model predicted 0.
# Vectorised comparison; also well-defined when there are no attack rows.
failed_attacks_mask = actual_attacks[model_columns].eq(0).any(axis=1)
failed_attacks = actual_attacks[failed_attacks_mask]

# Binary matrix: 1 = the model predicted 0 (miss), 0 = anything else.
# .eq(0).astype(...) replaces DataFrame.applymap, which is deprecated in
# recent pandas releases, and gives the same 0/1 integers.
failure_binary_df = failed_attacks[model_columns].eq(0).astype('int64')

# Attach the attack-type label so rows can be grouped.
failure_binary_df['Label'] = failed_attacks['Label']

# Group sizes per label, and the labels in alphabetical order.
# (Ordering by group size instead would be label_counts.sort_values().index.)
label_counts = failure_binary_df['Label'].value_counts()
sorted_labels = np.sort(failure_binary_df['Label'].unique())

# Sort rows by label in that order. A stable sort keeps the original row order
# inside each label group, so the heatmap is reproducible between runs.
failure_binary_df['Label'] = pd.Categorical(failure_binary_df['Label'], categories=sorted_labels, ordered=True)
sorted_failure_df = failure_binary_df.sort_values(by='Label', kind='mergesort')

# Keep the labels separately and drop them from the numeric matrix.
labels = sorted_failure_df['Label']
sorted_failure_df = sorted_failure_df.drop(columns='Label')

# Transpose so that models are rows and attack instances are columns.
sorted_failure_df_transposed = sorted_failure_df.T

In [None]:
# Plot the instance-level failure matrix: one row per model, one column per
# missed attack; the first colour marks 0 (caught), the second marks 1 (missed).
fig, ax = plt.subplots(figsize=(15, 4))
cmap = mcolors.ListedColormap(['#7fc97f', '#f94e42'])

# vmin/vmax are pinned so that 0 always maps to the first colour and 1 to the
# second, even if the matrix happens to contain only one of the two values.
sns.heatmap(
    sorted_failure_df_transposed,
    cmap=cmap,
    vmin=0,
    vmax=1,
    cbar=False,
    linewidths=0.5,
    linecolor='black',
    ax=ax,
)

# Heatmap cell i spans x in [i, i + 1], so the running total of group sizes
# gives the x position of each boundary between attack types.
group_sizes = label_counts[sorted_labels]
cumulative_counts = np.cumsum(group_sizes)
for count in cumulative_counts[:-1]:  # no line after the last group
    ax.axvline(x=count, color='blue', linestyle='--', linewidth=2.6)

# One x tick per attack type, centred on its group of columns.
tick_positions = cumulative_counts - group_sizes / 2
ax.set_xticks(tick_positions)
ax.set_xticklabels(sorted_labels, rotation=45, ha='right')
ax.tick_params(axis='y', labelsize=13)

ax.set_title('Heatmap of Model Failures Grouped by Attack Type', size = 20)
plt.show()

In [None]:
# Per-model failure rate for each attack type.

# Prediction columns and the rows that are actual attacks.
model_columns = [col for col in df.columns if col.startswith(('Baseline_', 'Tuned_'))]
attack_cases = df[df['Attack'] == 1]

# Binary matrix: 1 = the model predicted 0 (miss), 0 = anything else.
# .eq(0).astype(...) replaces DataFrame.applymap, which is deprecated in
# recent pandas releases, and gives the same 0/1 integers.
failure_binary_df = attack_cases[model_columns].eq(0).astype('int64')
failure_binary_df['Label'] = attack_cases['Label']

# Mean of the 0/1 indicators per label = proportion of that attack type each
# model missed. Transposed so models are rows and attack types are columns.
# Values are proportions in [0, 1], rounded to two decimals (not percentages).
failure_rates = failure_binary_df.groupby('Label').mean().T.round(2)
failure_rates


In [None]:
# Heatmap of the per-model failure rate for each attack type.
# `failure_rates` holds proportions in [0, 1] (the x100 conversion is disabled
# where it is computed), so the colour bar is labelled as a proportion rather
# than a percentage.
plt.figure(figsize=(9, 5))
ax = sns.heatmap(
    failure_rates,
    annot=True,
    cmap='Reds',
    linewidths=.5,
    cbar_kws={'label': 'Failure Rate (proportion)'},
)
ax.set_title('Heatmap of Model Failure Rates by Attack Type')
plt.xticks(rotation=45, ha="right")
plt.xlabel('')
plt.show()


In [None]:
# Summary table per attack type: sample counts in each split, per-model
# failure rates and per-model failure counts.

# Number of rows per label in the train, eval and test sets.
train_counts = train_df['Label'].value_counts().rename('Train Count')
eval_counts = eval_df['Label'].value_counts().rename('Eval Count')
test_counts = test_df['Label'].value_counts().rename('Test Count')

# Labels as rows, one count column per split; a label missing from a split
# gets 0 instead of NaN.
counts_df = pd.DataFrame([train_counts, eval_counts, test_counts]).T.fillna(0)

# Failure rates with labels as rows and models as columns.
failure_rates_transposed = failure_rates.T

# Total number of failed predictions per label, summed over all models.
# Computed from the per-model sums instead of groupby.apply with a lambda that
# drops the grouping column (deprecated pandas behaviour, and the lambda
# shadowed the global `df`). Not part of `final_stats` below.
failed_predictions_count = (
    failure_binary_df.groupby('Label')[model_columns]
    .sum()
    .sum(axis=1)
    .rename('Failed Predictions')
)

# Number of failed predictions per model within each label.
model_failed_counts = failure_binary_df.groupby('Label')[model_columns].sum().add_suffix('_Failures')

# Combine everything side by side, aligned on the label index.
final_stats = pd.concat([counts_df, failure_rates_transposed, model_failed_counts], axis=1)
final_stats


In [None]:
# Export the summary table. The attack-type labels live in the index, so the
# index is written (as a 'Label' column); without it the rows of the CSV could
# not be matched to an attack type.
final_stats.to_csv('/work/SarahHvidAndersen#6681/DataScience_project/output/count_statistics.csv', index=True, index_label='Label')