# Setting Parameters

In [1]:
apply_classes = ['33+1', '8+1', '1+1']

apply_sampling = None   # DO NOT CHANGE

apply_evaluators = ['LogisticRegression', 'Perceptron', 'AdaBoost', 'RandomForest', 'DeepNeuralNetwork']


# Checking that inputs are available
for _class in apply_classes:
    if _class not in ['33+1', '8+1', '1+1']:
        assert False, f'{_class} is an invalid class structure.'

if apply_sampling not in [None, 'RandomOverSampler', 'RandomUnderSampler', 'SMOTE', 'Cluster+SMOTE']:
    assert False, f'{apply_sampling} is an invalid under-sampler.'

for evaluator in apply_evaluators:
    if evaluator not in ['GradientBoosting', 'LogisticRegression', 'Perceptron', 'AdaBoost', 
                         'RandomForest', 'DeepNeuralNetwork', 'KNearestNeighbor']:
        assert False, f'{evaluator} is an invalid evaluator.'

# Dataset Handling
## Imports

In [2]:
import os
import pandas as pd
import random
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from tqdm import tqdm

## Loading the Dataset

In [3]:
DATASET_DIRECTORY = '../../dataset/'  # If your dataset is within your python project directory, change this to the relative path to your dataset
csv_filepaths = [filename for filename in os.listdir(DATASET_DIRECTORY) if filename.endswith('.csv')]

print(csv_filepaths)

# If there are more than X CSV files, randomly select X files from the list
sample_size = 5

if len(csv_filepaths) > sample_size:
    csv_filepaths = random.sample(csv_filepaths, sample_size)
    print(csv_filepaths)

csv_filepaths.sort()

# list of csv files used
data_sets = csv_filepaths

full_data = pd.DataFrame()
for data_set in data_sets:
    print(f"data set {data_set} out of {len(data_sets)} \n")
    data_path = os.path.join(DATASET_DIRECTORY, data_set)
    df = pd.read_csv(data_path)
    full_data = pd.concat([full_data, df])

# prints an instance of each class
print("Before encoding:")
unique_labels = full_data['label'].unique()
for label in unique_labels:
    print(f"First instance of {label}:")
    print(full_data[full_data['label'] == label].iloc[0])

# Shuffle data
full_data = shuffle(full_data, random_state=1)

# prove if the data is loaded properly
print("Real data:")
print(full_data[:2])
print(full_data.shape)

# Assuming 'label' is the column name for the labels in the DataFrame `synth_data`
unique_labels = full_data['label'].nunique()

# Print the number of unique labels
print(f"There are {unique_labels} unique labels in the dataset.")

class_counts = full_data['label'].value_counts()
print(class_counts)

# Display the first few entries to verify the changes
full_data.describe()

['part-00000-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00001-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00002-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00003-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00004-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00005-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00006-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00007-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00008-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00009-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00010-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00011-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00012-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00013-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00014-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00015-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv', 'part-00016-363d1ba3-8ab5-4f96-bc25-4d5

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,AVG,Std,Tot size,IAT,Number,Magnitue,Radius,Covariance,Variance,Weight
count,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,...,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0
mean,5.569101,76247.93,9.064399,66.33944,9152.159,9152.159,9.511405e-06,0.08691738,0.2074154,0.09080571,...,124.5973,33.4099,124.6802,83189260.0,9.498846,13.11828,47.21543,31123.97,0.09638938,141.5211
std,267.3291,459859.3,8.941006,13.97123,100516.7,100516.7,0.01045178,0.281714,0.4054558,0.2873327,...,241.0566,161.5249,241.9839,17017110.0,0.8175156,8.627248,228.4536,330218.5,0.2330167,21.0281
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,42.0,0.0,42.0,0.0,1.0,9.165151,0.0,0.0,0.0,1.0
25%,0.0,54.0,6.0,64.0,2.095238,2.095238,0.0,0.0,0.0,0.0,...,50.0,0.0,50.0,83071570.0,9.5,10.0,0.0,0.0,0.0,141.55
50%,0.0,54.0,6.0,64.0,15.76136,15.76136,0.0,0.0,0.0,0.0,...,54.0,0.0,54.0,83124520.0,9.5,10.3923,0.0,0.0,0.0,141.55
75%,0.1048855,278.3975,14.3,64.0,118.216,118.216,0.0,0.0,0.0,0.0,...,54.04613,0.3719096,54.06,83343910.0,9.5,10.39651,0.5059213,1.344216,0.08,141.55
max,131456.2,9826475.0,47.0,255.0,8388608.0,8388608.0,13.3683,1.0,1.0,1.0,...,8206.492,9320.997,9239.0,167639400.0,15.0,127.1625,13207.88,87711600.0,1.0,244.6


# Preprocessing
## Encoding Labels

In [4]:
full_data['33+1'] = full_data['label'].copy()

label_categories = {
    'Backdoor_Malware': 'Web',
    'BenignTraffic': 'Benign',
    'BrowserHijacking': 'Web',
    'CommandInjection': 'DDoS',
    'DDoS-ACK_Fragmentation': 'DDoS',
    'DDoS-HTTP_Flood': 'DDoS',
    'DDoS-ICMP_Flood': 'DDoS',
    'DDoS-ICMP_Fragmentation': 'DDoS',
    'DDoS-PSHACK_Flood': 'DDoS',
    'DDoS-RSTFINFlood': 'DDoS',
    'DDoS-SYN_Flood': 'DDoS',
    'DDoS-SlowLoris': 'DDoS',
    'DDoS-SynonymousIP_Flood': 'DDoS',
    'DDoS-TCP_Flood': 'DDoS',
    'DDoS-UDP_Flood': 'DDoS',
    'DDoS-UDP_Fragmentation': 'DDoS',
    'DNS_Spoofing': 'Spoofing',
    'DictionaryBruteForce': 'BruteForce',
    'DoS-HTTP_Flood': 'DoS',
    'DoS-SYN_Flood': 'DoS',
    'DoS-TCP_Flood': 'DoS',
    'DoS-UDP_Flood': 'DoS',
    'MITM-ArpSpoofing': 'Spoofing',
    'Mirai-greeth_flood': 'Mirai',
    'Mirai-greip_flood': 'Mirai',
    'Mirai-udpplain': 'Mirai',
    'Recon-HostDiscovery': 'Recon',
    'Recon-OSScan': 'Recon',
    'Recon-PingSweep': 'Recon',
    'Recon-PortScan': 'Recon',
    'SqlInjection': 'Web',
    'Uploading_Attack': 'Web',
    'VulnerabilityScan': 'Recon',
    'XSS': 'Web'
}
full_data['8+1'] = full_data['33+1'].map(label_categories)

full_data.loc[full_data['label'] != 'BenignTraffic', '1+1'] = 'Attack'
full_data.loc[full_data['label'] == 'BenignTraffic', '1+1'] = 'Benign'

full_label_encoder = LabelEncoder()
class_label_encoder = LabelEncoder()
binary_label_encoder = LabelEncoder()

full_data['33+1'] = full_label_encoder.fit_transform(full_data['33+1'])
full_data['8+1'] = class_label_encoder.fit_transform(full_data['8+1'])
full_data['1+1'] = binary_label_encoder.fit_transform(full_data['1+1'])

# Store label mappings
label_mapping = {index: label for index, label in enumerate(full_label_encoder.classes_)}
print("Label mappings:", label_mapping)

# Retrieve the numeric codes for classes
class_codes = {label: full_label_encoder.transform([label])[0] for label in full_label_encoder.classes_}

# Print specific instances after label encoding
print("After encoding:")
for label, code in class_codes.items():
    # Print the first instance of each class
    print(f"First instance of {label} (code {code}):")
    print(full_data[full_data['33+1'] == code].iloc[0])

full_data.head()

Label mappings: {0: 'Backdoor_Malware', 1: 'BenignTraffic', 2: 'BrowserHijacking', 3: 'CommandInjection', 4: 'DDoS-ACK_Fragmentation', 5: 'DDoS-HTTP_Flood', 6: 'DDoS-ICMP_Flood', 7: 'DDoS-ICMP_Fragmentation', 8: 'DDoS-PSHACK_Flood', 9: 'DDoS-RSTFINFlood', 10: 'DDoS-SYN_Flood', 11: 'DDoS-SlowLoris', 12: 'DDoS-SynonymousIP_Flood', 13: 'DDoS-TCP_Flood', 14: 'DDoS-UDP_Flood', 15: 'DDoS-UDP_Fragmentation', 16: 'DNS_Spoofing', 17: 'DictionaryBruteForce', 18: 'DoS-HTTP_Flood', 19: 'DoS-SYN_Flood', 20: 'DoS-TCP_Flood', 21: 'DoS-UDP_Flood', 22: 'MITM-ArpSpoofing', 23: 'Mirai-greeth_flood', 24: 'Mirai-greip_flood', 25: 'Mirai-udpplain', 26: 'Recon-HostDiscovery', 27: 'Recon-OSScan', 28: 'Recon-PingSweep', 29: 'Recon-PortScan', 30: 'SqlInjection', 31: 'Uploading_Attack', 32: 'VulnerabilityScan', 33: 'XSS'}
After encoding:
First instance of Backdoor_Malware (code 0):
flow_duration            205.624136
Header_Length                8520.9
Protocol Type                  12.6
Duration                

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,Number,Magnitue,Radius,Covariance,Variance,Weight,label,33+1,8+1,1+1
90822,0.0,53.46,5.95,65.91,0.39997,0.39997,0.0,0.0,1.0,0.0,...,9.5,10.399041,0.420429,2.94644,0.03,141.55,DDoS-SYN_Flood,10,2,0
55197,0.264655,154715.66,17.0,64.0,384.25226,384.25226,0.0,0.0,0.0,0.0,...,9.5,41.680604,769.472095,311678.811559,0.95,141.55,DDoS-UDP_Fragmentation,15,2,0
226164,0.0,54.0,6.0,64.0,11.825434,11.825434,0.0,0.0,0.0,0.0,...,9.5,10.392305,0.0,0.0,0.0,141.55,DDoS-TCP_Flood,13,2,0
45503,0.0,54.0,6.0,64.0,102.181716,102.181716,0.0,0.0,1.0,0.0,...,9.5,10.392305,0.0,0.0,0.0,141.55,DoS-SYN_Flood,19,3,0
417549,0.0,54.0,6.0,64.0,25.718515,25.718515,0.0,0.0,1.0,0.0,...,9.5,10.392305,0.0,0.0,0.0,141.55,DDoS-SynonymousIP_Flood,12,2,0


## 

In [5]:
X = full_data.drop(['label', '33+1', '8+1', '1+1'], axis=1)
y = full_data[['label', '33+1', '8+1', '1+1']]

# Sampling (SMOTE-NC Over-Sampling)
## UnderSampling

In [6]:
if apply_sampling is not None:
    cat_cols = [
        'Protocol Type', 'Drate', 'fin_flag_number', 'syn_flag_number', 'rst_flag_number',
        'psh_flag_number', 'ack_flag_number', 'ece_flag_number',
        'cwr_flag_number', 'HTTP', 'HTTPS', 'DNS', 'Telnet',
        'SMTP', 'SSH', 'IRC', 'TCP', 'UDP', 'DHCP', 'ARP',
        'ICMP', 'IPv', 'LLC'
    ]
    
    undersampler = None
    oversampler = None
    
    match apply_sampling:
        case 'RandomOverSampler':
            from imblearn.over_sampling import RandomOverSampler
            oversampler = RandomOverSampler(random_state=42)
        case 'RandomUnderSampler':
            from imblearn.under_sampling import RandomUnderSampler
            undersampler = RandomUnderSampler(random_state=42)
        case 'SMOTENC':
            from imblearn.over_sampling import SMOTENC
            oversampler = SMOTENC(categorical_features=cat_cols, random_state=42)
        case 'CNN+SMOTENC':
            from imblearn.under_sampling import CondensedNearestNeighbour
            from imblearn.over_sampling import SMOTENC
            undersampler = CondensedNearestNeighbour(random_state=42)
            oversampler = SMOTENC(categorical_features=cat_cols, random_state=42)

#   Resampling does not yet include 33+1, 8+1, 1+1 classes
#
#    if undersampler is not None:
#        X, y = undersampler.fit_resample(X, y)  
#    if oversampler is not None:
#        X, y = oversampler.fit_resample(X, y)
        
else:
    print('N/A')

N/A


In [7]:
# Combine the resampled features and labels back into a single DataFrame
full_data_resampled = pd.concat([X, y], axis=1)

print(full_data_resampled.head())
print("Resampled Data (SCALED):")
for label, code in class_codes.items():
    # Print the first instance of each class
    print(f"First instance of {label} (code {code}):")
    print(full_data_resampled[full_data_resampled['33+1'] == code].iloc[0])

full_data_resampled.head()

        flow_duration  Header_Length  Protocol Type  Duration        Rate  \
90822        0.000000          53.46           5.95     65.91    0.399970   
55197        0.264655      154715.66          17.00     64.00  384.252260   
226164       0.000000          54.00           6.00     64.00   11.825434   
45503        0.000000          54.00           6.00     64.00  102.181716   
417549       0.000000          54.00           6.00     64.00   25.718515   

             Srate  Drate  fin_flag_number  syn_flag_number  rst_flag_number  \
90822     0.399970    0.0              0.0              1.0              0.0   
55197   384.252260    0.0              0.0              0.0              0.0   
226164   11.825434    0.0              0.0              0.0              0.0   
45503   102.181716    0.0              0.0              1.0              0.0   
417549   25.718515    0.0              0.0              1.0              0.0   

        ...  Number   Magnitue      Radius     Covarianc

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,Number,Magnitue,Radius,Covariance,Variance,Weight,label,33+1,8+1,1+1
90822,0.0,53.46,5.95,65.91,0.39997,0.39997,0.0,0.0,1.0,0.0,...,9.5,10.399041,0.420429,2.94644,0.03,141.55,DDoS-SYN_Flood,10,2,0
55197,0.264655,154715.66,17.0,64.0,384.25226,384.25226,0.0,0.0,0.0,0.0,...,9.5,41.680604,769.472095,311678.811559,0.95,141.55,DDoS-UDP_Fragmentation,15,2,0
226164,0.0,54.0,6.0,64.0,11.825434,11.825434,0.0,0.0,0.0,0.0,...,9.5,10.392305,0.0,0.0,0.0,141.55,DDoS-TCP_Flood,13,2,0
45503,0.0,54.0,6.0,64.0,102.181716,102.181716,0.0,0.0,1.0,0.0,...,9.5,10.392305,0.0,0.0,0.0,141.55,DoS-SYN_Flood,19,3,0
417549,0.0,54.0,6.0,64.0,25.718515,25.718515,0.0,0.0,1.0,0.0,...,9.5,10.392305,0.0,0.0,0.0,141.55,DDoS-SynonymousIP_Flood,12,2,0


## Real vs Resampled Dataset Analysis

In [8]:
full_data_resampled.describe()

Unnamed: 0,flow_duration,Header_Length,Protocol Type,Duration,Rate,Srate,Drate,fin_flag_number,syn_flag_number,rst_flag_number,...,IAT,Number,Magnitue,Radius,Covariance,Variance,Weight,33+1,8+1,1+1
count,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,...,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0,1638234.0
mean,5.569101,76247.93,9.064399,66.33944,9152.159,9152.159,9.511405e-06,0.08691738,0.2074154,0.09080571,...,83189260.0,9.498846,13.11828,47.21543,31123.97,0.09638938,141.5211,12.55915,2.304373,0.02348993
std,267.3291,459859.3,8.941006,13.97123,100516.7,100516.7,0.01045178,0.281714,0.4054558,0.2873327,...,17017110.0,0.8175156,8.627248,228.4536,330218.5,0.2330167,21.0281,5.853964,0.8018155,0.1514535
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,9.165151,0.0,0.0,0.0,1.0,0.0,0.0,0.0
25%,0.0,54.0,6.0,64.0,2.095238,2.095238,0.0,0.0,0.0,0.0,...,83071570.0,9.5,10.0,0.0,0.0,0.0,141.55,8.0,2.0,0.0
50%,0.0,54.0,6.0,64.0,15.76136,15.76136,0.0,0.0,0.0,0.0,...,83124520.0,9.5,10.3923,0.0,0.0,0.0,141.55,12.0,2.0,0.0
75%,0.1048855,278.3975,14.3,64.0,118.216,118.216,0.0,0.0,0.0,0.0,...,83343910.0,9.5,10.39651,0.5059213,1.344216,0.08,141.55,15.0,2.0,0.0
max,131456.2,9826475.0,47.0,255.0,8388608.0,8388608.0,13.3683,1.0,1.0,1.0,...,167639400.0,15.0,127.1625,13207.88,87711600.0,1.0,244.6,33.0,7.0,1.0


In [9]:
from ydata_profiling import ProfileReport

original_report = ProfileReport(full_data, title='Original Data', minimal=True)
resampled_report = ProfileReport(full_data_resampled, title='Resampled Data', minimal=True)
comparison_report = original_report.compare(resampled_report)
comparison_report.to_file('./profile_reports/smote_original_vs_resampled.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

# Evaluator Model

## Imports

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Preprocessing
### Scaling Numerical Features

In [11]:
num_cols = [
    'flow_duration', 'Header_Length',  'Duration', 'Rate', 'Srate', 'ack_count', 'syn_count', 'fin_count',
    'urg_count', 'rst_count', 'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number', 'Magnitue',
    'Radius', 'Covariance', 'Variance', 'Weight'
]

scaler = StandardScaler()
full_data_resampled[num_cols] = scaler.fit_transform(full_data_resampled[num_cols])

### Splitting

In [19]:
X = full_data_resampled.drop(['label', '33+1', '8+1', '1+1'], axis=1)
y_all = full_data_resampled[['label', '33+1', '8+1', '1+1']]

X_train, X_test, y_train_all, y_test_all = train_test_split(X, y_all, test_size=0.2, random_state=42)

y_train = {}
y_test = {}
for _class in apply_classes:
    y_train[_class] = y_train_all[_class]
    y_test[_class] = y_test_all[_class]

print(f'X_train: {X_train.shape}, y_train: {y_train['33+1'].shape}, X_test: {X_test.shape}, y_test: {y_test['33+1'].shape}')

33+1
8+1
1+1
X_train: (1310587, 46), y_train: (1310587,), X_test: (327647, 46), y_test: (327647,)


## Training

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

for evaluator_type in apply_evaluators:
    match evaluator_type:
        case 'GradientBoosting':
            from sklearn.ensemble import HistGradientBoostingRegressor
            evaluator = HistGradientBoostingRegressor()
        case 'LogisticRegression':
            from sklearn.linear_model import LogisticRegression
            evaluator = LogisticRegression(random_state=42, n_jobs=-1)
        case 'Perceptron':
            from sklearn.linear_model import Perceptron
            evaluator = Perceptron(random_state=42, n_jobs=-1)
        case 'AdaBoost':
            from sklearn.ensemble import AdaBoostClassifier
            evaluator = AdaBoostClassifier(random_state=42, algorithm='SAMME')
        case 'RandomForest':
            from sklearn.ensemble import RandomForestClassifier
            evaluator = RandomForestClassifier(random_state=42, n_jobs=-1)
        case 'DeepNeuralNetwork':
            from sklearn.neural_network import MLPClassifier
            evaluator = MLPClassifier(random_state=42)
        case 'KNearestNeighbor':
            from sklearn.neighbors import KNeighborsClassifier
            evaluator = KNeighborsClassifier(n_jobs=-1)
        case _:
            print("Invalid model.")
    
    for _class in apply_classes:
        print(f'{datetime.now()} : Training {evaluator_type} on {_class} classes')
        evaluator.fit(X_train, y_train[_class])
    
        print(f'{datetime.now()} : Predicting {evaluator_type} on {_class} classes')
        y_pred = evaluator.predict(X_test)
    
        print(f'{evaluator_type} {_class} Metrics')
        print(f'   Accuracy: {accuracy_score(y_test[_class], y_pred)}')
        print(f'   Precision: {precision_score(y_test[_class], y_pred, average='weighted', zero_division=0.0)}')
        print(f'   Recall: {recall_score(y_test[_class], y_pred, average='weighted')}')
        print(f'   F1: {f1_score(y_test[_class], y_pred, average='weighted')}')
        print()

2024-05-08 20:46:11.096896 : Training LogisticRegression on 33+1 classes
2024-05-08 20:47:59.919150 : Predicting LogisticRegression on 33+1 classes
LogisticRegression Metrics
   Accuracy: 0.7926152230907043
   Precision: 0.7914735901882773
   Recall: 0.7926152230907043
   F1: 0.7525896638441408

2024-05-08 20:48:00.405758 : Training LogisticRegression on 8+1 classes
2024-05-08 20:48:35.177423 : Predicting LogisticRegression on 8+1 classes
LogisticRegression Metrics
   Accuracy: 0.8288127161243656
   Precision: 0.8178135251611328
   Recall: 0.8288127161243656
   F1: 0.7808116632154544

2024-05-08 20:48:35.496820 : Training LogisticRegression on 1+1 classes
2024-05-08 20:48:47.578584 : Predicting LogisticRegression on 1+1 classes
LogisticRegression Metrics
   Accuracy: 0.9889484719835677
   Precision: 0.9885614737481933
   Recall: 0.9889484719835677
   F1: 0.9887226331828096

2024-05-08 20:48:47.875849 : Training Perceptron on 33+1 classes
2024-05-08 20:49:19.596876 : Predicting Perceptr

## Model Analysis

In [16]:
from sklearn.metrics import confusion_matrix

cm = pd.DataFrame(confusion_matrix(y_test, y_pred), columns = full_label_encoder.classes_)
cm.insert(0, column='Actual', value=full_label_encoder.classes_)
cm

Unnamed: 0,Actual,Backdoor_Malware,BenignTraffic,BrowserHijacking,CommandInjection,DDoS-ACK_Fragmentation,DDoS-HTTP_Flood,DDoS-ICMP_Flood,DDoS-ICMP_Fragmentation,DDoS-PSHACK_Flood,...,Mirai-greip_flood,Mirai-udpplain,Recon-HostDiscovery,Recon-OSScan,Recon-PingSweep,Recon-PortScan,SqlInjection,Uploading_Attack,VulnerabilityScan,XSS
0,Backdoor_Malware,0,9,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
1,BenignTraffic,2,5363,0,0,5,0,0,0,0,...,0,0,49,22,0,30,0,0,0,0
2,BrowserHijacking,0,22,1,0,0,0,0,0,0,...,0,0,6,1,0,1,0,0,0,0
3,CommandInjection,0,10,0,4,0,0,0,0,0,...,0,0,5,0,0,1,0,0,0,0
4,DDoS-ACK_Fragmentation,0,0,0,0,1403,0,1,2,2,...,0,0,0,0,0,0,0,0,1,0
5,DDoS-HTTP_Flood,0,0,0,0,0,112,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,DDoS-ICMP_Flood,0,0,0,0,3,0,37173,1,0,...,0,1,0,0,0,1,0,0,2,0
7,DDoS-ICMP_Fragmentation,0,0,0,0,3,1,4,2253,1,...,4,3,0,0,0,0,0,0,6,0
8,DDoS-PSHACK_Flood,0,0,0,0,1,0,1,0,21085,...,1,3,0,0,0,0,0,0,0,0
9,DDoS-RSTFINFlood,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
