### Preparing Pre-labeled CSVs from UNSW for processing 

In [1]:
from label.parallel_label import *

In [2]:
column_names=['src_ip','src_port','dst_ip','dst_port','proto','state','duration','sbytes','dbytes',
'sttl','dttl','sloss','dloss','service','Sload','Dload','Spkts','Dpkts','swin','dwin','stcpb','dtcpb',
'smeansz','dmeansz','trans_depth','res_bdy_len','Sjit','Djit','timestamp','Ltime','Sintpkt','Dintpkt',
'tcprtt','synack','ackdat','is_sm_ips_ports','ct_state_ttl','ct_flw_http_mthd','is_ftp_login',
'ct_ftp_cmd','ct_srv_src','ct_srv_dst','ct_dst_ltm','ct_src_ ltm','ct_src_dport_ltm',
'ct_dst_sport_ltm','ct_dst_src_ltm','attack_cat','Label']

Add column names to the csv of unsw. 

In [4]:
import os
import pandas as pd

def process_and_rename_columns(folder_path, new_column_names):
    # Ensure the column names list is not empty
    if not new_column_names:
        raise ValueError("New column names list cannot be empty.")

    # Iterate over all files in the specified folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            try:
                # Read the CSV file into a DataFrame
                df = pd.read_csv(file_path, header=None)  # Use header=None if there are no existing headers

                # Check if the number of new column names matches the number of columns in the CSV
                if len(new_column_names) != len(df.columns):
                    print(f"Skipping {file_path}: Number of new column names does not match number of columns.")
                    continue

                # Set the new column names
                df.columns = new_column_names

                # Save the DataFrame back to the same CSV file
                df.to_csv(file_path, index=False)  # Set index=False to avoid writing row numbers

                print(f"Processed {file_path}: Column names updated.")

            except Exception as e:
                print(f"Failed to process {file_path}: {e}")

folder_path = "/scratch/user/syedwali/Datasets/UNSW/provided_CSVs"  # Replace with the path to your folder
process_and_rename_columns(folder_path, column_names)


  df = pd.read_csv(file_path, header=None)  # Use header=None if there are no existing headers


Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv: Column names updated.
Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv: Column names updated.


  df = pd.read_csv(file_path, header=None)  # Use header=None if there are no existing headers


Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv: Column names updated.
Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv: Column names updated.


Adding the attack_category column with 'Benign' for entries where it was empty and the label was 0

In [22]:
import os
import pandas as pd

def process_and_update_csvs(folder_path):
    # Iterate over all files in the specified folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".csv"):
            file_path = os.path.join(folder_path, filename)
            try:
                # Read the CSV file into a DataFrame
                df = pd.read_csv(file_path, low_memory=False)
                
                # Check if 'attack_cat' column exists
                if 'attack_cat' in df.columns:
                    # Fill empty strings and NaN values in 'attack_cat' with 'Benign'
                    df['attack_cat'].replace('', 'Benign', inplace=True)  # Replace empty strings
                    df['attack_cat'].fillna('Benign', inplace=True)      # Replace NaN values

                    # Show value counts for 'attack_cat'
                    print(f"File: {file_path}")
                    print(df['attack_cat'].value_counts())

                    # Save the DataFrame back to the same CSV file
                    df.to_csv(file_path, index=False)  # Set index=False to avoid writing row numbers

                    print(f"Processed and updated {file_path}")

                else:
                    print(f"Skipping {file_path}: 'attack_cat' column not found.")

            except Exception as e:
                print(f"Failed to process {file_path}: {e}")

# Example usage
folder_path = "/scratch/user/syedwali/Datasets/UNSW/provided_CSVs"  # Replace with the path to your folder
process_and_update_csvs(folder_path)


File: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
attack_cat
Benign            677786
Generic             7522
Exploits            5409
 Fuzzers            5051
Reconnaissance      1759
DoS                 1167
Backdoors            534
Analysis             526
Shellcode            223
Worms                 24
Name: count, dtype: int64
Processed and updated /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
File: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
attack_cat
Benign              542576
Generic             118198
Exploits             16574
 Fuzzers              9137
DoS                   5642
 Reconnaissance       5582
Analysis               873
Backdoor               759
 Shellcode             593
Worms                   67
Name: count, dtype: int64
Processed and updated /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
File: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
attack_ca

### Extracting meta data from provided csvs (labeled csv)

In [1]:
from label.parallel_label import *
if __name__ == '__main__':
    #set_start_method("fork")  # or "spawn" depending on your system

    # Example usage:
    folders = ["/scratch/user/syedwali/Datasets/UNSW/provided_CSVs"]

    # Step 1: Extract time ranges from CSV files in all folders using multiple cores
    meta_data = extract_time_ranges_from_csvs(folders, timestamp_column='timestamp', timezone='None', batch_size=5)

    # Print resulting metadata
    print(meta_data)

INFO:label.parallel_label:Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv: Min timestamp = 1424249984000, Max timestamp = 1424262068000
INFO:label.parallel_label:Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv: Min timestamp = 1424231099000, Max timestamp = 1424250009000
INFO:label.parallel_label:Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv: Min timestamp = 1421955795000, Max timestamp = 1424231129000
INFO:label.parallel_label:Processed /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv: Min timestamp = 1421927377000, Max timestamp = 1421955842000


                                           file_path  min_timestamp  \
0  /scratch/user/syedwali/Datasets/UNSW/provided_...  1421927377000   
1  /scratch/user/syedwali/Datasets/UNSW/provided_...  1424231099000   
2  /scratch/user/syedwali/Datasets/UNSW/provided_...  1421955795000   
3  /scratch/user/syedwali/Datasets/UNSW/provided_...  1424249984000   

   max_timestamp  
0  1421955842000  
1  1424250009000  
2  1424231129000  
3  1424262068000  


### Labeling

We are now labeling our processed CSV files using the labeled CSVs from UNSW. Since UNSW uses Unix timestamps, we are setting the timezone to None. Additionally, Unix timestamps have second-level precision, which we are considering. In the UNSW dataset, the labels are in the column named attack_cat, which we have also defined as function attributes.

In [2]:
import warnings
from label.parallel_label import *
# Suppress all warnings
warnings.filterwarnings("ignore")
label_csvs(input_folder="/scratch/user/syedwali/Datasets/UNSW/processed/", time_ranges_df=meta_data, timezone='None',num_workers=4,
          unit='sec',flowduration_col='duration',label_col='attack_cat')

Processing CSVs:   0%|          | 0/79 [00:00<?, ?it/s]

Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:   1%|▏         | 1/79 [01:52<2:26:03, 112.35s/it]

Finished processing 11_1000000075_20240905-205247_processed.csv.
Label distribution:
label
Benign    18001
Name: count, dtype: int64


Processing CSVs:   3%|▎         | 2/79 [01:53<1:00:34, 47.21s/it] 

Finished processing 14_1028986624_20240905-211644_processed.csv.
Label distribution:
label
Benign      18478
No Match        2
Name: count, dtype: int64
Finished processing 3_1000000694_20240905-205641_processed.csv.
Label distribution:
label
Benign            17366
Exploits            428
 Fuzzers            231
Reconnaissance      211
DoS                  72
Generic              58
Shellcode            24
Backdoors             3
No Match              2
Worms                 2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:   5%|▌         | 4/79 [04:09<1:15:04, 60.06s/it]

Finished processing 31_processed.csv.
Label distribution:
label
Benign      24322
No Match        1
Name: count, dtype: int64


Processing CSVs:   6%|▋         | 5/79 [04:24<56:53, 46.13s/it]  

Finished processing 41_processed.csv.
Label distribution:
label
Benign      27352
No Match        2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:   8%|▊         | 6/79 [06:09<1:18:10, 64.25s/it]

Finished processing 35_processed.csv.
Label distribution:
label
Benign      16754
No Match        2
Name: count, dtype: int64


Processing CSVs:   9%|▉         | 7/79 [06:09<53:43, 44.78s/it]  

Finished processing 33_processed.csv.
Label distribution:
label
Benign    17940
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  10%|█         | 8/79 [07:20<1:02:20, 52.68s/it]

Finished processing 53_processed.csv.
Label distribution:
label
Benign    13020
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  11%|█▏        | 9/79 [07:58<56:20, 48.29s/it]  

Finished processing 46_processed.csv.
Label distribution:
label
Benign    18138
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  13%|█▎        | 10/79 [09:21<1:07:27, 58.66s/it]

Finished processing 11_processed.csv.
Label distribution:
label
Benign              36503
Exploits             1121
 Fuzzers              455
 Reconnaissance       420
DoS                   161
Generic               159
 Shellcode             54
Backdoor               31
Worms                   9
Analysis                3
No Match                2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  14%|█▍        | 11/79 [11:10<1:23:34, 73.74s/it]

Finished processing 19_1028414664_20240905-211405_processed.csv.
Label distribution:
label
Benign    17651
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  15%|█▌        | 12/79 [12:01<1:14:42, 66.91s/it]

Finished processing 32_processed.csv.
Label distribution:
label
Benign    17635
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  16%|█▋        | 13/79 [12:54<1:09:03, 62.78s/it]

Finished processing 1_processed.csv.
Label distribution:
label
Benign              15144
Exploits              337
 Fuzzers              330
No Match              172
 Reconnaissance       141
Generic                48
DoS                    47
 Shellcode             20
Worms                   3
Backdoor                2
Name: count, dtype: int64


Processing CSVs:  18%|█▊        | 14/79 [13:05<51:02, 47.11s/it]  

Finished processing 7_processed.csv.
Label distribution:
label
Benign              35826
Exploits             1105
 Fuzzers              702
 Reconnaissance       418
Generic               157
DoS                   147
 Shellcode             58
Worms                  10
Backdoor               10
No Match                4
Analysis                1
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  19%|█▉        | 15/79 [13:55<51:19, 48.12s/it]

Finished processing 4_1000000277_20240905-215800_processed.csv.
Label distribution:
label
Benign            17876
Exploits            430
 Fuzzers            211
Reconnaissance      189
DoS                  67
Generic              63
Shellcode            23
Backdoors             6
Worms                 2
No Match              2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  20%|██        | 16/79 [14:31<46:37, 44.40s/it]

Finished processing 3_processed.csv.
Label distribution:
label
Benign              37405
Exploits             1034
 Fuzzers              624
 Reconnaissance       431
Generic               203
DoS                   175
 Shellcode             56
Backdoor               16
Worms                   9
Analysis                3
No Match                3
Name: count, dtype: int64


Processing CSVs:  22%|██▏       | 17/79 [14:57<40:04, 38.78s/it]

Finished processing 51_processed.csv.
Label distribution:
label
Benign    18595
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  23%|██▎       | 18/79 [16:01<47:10, 46.41s/it]

Finished processing 50_processed.csv.
Label distribution:
label
Benign      21959
No Match        2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  24%|██▍       | 19/79 [19:26<1:34:03, 94.05s/it]

Finished processing 6_processed.csv.
Label distribution:
label
Benign              38782
Exploits              881
 Fuzzers              414
 Reconnaissance       401
DoS                   136
Generic               126
 Shellcode             50
Backdoor               11
Worms                   4
No Match                3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  25%|██▌       | 20/79 [21:19<1:38:03, 99.71s/it]

Finished processing 30_processed.csv.
Label distribution:
label
Benign    18987
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  27%|██▋       | 21/79 [22:44<1:32:13, 95.41s/it]

Finished processing 20_processed.csv.
Label distribution:
label
Benign              38418
Exploits             1026
 Reconnaissance       421
 Fuzzers              372
Generic               165
DoS                   149
 Shellcode             58
Backdoor               17
Worms                   7
Analysis                5
No Match                2
Name: count, dtype: int64


Processing CSVs:  28%|██▊       | 22/79 [23:13<1:11:42, 75.49s/it]

Finished processing 45_processed.csv.
Label distribution:
label
Benign    19232
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  29%|██▉       | 23/79 [24:03<1:03:16, 67.80s/it]

Finished processing 25_processed.csv.
Label distribution:
label
Benign              38033
Exploits             1045
 Fuzzers              797
 Reconnaissance       425
Generic               172
DoS                   168
 Shellcode             56
Backdoor               17
Worms                   9
No Match                3
Analysis                1
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  30%|███       | 24/79 [27:37<1:42:22, 111.68s/it]

Finished processing 10_processed.csv.
Label distribution:
label
Benign              35937
Exploits              850
 Fuzzers              475
 Reconnaissance       392
Generic               137
DoS                   117
 Shellcode             53
Backdoor               15
Worms                   4
No Match                3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  32%|███▏      | 25/79 [29:38<1:42:54, 114.35s/it]

Finished processing 25_1029881776_20240905-214109_processed.csv.
Label distribution:
label
Benign    20768
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  33%|███▎      | 26/79 [31:29<1:40:09, 113.38s/it]

Finished processing 34_processed.csv.
Label distribution:
label
Benign    18543
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  34%|███▍      | 27/79 [32:46<1:28:55, 102.60s/it]

Finished processing 22_processed.csv.
Label distribution:
label
Benign              39684
Exploits              794
 Fuzzers              583
 Reconnaissance       380
DoS                   110
Generic               107
Analysis               59
 Shellcode             46
No Match               37
Backdoor               11
Worms                   3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  35%|███▌      | 28/79 [33:41<1:14:59, 88.22s/it] 

Finished processing 1_1000000847_20240905-213056_processed.csv.
Label distribution:
label
Benign            21390
 Fuzzers            707
Exploits            503
Reconnaissance      210
DoS                  67
Generic              67
Shellcode            30
Worms                 4
Backdoors             3
No Match              2
Name: count, dtype: int64


Processing CSVs:  37%|███▋      | 29/79 [33:49<53:32, 64.25s/it]  

Finished processing 21_processed.csv.
Label distribution:
label
Benign              37864
Exploits              919
 Fuzzers              546
 Reconnaissance       403
Generic               153
DoS                   126
 Shellcode             51
Backdoor               14
Worms                   6
No Match                5
Analysis                1
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  38%|███▊      | 30/79 [34:29<46:33, 57.01s/it]

Finished processing 43_processed.csv.
Label distribution:
label
Benign    16263
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  39%|███▉      | 31/79 [35:16<43:01, 53.79s/it]

Finished processing 23_processed.csv.
Label distribution:
label
No Match            40607
Benign              24884
 Fuzzers              608
Exploits              585
 Reconnaissance       248
Generic                87
DoS                    78
 Shellcode             33
Backdoor               12
Worms                   3
Analysis                1
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  41%|████      | 32/79 [35:49<37:22, 47.70s/it]

Finished processing 26_1030712176_20240905-205051_processed.csv.
Label distribution:
label
Benign    20604
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  42%|████▏     | 33/79 [36:29<34:52, 45.48s/it]

Finished processing 40_processed.csv.
Label distribution:
label
Benign    20915
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  43%|████▎     | 34/79 [38:27<50:24, 67.21s/it]

Finished processing 5_1000000832_20240905-202039_processed.csv.
Label distribution:
label
Benign            18202
 Fuzzers            829
Exploits            475
Reconnaissance      214
Generic              66
DoS                  66
Shellcode            29
Analysis             14
Backdoors             8
Worms                 7
No Match              3
Name: count, dtype: int64


Processing CSVs:  44%|████▍     | 35/79 [38:33<35:46, 48.79s/it]

Finished processing 48_processed.csv.
Label distribution:
label
Benign    21760
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  46%|████▌     | 36/79 [40:50<53:55, 75.24s/it]

Finished processing 16_1030797880_20240905-210615_processed.csv.
Label distribution:
label
Benign    20498
Name: count, dtype: int64


Processing CSVs:  47%|████▋     | 37/79 [40:53<37:28, 53.54s/it]

Finished processing 15_1000000964_20240905-203852_processed.csv.
Label distribution:
label
Benign    21049
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  48%|████▊     | 38/79 [42:26<44:38, 65.33s/it]

Finished processing 4_processed.csv.
Label distribution:
label
Benign              36818
Exploits              925
 Fuzzers              705
 Reconnaissance       415
DoS                   133
Generic               124
Analysis               59
 Shellcode             53
No Match               23
Backdoor               17
Worms                   4
Name: count, dtype: int64


Processing CSVs:  49%|████▉     | 39/79 [42:56<36:31, 54.79s/it]

Finished processing 37_processed.csv.
Label distribution:
label
Benign    21614
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  51%|█████     | 40/79 [43:03<26:21, 40.54s/it]

Finished processing 36_processed.csv.
Label distribution:
label
Benign      22561
No Match        2
Name: count, dtype: int64


Processing CSVs:  52%|█████▏    | 41/79 [43:06<18:32, 29.27s/it]

Finished processing 12_processed.csv.
Label distribution:
label
Benign              36120
Exploits              820
 Fuzzers              552
 Reconnaissance       399
Generic               149
DoS                   116
Analysis               53
 Shellcode             51
Backdoor               16
No Match                5
Worms                   4
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  53%|█████▎    | 42/79 [44:59<33:23, 54.14s/it]

Finished processing 23_1029096252_20240905-213450_processed.csv.
Label distribution:
label
Benign    18550
Name: count, dtype: int64


Processing CSVs:  54%|█████▍    | 43/79 [45:02<23:17, 38.81s/it]

Finished processing 17_1030213624_20240905-203030_processed.csv.
Label distribution:
label
Benign      20053
No Match        2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  56%|█████▌    | 44/79 [46:58<36:17, 62.21s/it]

Finished processing 14_processed.csv.
Label distribution:
label
Benign              34156
 Fuzzers              808
Exploits              796
 Reconnaissance       359
Generic               134
DoS                   103
 Shellcode             45
Backdoor               17
Worms                   6
No Match                4
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv


Processing CSVs:  57%|█████▋    | 45/79 [49:41<52:15, 92.21s/it]

Finished processing 15_processed.csv.
Label distribution:
label
Benign              34486
Exploits              912
 Reconnaissance       376
 Fuzzers              347
DoS                   149
Generic               122
 Shellcode             50
Backdoor                8
Worms                   6
No Match                3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  58%|█████▊    | 46/79 [51:58<58:12, 105.82s/it]

Finished processing 26_processed.csv.
Label distribution:
label
Benign              38867
Exploits              862
 Fuzzers              665
 Reconnaissance       390
Generic               123
DoS                   118
Analysis               55
 Shellcode             49
Backdoor               15
No Match                4
Worms                   3
Name: count, dtype: int64


Processing CSVs:  59%|█████▉    | 47/79 [52:05<40:40, 76.26s/it] 

Finished processing 24_processed.csv.
Label distribution:
label
Benign              38286
Exploits              954
 Reconnaissance       440
 Fuzzers              392
DoS                   147
Generic               139
 Shellcode             55
Backdoor               11
Worms                   5
No Match                1
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  61%|██████    | 48/79 [53:58<44:57, 87.03s/it]

Finished processing 44_processed.csv.
Label distribution:
label
Benign    17881
Name: count, dtype: int64


Processing CSVs:  62%|██████▏   | 49/79 [54:00<30:45, 61.53s/it]

Finished processing 49_processed.csv.
Label distribution:
label
Benign    21192
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  63%|██████▎   | 50/79 [54:33<25:42, 53.18s/it]

Finished processing 9_processed.csv.
Label distribution:
label
Benign              38091
 Fuzzers             1044
Exploits              920
 Reconnaissance       417
DoS                   141
Generic               122
 Shellcode             55
Backdoor               13
Worms                   6
No Match                5
Analysis                3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  65%|██████▍   | 51/79 [56:23<32:43, 70.12s/it]

Finished processing 42_processed.csv.
Label distribution:
label
Benign      18375
No Match        2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  66%|██████▌   | 52/79 [58:02<35:25, 78.73s/it]

Finished processing 21_1028743892_20240905-213253_processed.csv.
Label distribution:
label
Benign    17635
Name: count, dtype: int64


Processing CSVs:  67%|██████▋   | 53/79 [58:10<24:59, 57.67s/it]

Failed to process 27_processed.csv. Error: Can only use .str accessor with string values!
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  68%|██████▊   | 54/79 [58:46<21:19, 51.16s/it]

Finished processing 8_1000001035_20240905-214538_processed.csv.
Label distribution:
label
Benign            17497
Exploits            423
 Fuzzers            405
Reconnaissance      205
DoS                  68
Generic              45
Shellcode            25
Backdoors             8
Analysis              3
Worms                 1
Name: count, dtype: int64


Processing CSVs:  70%|██████▉   | 55/79 [58:48<14:30, 36.27s/it]

Finished processing 8_processed.csv.
Label distribution:
label
Benign              34889
Exploits              859
 Fuzzers              616
 Reconnaissance       411
Generic               147
DoS                   110
Analysis               55
 Shellcode             46
Backdoor               13
No Match                5
Worms                   4
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  71%|███████   | 56/79 [1:00:04<18:29, 48.22s/it]

Finished processing 47_processed.csv.
Label distribution:
label
Benign      19240
No Match        2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  72%|███████▏  | 57/79 [1:00:30<15:15, 41.63s/it]

Finished processing 18_processed.csv.
Label distribution:
label
Benign              21517
Exploits              487
 Fuzzers              383
 Reconnaissance       217
DoS                    83
Generic                69
 Shellcode             34
Backdoor                7
Analysis                5
No Match                3
Worms                   1
Name: count, dtype: int64


Processing CSVs:  73%|███████▎  | 58/79 [1:00:46<11:50, 33.85s/it]

Finished processing 24_1029809696_20240905-202437_processed.csv.
Label distribution:
label
Benign    20043
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  75%|███████▍  | 59/79 [1:01:50<14:19, 42.98s/it]

Finished processing 2_1000000159_20240905-204444_processed.csv.
Label distribution:
label
Benign            15581
 Fuzzers            483
Exploits            374
Reconnaissance      161
Generic              71
DoS                  44
Shellcode            23
Backdoors             6
No Match              2
Worms                 2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  76%|███████▌  | 60/79 [1:02:25<12:50, 40.55s/it]

Finished processing 18_1029238692_20240905-203437_processed.csv.
Label distribution:
label
Benign    19241
Name: count, dtype: int64


Processing CSVs:  77%|███████▋  | 61/79 [1:02:36<09:30, 31.69s/it]

Finished processing 28_processed.csv.
Label distribution:
label
Benign    18272
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  78%|███████▊  | 62/79 [1:04:13<14:29, 51.14s/it]

Finished processing 27_1028678596_20240905-212657_processed.csv.
Label distribution:
label
Benign    17466
Name: count, dtype: int64


Processing CSVs:  80%|███████▉  | 63/79 [1:04:26<10:37, 39.84s/it]

Finished processing 22_1029011312_20240905-202237_processed.csv.
Label distribution:
label
Benign    18057
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  81%|████████  | 64/79 [1:05:54<13:36, 54.41s/it]

Finished processing 52_processed.csv.
Label distribution:
label
Benign    17635
Name: count, dtype: int64


Processing CSVs:  82%|████████▏ | 65/79 [1:06:01<09:21, 40.07s/it]

Finished processing 13_processed.csv.
Label distribution:
label
Benign    17560
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  84%|████████▎ | 66/79 [1:06:26<07:40, 35.41s/it]

Finished processing 20_1030212288_20240905-203637_processed.csv.
Label distribution:
label
Benign      20483
No Match        2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  85%|████████▍ | 67/79 [1:07:54<10:15, 51.27s/it]

Finished processing 39_processed.csv.
Label distribution:
label
Benign      18969
No Match        3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  86%|████████▌ | 68/79 [1:08:19<07:56, 43.28s/it]

Finished processing 29_processed.csv.
Label distribution:
label
Benign    18693
Name: count, dtype: int64


Processing CSVs:  87%|████████▋ | 69/79 [1:08:21<05:10, 31.07s/it]

Finished processing 17_processed.csv.
Label distribution:
label
Benign              34039
Exploits              764
 Fuzzers              498
 Reconnaissance       353
Generic               109
DoS                   105
Analysis               52
 Shellcode             40
Backdoor               11
No Match                5
Worms                   3
Name: count, dtype: int64


Processing CSVs:  89%|████████▊ | 70/79 [1:08:27<03:30, 23.42s/it]

Finished processing 6_1028295840_20240905-211841_processed.csv.
Label distribution:
label
Benign            17436
 Fuzzers            428
Exploits            406
Reconnaissance      205
Generic              88
DoS                  52
Shellcode            27
Backdoors             6
No Match              3
Worms                 3
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  90%|████████▉ | 71/79 [1:09:51<05:33, 41.64s/it]

Finished processing 12_1000001417_20240905-205955_processed.csv.
Label distribution:
label
Benign    19786
Name: count, dtype: int64


Processing CSVs:  91%|█████████ | 72/79 [1:10:19<04:23, 37.67s/it]

Finished processing 38_processed.csv.
Label distribution:
label
Benign    20644
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_2.csv


Processing CSVs:  92%|█████████▏| 73/79 [1:10:32<03:01, 30.26s/it]

Finished processing 10_1000001192_20240905-215144_processed.csv.
Label distribution:
label
Benign    21939
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_4.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv


Processing CSVs:  94%|█████████▎| 74/79 [1:12:59<05:26, 65.28s/it]

Finished processing 5_processed.csv.
Label distribution:
label
Benign              36281
Exploits              836
 Fuzzers              836
 Reconnaissance       372
Generic               133
DoS                   114
 Shellcode             52
Backdoor               13
Worms                   6
No Match                2
Name: count, dtype: int64


Processing CSVs:  95%|█████████▍| 75/79 [1:13:08<03:12, 48.22s/it]

Finished processing 7_1029959340_20240905-210156_processed.csv.
Label distribution:
label
Benign            19772
Exploits            480
 Fuzzers            354
Reconnaissance      224
Generic              77
DoS                  71
Analysis             52
Shellcode            28
Backdoors             9
No Match              3
Worms                 2
Name: count, dtype: int64
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_1.csv
Reading: /scratch/user/syedwali/Datasets/UNSW/provided_CSVs/UNSW-NB15_3.csv


Processing CSVs:  96%|█████████▌| 76/79 [1:14:08<02:35, 51.95s/it]

Finished processing 2_processed.csv.
Label distribution:
label
Benign              33769
Exploits              827
 Reconnaissance       381
 Fuzzers              250
DoS                   125
Generic               117
 Shellcode             46
Backdoor                9
Worms                   5
No Match                3
Name: count, dtype: int64


Processing CSVs:  97%|█████████▋| 77/79 [1:14:51<01:38, 49.05s/it]

Finished processing 19_processed.csv.
Label distribution:
label
Benign              33806
Exploits              793
 Fuzzers              619
 Reconnaissance       339
Generic               126
DoS                   114
 Shellcode             46
Backdoor               17
Worms                   4
No Match                3
Name: count, dtype: int64


Processing CSVs:  99%|█████████▊| 78/79 [1:14:58<00:36, 36.43s/it]

Finished processing 9_1000001414_20240905-212854_processed.csv.
Label distribution:
label
Benign            17489
 Fuzzers            210
Exploits            207
Reconnaissance      104
Generic              36
DoS                  29
Shellcode            14
Backdoors             1
Worms                 1
Name: count, dtype: int64


Processing CSVs: 100%|██████████| 79/79 [1:23:53<00:00, 63.71s/it] 

Finished processing 16_processed.csv.
Label distribution:
label
Benign              39330
Exploits              988
 Fuzzers              720
 Reconnaissance       414
Generic               162
DoS                   134
 Shellcode             53
Backdoor               17
Worms                   8
No Match                2
Analysis                1
Name: count, dtype: int64





Reading labeled CSV files from the UNSW dataset folder. Due to the dataset's large size, each CSV file is read individually, with columns related to ports, IPs, MAC addresses, and payloads being dropped. Additionally, the label count for each file is limited to a maximum of 5000. Unmatched rows have been removed, and the resulting DataFrame is exported. Users can adjust these settings based on their system requirements.

In [3]:
import os
import pandas as pd
from tqdm import tqdm

# Function to process all CSV files in a folder
def process_csvs_in_folder(input_folder):
    # Initialize an empty DataFrame to store the combined results
    combined_df = pd.DataFrame()

    # Iterate over all CSV files in the input folder with progress bar
    csv_files = [f for f in os.listdir(input_folder) if f.endswith(".csv")]
    
    for csv_file in tqdm(csv_files, desc="Processing CSV files"):
        try:
            # Read the CSV file
            file_path = os.path.join(input_folder, csv_file)
            df = pd.read_csv(file_path, low_memory=False)
            
            # Drop unnecessary columns
            df.drop(['id','expiration_id','src_ip','src_mac','src_oui','src_port','dst_ip','dst_mac','dst_oui','dst_port',
                     'protocol','ip_version','vlan_id','tunnel_id','bidirectional_first_seen_ms','bidirectional_last_seen_ms',
                     'src2dst_first_seen_ms','src2dst_last_seen_ms','dst2src_first_seen_ms','dst2src_last_seen_ms',
                     'application_name', 'application_category_name', 'application_is_guessed', 'application_confidence',
                     'requested_server_name','client_fingerprint','server_fingerprint','user_agent','content_type',
                     'udps.payload_data','udps.delta_time','udps.packet_direction','udps.ip_size','udps.transport_size',
                     'udps.payload_size','udps.syn','udps.cwr','udps.ece','udps.urg','udps.ack','udps.psh','udps.rst',
                     'udps.fin','file'], axis=1, inplace=True)

            # Drop rows with missing values
            df.dropna(inplace=True)

            # Remove rows where the label is 'No Match'
            df = df[df['label'] != 'No Match']

            # Limit the number of instances per class label to 5000
            df = df.groupby('label').apply(lambda x: x.head(5000)).reset_index(drop=True)

            # Append the processed DataFrame to the combined DataFrame
            combined_df = pd.concat([combined_df, df], ignore_index=True)
        
        except Exception as e:
            print(f"Failed to process {csv_file}: {e}")

    # Return the combined DataFrame
    return combined_df

# Example usage:
input_folder = "/scratch/user/syedwali/Datasets/UNSW/processed/labeled_csv"
df = process_csvs_in_folder(input_folder)


Processing CSV files: 100%|██████████| 78/78 [23:05<00:00, 17.77s/it]


In [4]:
df['label'] = df['label'].str.strip()

In [5]:
df['label'].value_counts()

label
Benign            390000
Exploits           25158
Fuzzers            18199
Reconnaissance     11086
Generic             3861
DoS                 3642
Shellcode           1433
Analysis             426
Backdoor             340
Worms                156
Backdoors             50
Name: count, dtype: int64

In [6]:
# If needed, save the combined DataFrame to a CSV
output_file = "/scratch/user/syedwali/Datasets/UNSW_dataset.csv"
df.to_csv(output_file, index=False)

In [12]:
df=pd.read_csv("/scratch/user/syedwali/Datasets/UNSW_dataset.csv")

In [13]:
df['label'] = df['label'].replace(['Backdoors', 'Backdoor'], 'Backdoor')

In [14]:
df['label'].value_counts()

label
Benign            390000
Exploits           25158
Fuzzers            18199
Reconnaissance     11086
Generic             3861
DoS                 3642
Shellcode           1433
Analysis             426
Backdoor             390
Worms                156
Name: count, dtype: int64

In [15]:
df.drop(['flowid'],axis=1,inplace=True)

In [8]:
df.columns

Index(['bidirectional_duration_ms', 'bidirectional_packets',
       'bidirectional_bytes', 'src2dst_duration_ms', 'src2dst_packets',
       'src2dst_bytes', 'dst2src_duration_ms', 'dst2src_packets',
       'dst2src_bytes', 'bidirectional_min_ps', 'bidirectional_mean_ps',
       'bidirectional_stddev_ps', 'bidirectional_max_ps', 'src2dst_min_ps',
       'src2dst_mean_ps', 'src2dst_stddev_ps', 'src2dst_max_ps',
       'dst2src_min_ps', 'dst2src_mean_ps', 'dst2src_stddev_ps',
       'dst2src_max_ps', 'bidirectional_min_piat_ms',
       'bidirectional_mean_piat_ms', 'bidirectional_stddev_piat_ms',
       'bidirectional_max_piat_ms', 'src2dst_min_piat_ms',
       'src2dst_mean_piat_ms', 'src2dst_stddev_piat_ms', 'src2dst_max_piat_ms',
       'dst2src_min_piat_ms', 'dst2src_mean_piat_ms', 'dst2src_stddev_piat_ms',
       'dst2src_max_piat_ms', 'bidirectional_syn_packets',
       'bidirectional_cwr_packets', 'bidirectional_ece_packets',
       'bidirectional_urg_packets', 'bidirectional_ack_p

### Performance Evaluation with contextual Features

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Assuming df is already defined and has a 'label' column
X = df.drop('label', axis=1)  # Features (drop the label column)
y = df['label']  # Labels

# Split the data (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Initialize the Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42,n_jobs=-1)

# Train the classifier
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(report)

Accuracy: 0.98
Classification Report:
                precision    recall  f1-score   support

      Analysis       0.80      0.82      0.81       128
      Backdoor       0.83      0.57      0.68       112
        Benign       1.00      1.00      1.00    117065
           DoS       0.71      0.26      0.38      1075
      Exploits       0.84      0.92      0.88      7531
       Fuzzers       0.92      0.95      0.93      5408
       Generic       0.88      0.72      0.79      1164
Reconnaissance       0.91      0.91      0.91      3363
     Shellcode       0.63      0.62      0.62       423
         Worms       0.64      0.43      0.52        37

      accuracy                           0.98    136306
     macro avg       0.82      0.72      0.75    136306
  weighted avg       0.98      0.98      0.98    136306



### Performance Evaluation without contextual Features

To demonstrate the impact of extended contextual features on classification performance, we've removed all such features from our analysis. By doing so, we can observe the potential degradation in classification accuracy when contextual information is limited.

In [17]:
df.drop(['udps.srcdst_packet_size_variation','udps.srcdst_udp_packet_count','udps.udp_packet_count',
'udps.srcdst_tcp_packet_count','udps.tcp_packet_count','udps.srcdst_ack_packet_count',
'udps.ack_packet_count','udps.srcdst_fin_packet_count','udps.fin_packet_count',
'udps.srcdst_rst_packet_count','udps.rst_packet_count','udps.srcdst_psh_packet_count',
'udps.psh_packet_count','udps.srcdst_syn_packet_count','udps.syn_packet_count','udps.srcdst_unique_ports_count',
'udps.srcdst_icmp_packet_count','udps.icmp_packet_count','udps.srcdst_http_ports_count','udps.http_ports_count',
'udps.srcdst_bidirectional_duration_avg','udps.bidirectional_duration_avg','udps.srcdst_dns_port_count',
'udps.dns_port_count','udps.srcdst_dns_port_src_count','udps.dns_port_src_count','udps.srcdst_vul_ports_count',
'udps.src2dst_packet_count','udps.bidirectional_packet_count','udps.srcdst_src2dst_packet_count',
'udps.srcdst_bidirectional_packet_count'],axis=1,inplace=True)

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Assuming df is already defined and has a 'label' column
X = df.drop('label', axis=1)  # Features (drop the label column)
y = df['label']  # Labels

# Split the data (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Initialize the Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42,n_jobs=-1)

# Train the classifier
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(report)

Accuracy: 0.97
Classification Report:
                precision    recall  f1-score   support

      Analysis       0.38      0.27      0.31       128
      Backdoor       0.86      0.58      0.69       112
        Benign       1.00      0.99      0.99    117065
           DoS       0.70      0.36      0.48      1075
      Exploits       0.83      0.91      0.86      7531
       Fuzzers       0.79      0.86      0.83      5408
       Generic       0.87      0.79      0.83      1164
Reconnaissance       0.90      0.90      0.90      3363
     Shellcode       0.58      0.53      0.56       423
         Worms       0.61      0.54      0.57        37

      accuracy                           0.97    136306
     macro avg       0.75      0.67      0.70    136306
  weighted avg       0.97      0.97      0.97    136306

