In [82]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

## Supporting Functions

In [83]:
def d_types_report(df):
    columns=[]
    d_types=[]
    uniques=[]
    n_uniques=[]
    null_values=[]
    null_values_percentage=[]
    rows = df.shape[0]
    
    for i in df.columns:
        columns.append(i)
        d_types.append(df[i].dtypes)
        uniques.append(df[i].unique()[:5])
        n_uniques.append(df[i].nunique())
        null_values.append(df[i].isna().sum())
        null_values_percentage.append(null_values[-1] * 100 / rows)
        
    return pd.DataFrame({"Columns": columns, "Data_Types": d_types, "Unique_values": uniques, "N_Uniques": n_uniques,  "Null_Values": null_values, "Null_Values_percentage": null_values_percentage})

In [84]:
df = pd.read_csv('../data/test_data.csv')
df.head()

Unnamed: 0,arp.opcode,arp.hw.size,icmp.checksum,icmp.seq_le,icmp.unused,http.content_length,http.request.method,http.referer,http.request.version,http.response,...,mqtt.proto_len,mqtt.protoname,mqtt.topic,mqtt.topic_len,mqtt.ver,mbtcp.len,mbtcp.trans_id,mbtcp.unit_id,Attack_label,Attack_type
0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.666667,0.428571,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.285714,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,12.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.666667,0.428571,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,9.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.666667,0.428571,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,9.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.285714,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,12.0


In [85]:
attack_type_mapping = {
    13: 'Vulnerability_scanner',
    6: 'Normal',
    4: 'DDOS_UDP',
    1: 'DDOS_HTTP',
    11: 'SQL_injection',
    8: 'Password',
    10: 'Ransomware',
    7: 'OS_Fingerprinting',
    0: 'Backdoor',
    14: 'XSS',
    12: 'Uploading',
    5: 'MITM',
    9: 'Port_Scanning',
    2: 'DDOS_ICMP',
    3: 'DDOS_TCP'
}

# Reverse the encoding using the attack_type_mapping
df['Attack_type'] = df['Attack_type'].map(attack_type_mapping)
df.head()

Unnamed: 0,arp.opcode,arp.hw.size,icmp.checksum,icmp.seq_le,icmp.unused,http.content_length,http.request.method,http.referer,http.request.version,http.response,...,mqtt.proto_len,mqtt.protoname,mqtt.topic,mqtt.topic_len,mqtt.ver,mbtcp.len,mbtcp.trans_id,mbtcp.unit_id,Attack_label,Attack_type
0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.666667,0.428571,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Normal
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.285714,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,Uploading
2,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.666667,0.428571,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,Port_Scanning
3,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.666667,0.428571,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,Port_Scanning
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.285714,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,1.0,Uploading


In [86]:
d_types_report(df)

Unnamed: 0,Columns,Data_Types,Unique_values,N_Uniques,Null_Values,Null_Values_percentage
0,arp.opcode,float64,"[0.0, 0.5, 1.0]",3,0,0.0
1,arp.hw.size,float64,"[0.0, 1.0]",2,0,0.0
2,icmp.checksum,float64,[0.0],1,0,0.0
3,icmp.seq_le,float64,[0.0],1,0,0.0
4,icmp.unused,float64,[0.0],1,0,0.0
5,http.content_length,float64,"[0.0, 0.625, 0.96875, 0.375, 0.75]",32,0,0.0
6,http.request.method,float64,"[0.2, 0.0, 0.4, 1.0, 0.8]",6,0,0.0
7,http.referer,float64,"[0.6666666666666666, 1.0, 0.3333333333333333, ...",4,0,0.0
8,http.request.version,float64,"[0.4285714285714285, 0.2857142857142857, 0.857...",8,0,0.0
9,http.response,float64,"[0.0, 1.0]",2,0,0.0


In [87]:
MODEL_PATH = '../model/finalmodel.pkl'
model = joblib.load(MODEL_PATH)

In [88]:
X = df.drop("Attack_type", axis=1)
y = df['Attack_type']

In [89]:
y_pred = model.predict(X)

In [90]:
y_pred

array(['DDOS_UDP', 'Normal', 'Normal', ..., 'Normal', 'Normal', 'Normal'],
      dtype=object)

In [91]:
accuracy = accuracy_score(y, y_pred)
print(accuracy)

0.21279136369372956


In [93]:
cm = confusion_matrix(y, y_pred)
df = pd.DataFrame(cm)

# You can also set column and index names if needed
column_names = ["Class " + str(i) for i in range(11)]
index_names = ["Actual " + str(i) for i in range(11)]
df.columns = column_names
df.index = index_names
cm.head()

AttributeError: 'numpy.ndarray' object has no attribute 'head'