In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt #Gives us Graphics
from sklearn.neural_network import MLPClassifier #Import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, f1_score
from sklearn.model_selection import train_test_split

In [2]:
url = 'https://raw.githubusercontent.com/beespinosa1/Inter/main/Escenario3.csv'
df = pd.read_csv(url)

In [3]:
names =['srcip', 'sport', 'dstip', 'dsport', 'proto', 'state', 'dur',
        'sbytes', 'dbytes', 'sttl', 'dttl', 'sloss', 'dloss', 'service',
        'Sload', 'Dload', 'Spkts', 'Dpkts', 'swin', 'dwin', 'stcpb',
        'dtcpb', 'smeansz', 'dmeansz', 'trans_depth', 'res_bdy_len',
        'Sjit', 'Djit', 'Stime', 'Ltime', 'Sintpkt', 'Dintpkt', 'tcprtt',
        'synack', 'ackdat', 'is_sm_ips_ports', 'ct_state_ttl',
        'ct_flw_http_mthd', 'is_ftp_login', 'ct_ftp_cmd', 'ct_srv_src',
        'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ltm', 'ct_src_dport_ltm',
        'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat', 'Label']

In [4]:
df= pd.read_csv(url, names=names, low_memory=False)
dataframe= pd.read_csv(url, names=names, low_memory=False)

In [5]:
# Normalise the data
def dfNormalize(df):
    for feature_name in df.columns:
        df.loc[:,feature_name]= pd.to_numeric(df.loc[:,feature_name], errors='coerce').fillna(0)
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()   
        if (max_value - min_value) > 0:
            df.loc[:,feature_name] = (df.loc[:,feature_name] - min_value) / (max_value - min_value)
        else:
            df.loc[:,feature_name] = (df.loc[:,feature_name]- min_value)    
    return df

In [6]:
dataframe.shape
dataframe = dataframe.reindex(np.random.permutation(dataframe.index)).copy()
print(dataframe.describe())
print(list(dataframe))

              sport          dur         sbytes        dbytes         sttl  \
count   3500.000000  3500.000000    3500.000000  3.500000e+03  3500.000000   
mean   31926.315429     3.111007    3468.078571  2.876112e+04    35.811714   
std    19176.226410     7.019469    7187.232820  1.356012e+05    31.764679   
min        0.000000     0.000000      46.000000  0.000000e+00     0.000000   
25%    15392.000000     0.010383     568.000000  3.200000e+02    31.000000   
50%    31459.500000     0.576294    1540.000000  1.644000e+03    31.000000   
75%    48528.000000     2.254592    2662.000000  1.016800e+04    31.000000   
max    65534.000000    50.004387  115727.000000  1.641360e+06   254.000000   

              dttl        sloss        dloss         Sload         Dload  ...  \
count  3500.000000  3500.000000  3500.000000  3.500000e+03  3.500000e+03  ...   
mean     34.915714     5.156286    13.966857  1.862092e+06  1.346940e+06  ...   
std      37.409203     6.487297    47.821493  3.574078

In [7]:
keys = dataframe.keys()
data_to_process = dataframe[keys[4:len(keys)-1]].copy()

In [8]:
x_normalised=dfNormalize(data_to_process)
print(x_normalised.describe())

        proto   state          dur       sbytes       dbytes         sttl  \
count  3500.0  3500.0  3500.000000  3500.000000  3500.000000  3500.000000   
mean      0.0     0.0     0.062215     0.029582     0.017523     0.140991   
std       0.0     0.0     0.140377     0.062130     0.082615     0.125058   
min       0.0     0.0     0.000000     0.000000     0.000000     0.000000   
25%       0.0     0.0     0.000208     0.004512     0.000195     0.122047   
50%       0.0     0.0     0.011525     0.012915     0.001002     0.122047   
75%       0.0     0.0     0.045088     0.022614     0.006195     0.122047   
max       0.0     0.0     1.000000     1.000000     1.000000     1.000000   

              dttl        sloss        dloss  service  ...  is_ftp_login  \
count  3500.000000  3500.000000  3500.000000   3500.0  ...   3500.000000   
mean      0.138554     0.093751     0.023957      0.0  ...      0.006000   
std       0.148449     0.117951     0.082027      0.0  ...      0.077238   
mi

In [9]:
# get the train and test data
y = dataframe['Label']
x_train,x_test,y_train,y_test = train_test_split(x_normalised,
                                                 y,
                                                 test_size=0.3, # %test
                                                 random_state=0)

In [10]:
x_train.shape, x_test.shape

((2450, 44), (1050, 44))

In [11]:
# Create and train MLP classifier
clf = MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=300, random_state=0)
clf.fit(x_train, y_train)

MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=300, random_state=0)

In [12]:
# Predict the labels of the testing data
y_pred = clf.predict(x_test)

# Print predicted labels
print(y_pred)

[0 0 0 ... 0 0 0]


In [13]:
#Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')
print('METRICS FOR THE MODEL')
print('Accuracy:', accuracy)
print('Precision:', precision)
print('F1-score:', f1)

METRICS FOR THE MODEL
Accuracy: 0.9980952380952381
Precision: 0.9990118577075099
F1-score: 0.9866849273377463
