# Escenarios experimentales Internetworking
Presentado por: Jonathan Toapanta
Fecha: 23/02/2023

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt #Gives us Graphics
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam

In [10]:
# Load the dataset
url = 'https://raw.githubusercontent.com/beespinosa1/Inter/main/Escenario3.csv'
df = pd.read_csv(url)

In [11]:
names =['srcip', 'sport', 'dstip', 'dsport', 'proto', 'state', 'dur',
        'sbytes', 'dbytes', 'sttl', 'dttl', 'sloss', 'dloss', 'service',
        'Sload', 'Dload', 'Spkts', 'Dpkts', 'swin', 'dwin', 'stcpb',
        'dtcpb', 'smeansz', 'dmeansz', 'trans_depth', 'res_bdy_len',
        'Sjit', 'Djit', 'Stime', 'Ltime', 'Sintpkt', 'Dintpkt', 'tcprtt',
        'synack', 'ackdat', 'is_sm_ips_ports', 'ct_state_ttl',
        'ct_flw_http_mthd', 'is_ftp_login', 'ct_ftp_cmd', 'ct_srv_src',
        'ct_srv_dst', 'ct_dst_ltm', 'ct_src_ltm', 'ct_src_dport_ltm',
        'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'attack_cat', 'Label']

df= pd.read_csv(url, names=names, low_memory=False)


In [12]:
# Normalise the data
def dfNormalize(df):
    for feature_name in df.columns:
        df.loc[:,feature_name]= pd.to_numeric(df.loc[:,feature_name], errors='coerce').fillna(0)
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()   
        if (max_value - min_value) > 0:
            df.loc[:,feature_name] = (df.loc[:,feature_name] - min_value) / (max_value - min_value)
        else:
            df.loc[:,feature_name] = (df.loc[:,feature_name]- min_value)    
    return df


In [13]:
dataframe= pd.read_csv(url, names=names, low_memory=False)
dataframe = dataframe.reindex(np.random.permutation(dataframe.index)).copy()
print(dataframe.describe())
print(list(dataframe))

              sport          dur         sbytes        dbytes         sttl  \
count   3500.000000  3500.000000    3500.000000  3.500000e+03  3500.000000   
mean   31926.315429     3.111007    3468.078571  2.876112e+04    35.811714   
std    19176.226410     7.019469    7187.232820  1.356012e+05    31.764679   
min        0.000000     0.000000      46.000000  0.000000e+00     0.000000   
25%    15392.000000     0.010383     568.000000  3.200000e+02    31.000000   
50%    31459.500000     0.576294    1540.000000  1.644000e+03    31.000000   
75%    48528.000000     2.254592    2662.000000  1.016800e+04    31.000000   
max    65534.000000    50.004387  115727.000000  1.641360e+06   254.000000   

              dttl        sloss        dloss         Sload         Dload  ...  \
count  3500.000000  3500.000000  3500.000000  3.500000e+03  3.500000e+03  ...   
mean     34.915714     5.156286    13.966857  1.862092e+06  1.346940e+06  ...   
std      37.409203     6.487297    47.821493  3.574078

In [14]:
keys = dataframe.keys()

data_to_process = dataframe[keys[4:len(keys)-1]].copy()

x_normalised=dfNormalize(data_to_process)
print(x_normalised.describe())


        proto   state          dur       sbytes       dbytes         sttl  \
count  3500.0  3500.0  3500.000000  3500.000000  3500.000000  3500.000000   
mean      0.0     0.0     0.062215     0.029582     0.017523     0.140991   
std       0.0     0.0     0.140377     0.062130     0.082615     0.125058   
min       0.0     0.0     0.000000     0.000000     0.000000     0.000000   
25%       0.0     0.0     0.000208     0.004512     0.000195     0.122047   
50%       0.0     0.0     0.011525     0.012915     0.001002     0.122047   
75%       0.0     0.0     0.045088     0.022614     0.006195     0.122047   
max       0.0     0.0     1.000000     1.000000     1.000000     1.000000   

              dttl        sloss        dloss  service  ...  is_ftp_login  \
count  3500.000000  3500.000000  3500.000000   3500.0  ...   3500.000000   
mean      0.138554     0.093751     0.023957      0.0  ...      0.006000   
std       0.148449     0.117951     0.082027      0.0  ...      0.077238   
mi

In [15]:
# get the train and test data
y = dataframe['Label']


In [16]:
# Split the data into train and test sets
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_normalised, y, test_size=0.2, random_state=42)


In [17]:
# Create the neural network model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
    Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])


In [18]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [19]:
# Train the model
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(x_test, y_test)

# Print the test accuracy
print('Test accuracy:', test_acc)

Test accuracy: 1.0
