In [34]:
# Imports

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from torch.utils.data import DataLoader, TensorDataset

In [35]:
# Loading database

df = pd.read_csv("dataset/UNSW_NB15_training-set.csv", low_memory=False)
X = df.iloc[:, :-1]
y = df['label']

In [36]:
df.head()

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1,0.121478,tcp,-,FIN,6,4,258,172,74.08749,...,1,1,0,0,0,1,1,0,Normal,0
1,2,0.649902,tcp,-,FIN,14,38,734,42014,78.473372,...,1,2,0,0,0,1,6,0,Normal,0
2,3,1.623129,tcp,-,FIN,8,16,364,13186,14.170161,...,1,3,0,0,0,2,6,0,Normal,0
3,4,1.681642,tcp,ftp,FIN,12,12,628,770,13.677108,...,1,3,1,1,0,2,1,0,Normal,0
4,5,0.449454,tcp,-,FIN,10,6,534,268,33.373826,...,1,40,0,0,0,2,39,0,Normal,0


In [37]:
normal = df.loc[df["label"] == 0]
malicious = df.loc[df["label"] == 1]

print(f"There is {len(normal)} normal traffic and {len(malicious)} malicious traffic")

There is 56000 normal traffic and 119341 malicious traffic


In [38]:
from sklearn.preprocessing import LabelEncoder

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# Excluindo a linha com valor 'URN' na coluna 'state'
X_train = X_train[X_train['state'] != 'URN']
X_test = X_test[X_test['state'] != 'URN']

# Também removendo a linha correspondente em y_train e y_test
y_train = y_train[X_train.index]
y_test = y_test[X_test.index]

# Aplicando o LabelEncoder a todas as colunas do tipo 'object'
label_encoder = LabelEncoder()

# Para todas as colunas categóricas em X_train e X_test
for column in X_train.select_dtypes(include=['object']).columns:
    X_train[column] = label_encoder.fit_transform(X_train[column])
    X_test[column] = label_encoder.transform(X_test[column])

# Garantindo que todas as colunas sejam convertidas para float32
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

# Convertendo os DataFrames para numpy arrays para alimentar no modelo
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

# Exibindo o tamanho do conjunto de dados
print(f"Dataset length: {X_train.shape}")

Dataset length: (122738, 44)


In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [40]:
batch_size = 64

train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32).to(device),
                           torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1).to(device))
test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32).to(device),
                          torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1).to(device))

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [59]:
class MainNetwork(nn.Module):
    def __init__(self):
        super(MainNetwork, self).__init__()
        self.fc1 = nn.Linear(44, 44)
        self.fc2 = nn.Linear(44, 36)
        self.fc3 = nn.Linear(36, 24)
        self.fc4 = nn.Linear(24, 12)
        self.fc5 = nn.Linear(12, 1)

        self.tahn = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=0.1)
        self.batch_norm1 = nn.BatchNorm1d(44)
        self.batch_norm2 = nn.BatchNorm1d(36)
        self.batch_norm3 = nn.BatchNorm1d(24)
        self.batch_norm4 = nn.BatchNorm1d(12)

     def forward(self, x):
        x = self.tahn(self.fc1(x))
        x = self.batch_norm1(x)  # Aplica normalização de batch após a primeira camada
        x = self.dropout(x)  # Aplica dropout após a primeira camada
        x = self.tahn(self.fc2(x))
        x = self.batch_norm2(x)  # Aplica normalização de batch após a segunda camada
        x = self.dropout(x)  # Aplica dropout após a segunda camada
        x = self.tahn(self.fc3(x))
        x = self.batch_norm3(x)  # Aplica normalização de batch após a terceira camada
        x = self.dropout(x)  # Aplica dropout após a terceira camada
        x = self.tahn(self.fc4(x))
        x = self.batch_norm4(x)  # Aplica normalização de batch após a quarta camada
        x = self.dropout(x)  # Aplica dropout após a quarta camada
        x = self.sigmoid(self.fc5(x))
        return x

In [62]:
# Model, loss function and optimizer

model = MainNetwork().to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-3)

In [None]:
epochs = 120

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    running_acc = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)

        loss.backward()
        optimizer.step()

        predicted = (y_pred > 0.5).float()
        correct = (predicted == labels).float()

        acc = correct.sum() / len(correct)

        running_loss += loss.item()
        running_acc += acc.item()



    avg_loss = running_loss / len(train_loader)
    avg_acc = running_acc / len(train_loader)


    print(f'Epoch [{
        epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%')


Epoch [1/120], Loss: 0.4775, Accuracy: 74.80%
Epoch [2/120], Loss: 0.4122, Accuracy: 78.37%
Epoch [3/120], Loss: 0.4160, Accuracy: 76.55%
Epoch [4/120], Loss: 0.4173, Accuracy: 75.21%
Epoch [5/120], Loss: 0.4159, Accuracy: 75.30%
Epoch [6/120], Loss: 0.4161, Accuracy: 75.22%
Epoch [7/120], Loss: 0.4158, Accuracy: 75.20%
Epoch [8/120], Loss: 0.4150, Accuracy: 75.16%
Epoch [9/120], Loss: 0.4142, Accuracy: 75.33%
Epoch [10/120], Loss: 0.4153, Accuracy: 75.09%
Epoch [11/120], Loss: 0.4149, Accuracy: 75.00%
