In [69]:
# Imports

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from torch.utils.data import DataLoader, TensorDataset

In [70]:
# Loading database

df = pd.read_csv("dataset/UNSW_NB15_training-set.csv", low_memory=False)
X = df.iloc[:, :-1]
y = df['label']

In [71]:
df.head()

Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1,0.121478,tcp,-,FIN,6,4,258,172,74.08749,...,1,1,0,0,0,1,1,0,Normal,0
1,2,0.649902,tcp,-,FIN,14,38,734,42014,78.473372,...,1,2,0,0,0,1,6,0,Normal,0
2,3,1.623129,tcp,-,FIN,8,16,364,13186,14.170161,...,1,3,0,0,0,2,6,0,Normal,0
3,4,1.681642,tcp,ftp,FIN,12,12,628,770,13.677108,...,1,3,1,1,0,2,1,0,Normal,0
4,5,0.449454,tcp,-,FIN,10,6,534,268,33.373826,...,1,40,0,0,0,2,39,0,Normal,0


In [37]:
normal = df.loc[df["label"] == 0]
malicious = df.loc[df["label"] == 1]

print(f"There is {len(normal)} normal traffic and {len(malicious)} malicious traffic")

There is 56000 normal traffic and 119341 malicious traffic


In [95]:
# Balancing
from sklearn.preprocessing import LabelEncoder, StandardScaler

label_encoder = LabelEncoder()

for column in X.select_dtypes(include=['object']).columns:
    X[column] = label_encoder.fit_transform(X[column])

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

X_rescaled = scale(X_resampled)


In [96]:


X_train, X_test, y_train, y_test = train_test_split(
    X_rescaled, y_resampled, test_size=0.3, random_state=42)


X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

# Exibindo o tamanho do conjunto de dados
print(f"Train length: {X_train.shape}")
print(f"Test length: {X_test.shape}")

Train length: (167077, 44)
Test length: (71605, 44)


In [97]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [98]:
batch_size = 64

train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32).to(device),
                           torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1).to(device))
test_data = TensorDataset(torch.tensor(X_test, dtype=torch.float32).to(device),
                          torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1).to(device))

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [99]:
class MainNetwork(nn.Module):
    def __init__(self):
        super(MainNetwork, self).__init__()
        self.fc1 = nn.Linear(44, 44)
        self.fc2 = nn.Linear(44, 36)
        self.fc3 = nn.Linear(36, 24)
        self.fc4 = nn.Linear(24, 12)
        self.fc5 = nn.Linear(12, 1)

        self.tahn = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=0.1)
        

    def forward(self, x):

        x = self.tahn(self.fc1(x))
        x = self.dropout(x)
        x = self.tahn(self.fc2(x))
        x = self.dropout(x)
        x = self.tahn(self.fc3(x))
        x = self.dropout(x)
        x = self.tahn(self.fc4(x))
        x = self.dropout(x)
        x = self.sigmoid(self.fc5(x))
        return x

In [100]:
# Model, loss function and optimizer

model = MainNetwork().to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-3)

In [101]:
epochs = 30

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    running_acc = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)

        loss.backward()
        optimizer.step()

        predicted = (y_pred > 0.5).float()
        correct = (predicted == labels).float()

        acc = correct.sum() / len(correct)

        running_loss += loss.item()
        running_acc += acc.item()



    avg_loss = running_loss / len(train_loader)
    avg_acc = running_acc / len(train_loader)


    print(f'Epoch [{
        epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%')


Epoch [1/30], Loss: 0.2048, Accuracy: 92.70%
Epoch [2/30], Loss: 0.1068, Accuracy: 96.14%
Epoch [3/30], Loss: 0.0960, Accuracy: 96.32%
Epoch [4/30], Loss: 0.0892, Accuracy: 96.45%
Epoch [5/30], Loss: 0.0800, Accuracy: 96.83%
Epoch [6/30], Loss: 0.0678, Accuracy: 97.34%
Epoch [7/30], Loss: 0.0516, Accuracy: 98.05%
Epoch [8/30], Loss: 0.0371, Accuracy: 98.63%
Epoch [9/30], Loss: 0.0274, Accuracy: 98.98%
Epoch [10/30], Loss: 0.0199, Accuracy: 99.30%
Epoch [11/30], Loss: 0.0148, Accuracy: 99.48%
Epoch [12/30], Loss: 0.0112, Accuracy: 99.64%
Epoch [13/30], Loss: 0.0088, Accuracy: 99.73%
Epoch [14/30], Loss: 0.0062, Accuracy: 99.82%
Epoch [15/30], Loss: 0.0051, Accuracy: 99.87%
Epoch [16/30], Loss: 0.0042, Accuracy: 99.90%
Epoch [17/30], Loss: 0.0040, Accuracy: 99.91%
Epoch [18/30], Loss: 0.0036, Accuracy: 99.92%
Epoch [19/30], Loss: 0.0034, Accuracy: 99.93%
Epoch [20/30], Loss: 0.0032, Accuracy: 99.94%
Epoch [21/30], Loss: 0.0032, Accuracy: 99.94%
Epoch [22/30], Loss: 0.0031, Accuracy: 99.9

In [102]:
# Testing
 
model.eval()

with torch.no_grad():
    test_loss = 0.0
    test_acc = 0.0
    y_pred_test = []
    y_true_test = []

    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        y_pred = model(inputs)

        loss = criterion(y_pred, labels)
        test_loss += loss.item()
      
        predicted = (y_pred > 0.5).float()

        correct = (predicted == labels).float()
        acc = correct.sum() / len(correct)
        test_acc += acc.item()

        y_pred_test.extend(predicted.cpu().numpy())
        y_true_test.extend(labels.cpu().numpy())

    avg_test_loss = test_loss / len(test_loader)
    avg_test_acc = test_acc / len(test_loader)

    print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {
          avg_test_acc * 100:.2f}%")

Test Loss: 0.0008, Test Accuracy: 99.99%


In [103]:
y_pred_test = np.array(y_pred_test).flatten()
y_true_test = np.array(y_true_test).flatten()

In [104]:

false_neg = np.sum((y_pred_test == 0) & (y_true_test == 1))
false_pos = np.sum((y_pred_test == 1) & (y_true_test == 0))
incorrect = np.sum(y_pred_test != y_true_test)
total = len(y_true_test)

accuracy = accuracy_score(y_true_test, y_pred_test)
inaccuracy = incorrect / total

print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Incorrect: {inaccuracy * 100:.2f}%')
print(f'False negatives: {false_neg / incorrect:.2f}')
print(f'False positives: {false_pos / incorrect:.2f}')
print(f'False negatives/total: {false_neg / total:.2f}')

Accuracy: 99.99%
Incorrect: 0.01%
False negatives: 0.50
False positives: 0.50
False negatives/total: 0.00


In [106]:
torch.save(model.state_dict(), "model.pth")