In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import time
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score, recall_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler

In [None]:
filenames = [
    'NF-UNSW-NB15-v3',
    'NF-BoT-IoT-v3',
    'NF-CICIDS2018-v3',
    'NF-ToN-IoT-v3'
]

list_of_dfs = []

print("Iniciando o carregamento dos arquivos...")
for filename in filenames:
    path = f'db/{filename}.csv'
    print(f"  -> Carregando: {path}")
    try:
        temp_df = pd.read_csv(path)
        list_of_dfs.append(temp_df)
    except FileNotFoundError:
        print(f"  -> AVISO: O arquivo {path} não foi encontrado e será ignorado.")

In [None]:
print("Concatenando os dataframes...")
df = pd.concat(list_of_dfs, ignore_index=True)
print(f"Processo finalizado! O dataframe final contém {len(df)} linhas.")

In [None]:
df = list_of_dfs[2]

In [2]:
filename = 'NF-ToN-IoT-v3'

df = pd.read_csv(f'db/{filename}.csv')

In [3]:
df.drop(['IPV4_SRC_ADDR',
         'IPV4_DST_ADDR',
         'L4_SRC_PORT',
         'L4_DST_PORT',
         'L7_PROTO',
         'TCP_FLAGS',
         'CLIENT_TCP_FLAGS',
         'SERVER_TCP_FLAGS',
         'MIN_TTL', 
         'MAX_TTL',
         'SHORTEST_FLOW_PKT',
         'MIN_IP_PKT_LEN', 
         'TCP_WIN_MAX_IN', 
         'TCP_WIN_MAX_OUT', 
         'DNS_QUERY_ID', 
         'DNS_TTL_ANSWER',
         'FTP_COMMAND_RET_CODE',
         'SRC_TO_DST_SECOND_BYTES',
         'DST_TO_SRC_SECOND_BYTES',
         'FLOW_START_MILLISECONDS',
         'FLOW_END_MILLISECONDS',], inplace=True, axis=1)

In [4]:
dictionary_sets_by_attack_type = {}
attack_types = df['Attack'].unique()

In [5]:
for attack_type in attack_types:
    print(f"Processando a categoria: '{attack_type}'")
    df_current_attack = df[df['Attack'] == attack_type]

    df_train_current_attack, df_aux_current_attack = train_test_split(df_current_attack, train_size=0.5, random_state=42)
    df_test_current_attack, df_val_current_attack = train_test_split(df_aux_current_attack, train_size=0.5, random_state=42)

    dictionary_sets_by_attack_type[attack_type] = {
        'treino': df_train_current_attack,
        'teste': df_test_current_attack,
        'validacao': df_val_current_attack
    }
    print(f"  -> Treino: {len(df_train_current_attack)} | Teste: {len(df_test_current_attack)} | Validação: {len(df_val_current_attack)}")


Processando a categoria: 'Benign'
  -> Treino: 8396107 | Teste: 4198053 | Validação: 4198054
Processando a categoria: 'scanning'
  -> Treino: 679488 | Teste: 339744 | Validação: 339745
Processando a categoria: 'dos'
  -> Treino: 101728 | Teste: 50864 | Validação: 50864
Processando a categoria: 'injection'
  -> Treino: 190888 | Teste: 95444 | Validação: 95445
Processando a categoria: 'ddos'
  -> Treino: 2070628 | Teste: 1035314 | Validação: 1035314
Processando a categoria: 'password'
  -> Treino: 797388 | Teste: 398694 | Validação: 398695
Processando a categoria: 'xss'
  -> Treino: 1417217 | Teste: 708609 | Validação: 708609
Processando a categoria: 'ransomware'
  -> Treino: 1985 | Teste: 993 | Validação: 993
Processando a categoria: 'Backdoor'
  -> Treino: 101692 | Teste: 50846 | Validação: 50846
Processando a categoria: 'mitm'
  -> Treino: 3006 | Teste: 1503 | Validação: 1504


In [6]:
list_train = [dictionary_sets_by_attack_type[attack_type]['treino'] for attack_type in attack_types]
df_train = pd.concat(list_train)

list_test = [dictionary_sets_by_attack_type[attack_type]['teste'] for attack_type in attack_types]
df_test = pd.concat(list_test)

list_val = [dictionary_sets_by_attack_type[attack_type]['validacao'] for attack_type in attack_types]
df_val = pd.concat(list_val)

In [7]:
print(f"--- Base de Treino ---")
print(f"Tamanho: {len(df_train)} linhas")
print(f"Categorias presentes: {df_train['Attack'].unique()}")
print(df_train['Attack'].value_counts())
print("-" * 25)

print(f"\n--- Base de Teste ---")
print(f"Tamanho: {len(df_test)} linhas")
print(f"Categorias presentes: {df_test['Attack'].unique()}")
print(df_test['Attack'].value_counts())
print("-" * 25)

print(f"\n--- Base de Validação ---")
print(f"Tamanho: {len(df_val)} linhas")
print(f"Categorias presentes: {df_val['Attack'].unique()}")
print(df_val['Attack'].value_counts())
print("-" * 25)

--- Base de Treino ---
Tamanho: 13760127 linhas
Categorias presentes: ['Benign' 'scanning' 'dos' 'injection' 'ddos' 'password' 'xss'
 'ransomware' 'Backdoor' 'mitm']
Attack
Benign        8396107
ddos          2070628
xss           1417217
password       797388
scanning       679488
injection      190888
dos            101728
Backdoor       101692
mitm             3006
ransomware       1985
Name: count, dtype: int64
-------------------------

--- Base de Teste ---
Tamanho: 6880064 linhas
Categorias presentes: ['Benign' 'scanning' 'dos' 'injection' 'ddos' 'password' 'xss'
 'ransomware' 'Backdoor' 'mitm']
Attack
Benign        4198053
ddos          1035314
xss            708609
password       398694
scanning       339744
injection       95444
dos             50864
Backdoor        50846
mitm             1503
ransomware        993
Name: count, dtype: int64
-------------------------

--- Base de Validação ---
Tamanho: 6880069 linhas
Categorias presentes: ['Benign' 'scanning' 'dos' 'injection'

In [35]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [36]:
scaler = MinMaxScaler()

In [37]:
df_train_benign = df_train[df_train['Attack'] == 'Benign']
df_train_attacks = df_train[df_train['Attack'] != 'Benign']

rus = df_train_attacks['Attack'].value_counts().min()
if rus < 1000:
    rus = 1000

df_train_attacks_balanced = df_train_attacks.groupby('Attack').sample(n=rus, replace=True, random_state=42)

num_attack_classes = len(df_train_attacks['Attack'].unique())
num_benign_samples = num_attack_classes * rus
df_train_benign_sampled = df_train_benign.sample(n=num_benign_samples, random_state=42)

df_train = pd.concat([df_train_attacks_balanced, df_train_benign_sampled])
df_train = shuffle(df_train, random_state=42).reset_index(drop=True)

X_train = df_train.drop(['Label', 'Attack'], axis=1)
y_train = df_train['Label'].to_numpy()

X_train = scaler.fit_transform(X_train)

X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.long).to(device)

In [38]:
df_train['Label'].value_counts()

Label
0    17865
1    17865
Name: count, dtype: int64

In [39]:
df_train['Attack'].value_counts()

Attack
Benign        17865
dos            1985
Backdoor       1985
ddos           1985
injection      1985
mitm           1985
password       1985
xss            1985
ransomware     1985
scanning       1985
Name: count, dtype: int64

In [40]:
len(X_train)

35730

In [41]:
X_train.shape

torch.Size([35730, 32])

In [42]:
y_train.unique(return_counts=True)

(tensor([0, 1], device='cuda:0'), tensor([17865, 17865], device='cuda:0'))

In [43]:
X_train.min(), X_train.max(), X_train.mean()

(tensor(0., device='cuda:0'),
 tensor(1., device='cuda:0'),
 tensor(0.0186, device='cuda:0'))

In [44]:
X_test = df_test.drop(['Label', 'Attack'], axis=1)
y_test = df_test['Label'].to_numpy()

rus = RandomUnderSampler(random_state = 42)
X_test, y_test = rus.fit_resample(X_test, y_test)

X_test = scaler.transform(X_test)

X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.long).to(device)

In [45]:
df_test['Label'].value_counts()

Label
0    4198053
1    2682011
Name: count, dtype: int64

In [46]:
df_test['Attack'].value_counts()

Attack
Benign        4198053
ddos          1035314
xss            708609
password       398694
scanning       339744
injection       95444
dos             50864
Backdoor        50846
mitm             1503
ransomware        993
Name: count, dtype: int64

In [47]:
len(X_test)

5364022

In [48]:
X_test.shape

torch.Size([5364022, 32])

In [49]:
y_test.unique(return_counts=True)

(tensor([0, 1], device='cuda:0'), tensor([2682011, 2682011], device='cuda:0'))

In [50]:
X_test.min(), X_test.max(), X_test.mean()

(tensor(-1.8853e-07, device='cuda:0'),
 tensor(1247.0120, device='cuda:0'),
 tensor(0.0149, device='cuda:0'))

In [51]:
X_val = df_val.drop(['Label', 'Attack'], axis=1)
y_val = df_val['Label'].to_numpy()

rus = RandomUnderSampler(random_state = 42)
X_val, y_val = rus.fit_resample(X_val, y_val)

X_val = scaler.transform(X_val)

X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val = torch.tensor(y_val, dtype=torch.long).to(device)

In [52]:
len(X_val)

5364030

In [53]:
y_val.unique(return_counts=True)

(tensor([0, 1], device='cuda:0'), tensor([2682015, 2682015], device='cuda:0'))

In [27]:
X_val.min(), X_val.max(), X_val.mean()

(tensor(-9.4266e-08), tensor(1085.4226), tensor(0.0148))

In [60]:
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

In [61]:
NUM_FEATURES = X_train.shape[1]
NUM_CLASSES = 2

class IDSBranchyNet(nn.Module):
    def __init__(self):
        super(IDSBranchyNet, self).__init__()
        self.main_part1 = nn.Sequential(
            nn.Linear(NUM_FEATURES, 82),
            nn.ReLU()
        )
        self.main_part2 = nn.Sequential(
            nn.Linear(82, 512),
            nn.ReLU(),
            nn.Linear(512, NUM_CLASSES)
        )
        self.branch = nn.Sequential(
            nn.Linear(82, 41),
            nn.ReLU(),
            nn.Linear(41, NUM_CLASSES)
        )

    def forward(self, x, part="both", intermediate_output=None):
        if part == "branch":
            intermediate_output = self.main_part1(x)
            branch_output = self.branch(intermediate_output)
            return branch_output, intermediate_output

        if part == "main":
            main_output = self.main_part2(intermediate_output)
            return main_output
        
        if part == "both":
            intermediate_output = self.main_part1(x)
            branch_output = self.branch(intermediate_output)
            main_output = self.main_part2(intermediate_output)
            return [branch_output, main_output]

model = IDSBranchyNet()
print(model)

IDSBranchyNet(
  (main_part1): Sequential(
    (0): Linear(in_features=32, out_features=82, bias=True)
    (1): ReLU()
  )
  (main_part2): Sequential(
    (0): Linear(in_features=82, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=2, bias=True)
  )
  (branch): Sequential(
    (0): Linear(in_features=82, out_features=41, bias=True)
    (1): ReLU()
    (2): Linear(in_features=41, out_features=2, bias=True)
  )
)


In [62]:
model = IDSBranchyNet()

In [63]:
def train_model(model, train_loader, val_loader, epochs, weight_branch, weight_main, lr, device, patience=15):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loss_history = []
    val_loss_history = []

    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        running_train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs, "both")
            loss_branch = criterion(outputs[0], labels)
            loss_main = criterion(outputs[1], labels)

            total_loss = (weight_branch * loss_branch) + (weight_main * loss_main)

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            running_train_loss += total_loss.item()
        
        epoch_train_loss = running_train_loss / len(train_loader)
        train_loss_history.append(epoch_train_loss)

        val_loss = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs, "both")
                loss_branch = criterion(outputs[0], labels)
                loss_main = criterion(outputs[1], labels)
                
                total_loss = (weight_branch * loss_branch) + (weight_main * loss_main)
                val_loss += total_loss.item()
        
        epoch_val_loss = val_loss/len(val_loader)
        val_loss_history.append(epoch_val_loss)

        print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {epoch_train_loss:.4f}, Validation Loss: {epoch_val_loss:.4f}')
        
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            epochs_no_improve = 0
            best_model_state = model.state_dict()
        else:
            epochs_no_improve += 1
        
        if epochs_no_improve >= patience:
            print(f'\n--- EARLY STOPPING ATIVADO ---')
            print(f'Parando o treino na época {epoch+1} pois a loss de validação não melhora há {patience} épocas.')
            print(f'A melhor loss de validação foi: {best_val_loss:.4f}')
            if best_model_state:
                model.load_state_dict(best_model_state)
            break

    plt.figure(figsize=(10, 5))
    plt.plot(range(1, len(train_loss_history) + 1), train_loss_history, label='Loss de Treinamento')
    plt.plot(range(1, len(val_loss_history) + 1), val_loss_history, label='Loss de Validação')
    plt.xlabel('Épocas')
    plt.ylabel('Loss')
    plt.title('Curvas de Loss de Treinamento e Validação')
    plt.legend()
    plt.grid(True)
    plt.show()

    return train_loss_history, val_loss_history

In [64]:
def evaluate_model(model, loader, confidence_threshold, device):
    model.to(device)
    model.eval()

    total_samples = len(loader.dataset)
    all_predictions = []
    all_labels = []
    exited_early_count = 0
    total_inference_time = 0

    with torch.no_grad():
        for samples, labels in loader:
            samples, labels = samples.to(device), labels.to(device)

            start_time = time.perf_counter()
            
            branch_output, intermediate_output = model(samples, "branch")
            branch_prob = F.softmax(branch_output, dim=1)
            trusts, branch_preds = torch.max(branch_prob, 1)

            batch_predictions = torch.zeros_like(labels)
            
            early_exit_mask = trusts > confidence_threshold
            
            if early_exit_mask.any():
                batch_predictions[early_exit_mask] = branch_preds[early_exit_mask]
                exited_early_count += early_exit_mask.sum().item()

            main_branch_mask = ~early_exit_mask
            if main_branch_mask.any():
                intermediate_to_main = intermediate_output[main_branch_mask]
                
                main_output = model(None, "main", intermediate_to_main)
                main_prob = F.softmax(main_output, dim=1)
                _, main_preds = torch.max(main_prob, 1)
                
                batch_predictions[main_branch_mask] = main_preds

            end_time = time.perf_counter()
            total_inference_time += (end_time - start_time)

            all_predictions.append(batch_predictions.cpu())
            all_labels.append(labels.cpu())

    final_predictions = torch.cat(all_predictions)
    y_data = torch.cat(all_labels)

    correct = (final_predictions == y_data).sum().item()
    accuracy = 100 * correct / total_samples
    exit_rate = 100 * exited_early_count / total_samples
    avg_time_ms = (total_inference_time / total_samples) * 1000

    cm = confusion_matrix(y_data.numpy(), final_predictions.numpy())

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Normal', 'Ataque'],
                yticklabels=['Normal', 'Ataque'])
    plt.xlabel('Rótulo Previsto')
    plt.ylabel('Rótulo Verdadeiro')
    plt.title(f'Matriz de Confusão (Limiar de Confiança = {confidence_threshold})')
    plt.show()

    tn, fp, fn, tp = cm.ravel()
    
    f1 = f1_score(y_data.numpy(), final_predictions.numpy())
    
    tpr = recall_score(y_data.numpy(), final_predictions.numpy())

    tnr = tn / (tn + fp)
    
    print(f"True Positives (TP): {tp}")
    print(f"True Negatives (TN): {tn}")
    print(f"False Positives (FP): {fp}")
    print(f"False Negatives (FN): {fn}\n")
    
    print(f"F1 Score: {f1:.4f}")
    print(f"True Positive Rate (TPR) / Recall: {tpr:.4f}")
    print(f"True Negative Rate (TNR) / Specificity: {tnr:.4f}")

    return {
            'accuracy': accuracy,
            'exit_rate': exit_rate,
            'avg_inference_time_ms': avg_time_ms,
            'exited_early_count': exited_early_count,
            'total_samples': total_samples
        }

In [65]:
modelname = 'NF-ToN-IoT-v3'

In [66]:
modelname

'NF-ToN-IoT-v3'

In [None]:
epochs = 150

train_losses, val_losses = train_model(model, train_loader, val_loader, epochs, weight_branch=0.7, weight_main=0.3, lr=0.0001, device=device)

torch.save(model.state_dict(), f'models/24_10/teste1/{modelname}.pth')
print(f"\nModelo treinado e salvo em 'models/24_10/teste1/{modelname}.pth'")

In [None]:
print("--- Evaluation on Test Set using DataLoader ---")

model.load_state_dict(torch.load(f'models/24_10/teste1/{modelname}.pth'))
print(f"Modelo 'models/24_10/teste1/{modelname}.pth' carregado")


thresholds_to_test = [2]

for T in thresholds_to_test:
    results = evaluate_model(model, test_loader, confidence_threshold=T, device=device)
    print("-" * 20)
    print(f"Threshold: {T}")
    print(f"  Accuracy: {results['accuracy']:.2f}%")
    print(f"  Avg. Inference Time: {results['avg_inference_time_ms']:.4f} ms")
    print(f"  Early Exit Rate: {results['exit_rate']:.2f}% ({results['exited_early_count']}/{results['total_samples']})")
    print("-" * 20)