# Detector de Ataques de Rede usando BERT

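Este notebook implementa um detector de ataques de rede usando o modelo BERT completo (`bert-base-uncased`), replicando a funcionalidade dos monitores em tempo real que usam versões reduzidas (DistilBERT, MiniLM e TinyBERT). **Observação:** a camada de classificação usada aqui é apenas simulada (pesos aleatórios, sem treinamento); portanto, as classes previstas servem para medir desempenho (latência, throughput, uso de CPU e memória), e não para avaliar a qualidade da detecção.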

## Configuração Inicial

Primeiro, vamos instalar as dependências necessárias e configurar o ambiente.


In [1]:
# Instalar dependências necessárias
!pip install transformers torch pandas numpy scikit-learn psutil onnxruntime tqdm

# Importar bibliotecas
import torch
from transformers import BertModel, BertTokenizer
import numpy as np
import pandas as pd
import pickle
import time
import json
import os
import warnings
from datetime import datetime
import psutil
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import gc
from tqdm.notebook import tqdm

# Run configuration (module-level constants read by the processing cells below)
BATCH_SIZE = 32  # Number of rows sent through the model per forward pass
MAX_SAMPLES = 1000  # Per-file row limit applied when TEST_MODE is enabled
TEST_MODE = True  # If True, only the first MAX_SAMPLES rows of each file are used; if False, the whole file is processed

# Silence the sklearn "feature names" warning and, more broadly, every
# UserWarning raised from sklearn modules.
warnings.filterwarnings('ignore', message='X does not have valid feature names')
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')


Collecting onnxruntime
  Downloading onnxruntime-1.22.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-man

In [2]:
# Mount Google Drive (Colab-only; this import fails outside Google Colab)
from google.colab import drive
drive.mount('/content/drive')

# Define the directory layout on the mounted drive
BERT_DIR = "/content/drive/MyDrive/BERT"
DATA_DIR = os.path.join(BERT_DIR, "data")
RESULTS_DIR = os.path.join(BERT_DIR, "Results/BERT")

# Create the results directory if it does not exist yet
os.makedirs(RESULTS_DIR, exist_ok=True)

# Echo the resolved paths so they are visible in the cell output
print("Diretórios configurados:")
print(f"BERT_DIR: {BERT_DIR}")
print(f"DATA_DIR: {DATA_DIR}")
print(f"RESULTS_DIR: {RESULTS_DIR}")


Mounted at /content/drive
Diretórios configurados:
BERT_DIR: /content/drive/MyDrive/BERT
DATA_DIR: /content/drive/MyDrive/BERT/data
RESULTS_DIR: /content/drive/MyDrive/BERT/Results/BERT


In [3]:
class NetworkAttackDetector:
    """BERT encoder plus a simulated classification head for traffic records.

    A record (dict of feature name -> value) is rendered as text, encoded by
    ``bert-base-uncased`` and its pooled output is projected onto the 34
    traffic classes by a linear layer.

    NOTE: the linear head is *not trained* (random initial weights), so the
    predicted classes are only meaningful for measuring latency, throughput
    and resource usage, not detection quality. The head is created once in
    ``__init__`` so that the same input always yields the same prediction for
    a given detector instance and so that layer construction is not included
    in the measured inference time.

    The instance also accumulates running statistics (counts, timings,
    CPU/memory samples) that ``get_statistics`` summarises.
    """

    # Numeric id of the "BenignTraffic" entry in ``class_names``.
    BENIGN_CLASS_ID = 2

    def __init__(self, confidence_threshold=0.8):
        """Load tokenizer and model, build the head and reset all counters.

        Args:
            confidence_threshold: predictions whose top probability is at
                least this value are counted as "high confidence".
        """
        print("Carregando modelo BERT...")

        # Load the pretrained BERT encoder and its tokenizer
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.model = BertModel.from_pretrained('bert-base-uncased')

        # Move to GPU when available; eval() disables dropout for inference
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(self.device)
        self.model.eval()

        self.confidence_threshold = confidence_threshold

        # Mapping from numeric class id to class name (same as the TinyBERT monitor)
        self.class_names = {
            1: "Backdoor_Malware",
            2: "BenignTraffic",
            3: "BrowserHijacking",
            4: "CommandInjection",
            5: "DDoS-ACK_Fragmentation",
            6: "DDoS-HTTP_Flood",
            7: "DDoS-ICMP_Flood",
            8: "DDoS-ICMP_Fragmentation",
            9: "DDoS-PSHACK_Flood",
            10: "DDoS-RSTFINFlood",
            11: "DDoS-SYN_Flood",
            12: "DDoS-SlowLoris",
            13: "DDoS-SynonymousIP_Flood",
            14: "DDoS-TCP_Flood",
            15: "DDoS-UDP_Flood",
            16: "DDoS-UDP_Fragmentation",
            17: "DNS_Spoofing",
            18: "DictionaryBruteForce",
            19: "DoS-HTTP_Flood",
            20: "DoS-SYN_Flood",
            21: "DoS-TCP_Flood",
            22: "DoS-UDP_Flood",
            23: "MITM-ArpSpoofing",
            24: "Mirai-greeth_flood",
            25: "Mirai-greip_flood",
            26: "Mirai-udpplain",
            27: "Recon-HostDiscovery",
            28: "Recon-OSScan",
            29: "Recon-PingSweep",
            30: "Recon-PortScan",
            31: "SqlInjection",
            32: "Uploading_Attack",
            33: "VulnerabilityScan",
            34: "XSS"
        }

        # Position i of a probability vector corresponds to self.classes[i]
        self.classes = list(self.class_names.keys())

        # Simulated (untrained) classification head, built ONCE. Sized from the
        # encoder config instead of a hard-coded 768.
        self.classifier = torch.nn.Linear(
            self.model.config.hidden_size, len(self.classes)
        ).to(self.device)
        self.classifier.eval()

        # Running metrics
        self.total_predictions = 0
        self.attack_detections = 0
        self.attack_types = {}
        self.benign_count = 0
        self.inference_times = []
        self.cpu_usage = []
        self.memory_usage = []
        self.high_confidence_predictions = 0
        self.low_confidence_predictions = 0

        # The first non-blocking cpu_percent() call returns a meaningless 0.0;
        # prime it here so the first real sample is usable.
        psutil.cpu_percent()

        print(f"BERT carregado com sucesso!")
        print(f"Dispositivo: {self.device}")
        print(f"Classes detectáveis: {list(self.class_names.values())}")
        print(f"Threshold de confiança: {self.confidence_threshold}")

    def get_class_name(self, class_idx):
        """Return the class name for a numeric id.

        Integer ids missing from the mapping become ``"Unknown-<id>"``;
        non-integer values are returned unchanged.
        """
        if isinstance(class_idx, (int, np.integer)):
            return self.class_names.get(class_idx, f"Unknown-{class_idx}")
        return class_idx

    def preprocess_features(self, features_dict):
        """Render a feature dict as ``"key: value"`` text and tokenize it.

        Returns:
            dict of tokenizer output tensors, already on ``self.device``.
        """
        feature_text = " ".join([f"{k}: {v}" for k, v in features_dict.items()])

        inputs = self.tokenizer(
            feature_text,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        )

        # Tensors must live on the same device as the model
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        return inputs

    def _classify(self, pooled_output):
        """Project pooled encoder output onto class probabilities (rows sum to 1)."""
        return torch.softmax(self.classifier(pooled_output), dim=1)

    def _sync_device(self):
        """Wait for queued CUDA work so wall-clock timings cover the GPU compute."""
        if self.device.type == 'cuda':
            torch.cuda.synchronize()

    def predict(self, features_dict, verbose=True):
        """Classify one record and update the running statistics.

        Args:
            features_dict: mapping of feature name -> value for one record.
            verbose: accepted for interface compatibility; currently unused.

        Returns:
            dict with ``timestamp``, ``model``, ``predicted_class`` (name),
            ``confidence``, ``is_attack``, ``is_benign``,
            ``inference_time_ms`` and ``all_probabilities`` (list ordered like
            ``self.classes``).
        """
        # Sample CPU and memory before inference
        cpu_percent_before = psutil.cpu_percent()
        memory_info_before = psutil.virtual_memory().percent

        inputs = self.preprocess_features(features_dict)

        # CUDA kernels are launched asynchronously: synchronise before and
        # after so the timer measures the actual forward pass.
        self._sync_device()
        start_time = time.perf_counter()

        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = self._classify(outputs.pooler_output)

        self._sync_device()
        inference_time = (time.perf_counter() - start_time) * 1000

        # Sample CPU and memory after inference
        cpu_percent_after = psutil.cpu_percent()
        memory_info_after = psutil.virtual_memory().percent

        # Keep the larger of the before/after samples
        self.cpu_usage.append(max(cpu_percent_before, cpu_percent_after))
        self.memory_usage.append(max(memory_info_before, memory_info_after))

        predicted_class_idx = torch.argmax(probabilities[0]).item()
        predicted_class = self.classes[predicted_class_idx]
        confidence = probabilities[0][predicted_class_idx].item()

        self.total_predictions += 1
        self.inference_times.append(inference_time)

        # Everything that is not BenignTraffic counts as an attack
        is_benign = (predicted_class == self.BENIGN_CLASS_ID)
        is_attack = not is_benign

        if is_attack:
            self.attack_detections += 1
            attack_name = self.get_class_name(predicted_class)
            self.attack_types[attack_name] = self.attack_types.get(attack_name, 0) + 1
        else:
            self.benign_count += 1

        if confidence >= self.confidence_threshold:
            self.high_confidence_predictions += 1
        else:
            self.low_confidence_predictions += 1

        # Release cached CUDA blocks (outside the timed region)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return {
            'timestamp': datetime.now().isoformat(),
            'model': 'BERT',
            'predicted_class': self.get_class_name(predicted_class),
            'confidence': float(confidence),
            'is_attack': is_attack,
            'is_benign': is_benign,
            'inference_time_ms': inference_time,
            'all_probabilities': probabilities[0].cpu().numpy().tolist()
        }

    def get_statistics(self):
        """Summarise the running metrics.

        Returns:
            ``{}`` when no inference time has been recorded yet; otherwise a
            dict of counts, rates, timing aggregates (ms), throughput
            (predictions/second) and CPU/memory aggregates (percent).
        """
        if not self.inference_times:
            return {}

        total = self.total_predictions
        avg_time = np.mean(self.inference_times)

        return {
            'model': 'BERT',
            'total_predictions': total,
            'attack_detections': self.attack_detections,
            'benign_detections': self.benign_count,
            'attack_rate': self.attack_detections / total if total > 0 else 0,
            'high_confidence_predictions': self.high_confidence_predictions,
            'low_confidence_predictions': self.low_confidence_predictions,
            'high_confidence_rate': self.high_confidence_predictions / total if total > 0 else 0,
            'attack_types': self.attack_types,
            'avg_inference_time_ms': avg_time,
            'max_inference_time_ms': np.max(self.inference_times),
            'min_inference_time_ms': np.min(self.inference_times),
            # Guard against a zero mean (e.g. placeholder timings)
            'throughput_per_second': 1000 / avg_time if avg_time > 0 else 0,
            'avg_cpu_usage': np.mean(self.cpu_usage) if self.cpu_usage else 0,
            'max_cpu_usage': np.max(self.cpu_usage) if self.cpu_usage else 0,
            'avg_memory_usage': np.mean(self.memory_usage) if self.memory_usage else 0,
            'max_memory_usage': np.max(self.memory_usage) if self.memory_usage else 0,
            'confidence_threshold': self.confidence_threshold
        }


In [4]:
class RealTimeMonitor:
    """Collects prediction results and writes a plain-text analysis report.

    Callers append prediction dicts to ``results`` and, when ground truth is
    available, the matching binary labels (1 = attack, 0 = benign) to
    ``true_labels`` in the same order.
    """

    # Maximum number of per-detection entries written to the details section.
    MAX_DETAILED_RESULTS = 1000

    def __init__(self, detector, log_file='attack_log.json', result_file=None):
        """
        Args:
            detector: object exposing ``confidence_threshold``, ``classes``,
                ``get_class_name`` and ``get_statistics``.
            log_file: path of the JSON-lines file appended to by ``log_detection``.
            result_file: path of the text report; when ``None``,
                ``save_result`` prints and ``save_all_results`` does nothing.
        """
        self.detector = detector
        self.log_file = log_file
        self.result_file = result_file
        self.results = []
        self.true_labels = []
        self.predicted_labels = []

    def log_detection(self, result):
        """Append one result as a JSON line to ``log_file``."""
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(result) + '\n')

    def save_result(self, message):
        """Append ``message`` to ``result_file``, or print it when no file is set."""
        if self.result_file:
            with open(self.result_file, 'a', encoding='utf-8') as f:
                f.write(message + '\n')
        else:
            print(message)

    def save_all_results(self):
        """Write the full report to ``result_file``, replacing its content.

        Does nothing when ``result_file`` is unset or ``results`` is empty.
        The file is opened in write mode, so anything appended earlier through
        ``save_result`` is overwritten.
        """
        if self.result_file and self.results:
            with open(self.result_file, 'w', encoding='utf-8') as f:
                f.write("=== RESULTADOS DA ANÁLISE BERT ===\n")
                f.write(f"Data/Hora: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"Modelo: BERT (Modelo Completo)\n")
                f.write(f"Total de amostras processadas: {len(self.results)}\n")
                f.write(f"Threshold de confiança: {self.detector.confidence_threshold}\n\n")

                # Basic counts
                attacks = [r for r in self.results if r['is_attack']]
                benign = [r for r in self.results if not r['is_attack']]

                f.write(f"Ataques detectados: {len(attacks)}\n")
                f.write(f"Tráfego normal: {len(benign)}\n")
                f.write(f"Taxa de ataques: {len(attacks)/len(self.results)*100:.2f}%\n\n")

                # Confidence statistics
                confidences = [r['confidence'] for r in self.results]
                low_confidence = [r for r in self.results if r['confidence'] < self.detector.confidence_threshold]
                high_confidence = [r for r in self.results if r['confidence'] >= self.detector.confidence_threshold]

                f.write("=== ESTATÍSTICAS DE CONFIANÇA ===\n")
                f.write(f"Predições com alta confiança: {len(high_confidence)} ({len(high_confidence)/len(self.results)*100:.2f}%)\n")
                f.write(f"Predições com baixa confiança: {len(low_confidence)} ({len(low_confidence)/len(self.results)*100:.2f}%)\n")
                f.write(f"Confiança média: {np.mean(confidences):.4f}\n")
                f.write(f"Confiança mediana: {np.median(confidences):.4f}\n")
                f.write(f"Confiança mínima: {np.min(confidences):.4f}\n")
                f.write(f"Confiança máxima: {np.max(confidences):.4f}\n")
                f.write(f"Desvio padrão da confiança: {np.std(confidences):.4f}\n\n")

                # Number of occurrences of each predicted attack class
                attack_types = {}
                for result in attacks:
                    attack_type = result['predicted_class']
                    attack_types[attack_type] = attack_types.get(attack_type, 0) + 1

                f.write("=== INCIDÊNCIA DE ATAQUES ===\n")
                if attack_types:
                    for attack_type, count in sorted(attack_types.items(), key=lambda x: x[1], reverse=True):
                        f.write(f"{attack_type}: {count} ocorrências ({count/len(self.results)*100:.2f}%)\n")
                else:
                    f.write("Nenhum ataque detectado\n")
                f.write(f"Tráfego Normal: {len(benign)} ocorrências ({len(benign)/len(self.results)*100:.2f}%)\n\n")

                # Performance metrics
                inference_times = [r['inference_time_ms'] for r in self.results]

                f.write("=== MÉTRICAS DE DESEMPENHO ===\n")
                f.write(f"Tempo médio de inferência: {np.mean(inference_times):.2f} ms\n")
                f.write(f"Tempo máximo de inferência: {np.max(inference_times):.2f} ms\n")
                f.write(f"Tempo mínimo de inferência: {np.min(inference_times):.2f} ms\n")
                f.write(f"Desvio padrão da inferência: {np.std(inference_times):.2f} ms\n")
                f.write(f"Percentil 95 (P95) da inferência: {np.percentile(inference_times, 95):.2f} ms\n")
                f.write(f"Percentil 99 (P99) da inferência: {np.percentile(inference_times, 99):.2f} ms\n")
                f.write(f"Throughput: {1000 / np.mean(inference_times):.2f} predições/segundo\n")

                # CPU and memory figures come from the detector's own statistics
                stats = self.detector.get_statistics()
                f.write(f"Uso médio de CPU: {stats.get('avg_cpu_usage', 0):.2f}%\n")
                f.write(f"Uso máximo de CPU: {stats.get('max_cpu_usage', 0):.2f}%\n")
                f.write(f"Uso médio de memória: {stats.get('avg_memory_usage', 0):.2f}%\n")
                f.write(f"Uso máximo de memória: {stats.get('max_memory_usage', 0):.2f}%\n\n")

                # Accuracy metrics, only when there is one true label per result
                if self.true_labels and len(self.true_labels) == len(self.results):
                    predicted_labels = [1 if result['is_attack'] else 0 for result in self.results]

                    f.write("=== MÉTRICAS DE ACURÁCIA ===\n")
                    accuracy = accuracy_score(self.true_labels, predicted_labels)
                    precision = precision_score(self.true_labels, predicted_labels, zero_division=0)
                    recall = recall_score(self.true_labels, predicted_labels, zero_division=0)
                    f1 = f1_score(self.true_labels, predicted_labels, zero_division=0)

                    f.write(f"Acurácia: {accuracy:.4f}\n")
                    f.write(f"Precisão: {precision:.4f}\n")
                    f.write(f"Recall: {recall:.4f}\n")
                    f.write(f"F1-Score: {f1:.4f}\n")

                    # labels=[0, 1] forces a 2x2 matrix even when only one
                    # class occurs; otherwise cm[1][...] raises IndexError.
                    cm = confusion_matrix(self.true_labels, predicted_labels, labels=[0, 1])
                    f.write("\nMatriz de Confusão:\n")
                    f.write("    | Normal | Ataque\n")
                    f.write("----|--------|-------\n")
                    f.write(f"Normal  | {cm[0][0]:6d} | {cm[0][1]:6d}\n")
                    f.write(f"Ataque  | {cm[1][0]:6d} | {cm[1][1]:6d}\n\n")

                    # Per-sample correctness counts
                    correct_predictions = sum(1 for true, pred in zip(self.true_labels, predicted_labels) if true == pred)
                    incorrect_predictions = len(predicted_labels) - correct_predictions

                    f.write(f"Predições corretas: {correct_predictions} ({correct_predictions/len(self.results)*100:.2f}%)\n")
                    f.write(f"Predições incorretas: {incorrect_predictions} ({incorrect_predictions/len(self.results)*100:.2f}%)\n\n")

                # Details of every detection
                f.write("=== DETALHES DE TODAS AS DETECÇÕES ===\n")

                # Sort by confidence but keep each result's ORIGINAL position:
                # true_labels is indexed by that position, not by sorted rank.
                all_results_sorted = sorted(
                    enumerate(self.results),
                    key=lambda pair: pair[1]['confidence'],
                    reverse=True
                )

                if all_results_sorted:
                    for i, (original_idx, result) in enumerate(all_results_sorted, 1):
                        detection_type = "ATAQUE" if result['is_attack'] else "TRÁFEGO NORMAL"
                        class_name = result['predicted_class']

                        f.write(f"\n[{i}] {detection_type}: {class_name}\n")
                        f.write(f"    Timestamp: {result['timestamp']}\n")
                        f.write(f"    Confiança: {result['confidence']:.4f}\n")
                        f.write(f"    Tempo de inferência: {result['inference_time_ms']:.2f} ms\n")

                        # With ground truth available, report whether this prediction was correct
                        if self.true_labels and original_idx < len(self.true_labels):
                            true_value = self.true_labels[original_idx]
                            pred_value = 1 if result['is_attack'] else 0
                            is_correct = (true_value == pred_value)
                            f.write(f"    Predição correta: {'✓ SIM' if is_correct else '✗ NÃO'}\n")

                        # Three most probable classes
                        top_classes = sorted(
                            enumerate(result['all_probabilities']),
                            key=lambda x: x[1],
                            reverse=True
                        )[:3]

                        f.write("    Top 3 classes mais prováveis:\n")
                        for cls_idx, prob in top_classes:
                            cls_name = self.detector.get_class_name(self.detector.classes[cls_idx])
                            f.write(f"      - {cls_name}: {prob:.4f}\n")

                        f.write("    " + "-"*40 + "\n")

                        if i >= self.MAX_DETAILED_RESULTS:
                            omitted = len(all_results_sorted) - self.MAX_DETAILED_RESULTS
                            # Only mention omitted entries when some were actually omitted
                            if omitted > 0:
                                f.write(f"\n... mais {omitted} detecções omitidas ...\n")
                            break
                else:
                    f.write("Nenhuma detecção registrada durante a análise\n\n")

                # Final summaries
                f.write("\n=== RESUMO DE ATAQUES ===\n")
                if attacks:
                    f.write(f"Total de ataques: {len(attacks)}\n")
                    attack_confidence = [r['confidence'] for r in attacks]
                    f.write(f"Confiança média de ataques: {np.mean(attack_confidence):.4f}\n")
                    f.write(f"Confiança mínima de ataques: {np.min(attack_confidence):.4f}\n")
                    f.write(f"Confiança máxima de ataques: {np.max(attack_confidence):.4f}\n")
                else:
                    f.write("Nenhum ataque detectado\n")

                f.write("\n=== RESUMO DE TRÁFEGO NORMAL ===\n")
                if benign:
                    f.write(f"Total de tráfego normal: {len(benign)}\n")
                    benign_confidence = [r['confidence'] for r in benign]
                    f.write(f"Confiança média de tráfego normal: {np.mean(benign_confidence):.4f}\n")
                    f.write(f"Confiança mínima de tráfego normal: {np.min(benign_confidence):.4f}\n")
                    f.write(f"Confiança máxima de tráfego normal: {np.max(benign_confidence):.4f}\n")
                else:
                    f.write("Nenhum tráfego normal detectado\n")

                # Top 10 detections by confidence
                f.write("\n=== TOP 10 DETECÇÕES POR CONFIANÇA ===\n")
                top_confidence = sorted(self.results, key=lambda x: x['confidence'], reverse=True)[:10]

                for i, result in enumerate(top_confidence, 1):
                    detection_type = "ATAQUE" if result['is_attack'] else "NORMAL"
                    f.write(f"{i}. [{detection_type}] {result['predicted_class']} (Confiança: {result['confidence']:.4f})\n")

                # Fastest / slowest inferences
                f.write("\n=== 10 INFERÊNCIAS MAIS RÁPIDAS ===\n")
                fastest = sorted(self.results, key=lambda x: x['inference_time_ms'])[:10]

                for i, result in enumerate(fastest, 1):
                    detection_type = "ATAQUE" if result['is_attack'] else "NORMAL"
                    f.write(f"{i}. [{detection_type}] {result['inference_time_ms']:.2f} ms - {result['predicted_class']}\n")

                f.write("\n=== 10 INFERÊNCIAS MAIS LENTAS ===\n")
                slowest = sorted(self.results, key=lambda x: x['inference_time_ms'], reverse=True)[:10]

                for i, result in enumerate(slowest, 1):
                    detection_type = "ATAQUE" if result['is_attack'] else "NORMAL"
                    f.write(f"{i}. [{detection_type}] {result['inference_time_ms']:.2f} ms - {result['predicted_class']}\n")


In [5]:
def process_batch(batch_df, detector, has_labels=True):
    """Classify every row of ``batch_df`` with one batched forward pass.

    Each row is rendered as ``"column: value"`` text, the whole batch is
    tokenized together and run through ``detector.model``; the pooled output
    is projected onto the classes by a linear head.

    The head is taken from ``detector.classifier`` when the detector has one;
    otherwise it is created once and cached on the detector, so that every
    batch is scored with the same weights. The head is untrained, so the
    predicted classes are only useful for performance measurements.

    Args:
        batch_df: DataFrame slice with one traffic record per row.
        detector: object exposing ``tokenizer``, ``model``, ``device``,
            ``classes`` and ``get_class_name``.
        has_labels: when True, the ``label`` column is removed from the
            features and converted to a binary ground-truth label.

    Returns:
        ``(results, true_labels)``: one result dict per row (with
        ``inference_time_ms`` set to 0 for the caller to fill in) and the list
        of binary labels (1 = attack, 0 = benign; empty without labels).
    """
    # Nothing to do for an empty slice (the tokenizer rejects an empty list)
    if len(batch_df) == 0:
        return [], []

    benign_labels = ('benigntraffic', 'benign', 'normal')
    feature_texts = []
    true_labels = []

    # Build the text inputs and the ground-truth labels
    for _, row in batch_df.iterrows():
        if has_labels:
            label_value = row['label']
            # Any label that is not a known benign string counts as an attack.
            # NOTE(review): numeric labels are therefore always treated as
            # attacks - confirm the label column is textual.
            is_benign_label = (
                isinstance(label_value, str)
                and label_value.strip().lower() in benign_labels
            )
            true_labels.append(0 if is_benign_label else 1)
            features_dict = row.drop('label').to_dict()
        else:
            features_dict = row.to_dict()

        feature_texts.append(" ".join(f"{k}: {v}" for k, v in features_dict.items()))

    # Tokenize the whole batch at once (padded to the longest sequence)
    inputs = detector.tokenizer(
        feature_texts,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

    # Tensors must live on the same device as the model
    inputs = {k: v.to(detector.device) for k, v in inputs.items()}

    with torch.no_grad():
        pooled_output = detector.model(**inputs).pooler_output

        # Reuse a single head across batches instead of re-initialising
        # random weights on every call.
        classifier = getattr(detector, 'classifier', None)
        if classifier is None:
            classifier = torch.nn.Linear(
                pooled_output.shape[-1], len(detector.classes)
            ).to(detector.device)
            classifier.eval()
            detector.classifier = classifier

        probabilities = torch.softmax(classifier(pooled_output), dim=1)

    # One device-to-host transfer for the whole batch
    all_probs = probabilities.cpu().tolist()
    top_indices = torch.argmax(probabilities, dim=1).cpu().tolist()

    results = []
    for row_probs, pred_class_idx in zip(all_probs, top_indices):
        pred_class = detector.classes[pred_class_idx]

        # Class id 2 is BenignTraffic; everything else counts as an attack
        is_benign = (pred_class == 2)

        results.append({
            'timestamp': datetime.now().isoformat(),
            'model': 'BERT',
            'predicted_class': detector.get_class_name(pred_class),
            'confidence': float(row_probs[pred_class_idx]),
            'is_attack': not is_benign,
            'is_benign': is_benign,
            'inference_time_ms': 0,  # filled in by the caller
            'all_probabilities': row_probs
        })

    return results, true_labels

def simulate_network_data(csv_file, detector, monitor):
    """Run every row of a CSV file through the detector in batches.

    Reads ``csv_file`` (only the first ``MAX_SAMPLES`` rows when ``TEST_MODE``
    is on), classifies it ``BATCH_SIZE`` rows at a time with
    ``process_batch`` and stores the outcome on ``monitor`` (``results`` and,
    when a ``label`` column exists, ``true_labels``).

    The per-sample ``inference_time_ms`` is the batch wall-clock time divided
    by the batch size, so it includes feature rendering and tokenization.

    The batch path does not go through ``detector.predict``, so this function
    feeds the detector's running counters itself; otherwise
    ``detector.get_statistics()`` would stay empty and the CPU/memory figures
    of the final report would always be zero. Those counters are cumulative
    over the lifetime of the detector.

    Args:
        csv_file: path of the CSV file to process.
        detector: a ``NetworkAttackDetector``.
        monitor: a ``RealTimeMonitor`` receiving results and messages.
    """
    monitor.save_result(f"Carregando dados de simulação: {csv_file}")

    # Load the data
    df = pd.read_csv(csv_file)
    if TEST_MODE:
        df = df.head(MAX_SAMPLES)
        monitor.save_result(f"MODO TESTE: Usando apenas {MAX_SAMPLES} amostras")

    total_samples = len(df)
    monitor.save_result(f"Iniciando simulação BERT com {total_samples} amostras...")

    has_labels = 'label' in df.columns

    # Running totals for THIS file, used by the progress messages
    file_attacks = 0
    file_time_ms = 0.0

    start_time = time.time()
    for i in tqdm(range(0, total_samples, BATCH_SIZE), desc="Processando lotes"):
        batch_df = df.iloc[i:i+BATCH_SIZE]

        cpu_before = psutil.cpu_percent()
        memory_before = psutil.virtual_memory().percent

        batch_start = time.time()
        results, batch_labels = process_batch(batch_df, detector, has_labels)
        batch_time = (time.time() - batch_start) * 1000

        if not results:
            continue

        # One resource sample per batch (larger of before/after)
        detector.cpu_usage.append(max(cpu_before, psutil.cpu_percent()))
        detector.memory_usage.append(max(memory_before, psutil.virtual_memory().percent))

        # Spread the batch time evenly over its samples
        per_sample_ms = batch_time / len(results)
        file_time_ms += batch_time

        for result in results:
            result['inference_time_ms'] = per_sample_ms

            # Feed the detector's running statistics
            detector.total_predictions += 1
            detector.inference_times.append(per_sample_ms)
            if result['is_attack']:
                file_attacks += 1
                detector.attack_detections += 1
                attack_name = result['predicted_class']
                detector.attack_types[attack_name] = detector.attack_types.get(attack_name, 0) + 1
            else:
                detector.benign_count += 1
            if result['confidence'] >= detector.confidence_threshold:
                detector.high_confidence_predictions += 1
            else:
                detector.low_confidence_predictions += 1

        # Hand the batch over to the monitor
        monitor.results.extend(results)
        if has_labels:
            monitor.true_labels.extend(batch_labels)

        # Report every sample; attacks are also written to the JSON log
        for result in results:
            if result['is_attack']:
                message = f"🚨 ATAQUE: {result['predicted_class']} (Confiança: {result['confidence']:.3f})"
                monitor.save_result(message)
                monitor.log_detection(result)
            else:
                message = f"✅ TRÁFEGO NORMAL (Confiança: {result['confidence']:.3f})"
                monitor.save_result(message)

        # Progress report every 5 batches, computed from this file's results
        if (i // BATCH_SIZE) % 5 == 0:
            elapsed = time.time() - start_time
            samples_processed = len(monitor.results)
            attack_rate = file_attacks / samples_processed if samples_processed else 0
            avg_time_ms = file_time_ms / samples_processed if samples_processed else 0

            progress_msg = f"\nStatus do processamento:"
            progress_msg += f"\n- Amostras processadas: {samples_processed}/{total_samples}"
            progress_msg += f"\n- Tempo decorrido: {elapsed:.1f}s"
            progress_msg += f"\n- Taxa de ataques: {attack_rate:.3f}"
            progress_msg += f"\n- Tempo médio/amostra: {avg_time_ms:.2f} ms"
            monitor.save_result(progress_msg)

        # Release cached CUDA memory between batches
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            gc.collect()


In [None]:
# Create the detector and the monitor (shared by all files)
detector = NetworkAttackDetector()
monitor = RealTimeMonitor(detector)

# Collect the CSV files of the data folder. os.listdir returns entries in
# arbitrary order, so sort them to make the processing order reproducible.
csv_files = sorted(f for f in os.listdir(DATA_DIR) if f.endswith('.csv'))
print(f"Encontrados {len(csv_files)} arquivos CSV para processar")

for csv_file in csv_files:
    csv_path = os.path.join(DATA_DIR, csv_file)
    csv_basename = os.path.splitext(os.path.basename(csv_file))[0]
    result_file = os.path.join(RESULTS_DIR, f"result-bert-part-{csv_basename}.txt")

    print(f"\nProcessando: {csv_file}")
    print(f"Resultados serão salvos em: {result_file}")

    # Point the monitor at this file's report and drop the previous file's data
    monitor.result_file = result_file
    monitor.results = []
    monitor.true_labels = []

    try:
        simulate_network_data(csv_path, detector, monitor)
        monitor.save_all_results()
        print(f"✅ Análise concluída para {csv_file}")
    except Exception as e:
        # Keep going with the next file, but say what kind of error occurred
        print(f"❌ Erro ao processar {csv_file}: {type(e).__name__}: {e}")

    # Free memory between files
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

print("\n🎉 Processamento concluído!")
print(f"Os resultados foram salvos em: {RESULTS_DIR}")


Carregando modelo BERT...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BERT carregado com sucesso!
Dispositivo: cuda
Classes detectáveis: ['Backdoor_Malware', 'BenignTraffic', 'BrowserHijacking', 'CommandInjection', 'DDoS-ACK_Fragmentation', 'DDoS-HTTP_Flood', 'DDoS-ICMP_Flood', 'DDoS-ICMP_Fragmentation', 'DDoS-PSHACK_Flood', 'DDoS-RSTFINFlood', 'DDoS-SYN_Flood', 'DDoS-SlowLoris', 'DDoS-SynonymousIP_Flood', 'DDoS-TCP_Flood', 'DDoS-UDP_Flood', 'DDoS-UDP_Fragmentation', 'DNS_Spoofing', 'DictionaryBruteForce', 'DoS-HTTP_Flood', 'DoS-SYN_Flood', 'DoS-TCP_Flood', 'DoS-UDP_Flood', 'MITM-ArpSpoofing', 'Mirai-greeth_flood', 'Mirai-greip_flood', 'Mirai-udpplain', 'Recon-HostDiscovery', 'Recon-OSScan', 'Recon-PingSweep', 'Recon-PortScan', 'SqlInjection', 'Uploading_Attack', 'VulnerabilityScan', 'XSS']
Threshold de confiança: 0.8
Encontrados 35 arquivos CSV para processar

Processando: part-00123-363d1ba3-8ab5-4f96-bc25-4d5862db7cb9-c000.csv
Resultados serão salvos em: /content/drive/MyDrive/BERT/Results/BERT/result-bert-part-part-00123-363d1ba3-8ab5-4f96-bc25-4d586