# CNN Classifier Training on CICIDS2018

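This notebook trains a 1D Convolutional Neural Network (CNN) to separate benign from attack traffic in the CICIDS2018 dataset. It works on a class-balanced subsample (500,000 flows per class), split 70/15/15 into train/validation/test sets, and optimises a focal loss.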

**Model:** CNNClassifier (PyTorch)  
**Dataset:** CICIDS2018  
**Task:** Binary Classification (Benign vs Attack)

## 1. Setup and Imports

In [1]:
import sys
sys.path.insert(0, '../..')

import numpy as np
import pandas as pd
import glob
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import time
import gc

# Seed the NumPy and PyTorch RNGs so that weight initialisation and DataLoader
# shuffling repeat across "Restart Kernel -> Run All". Some CUDA kernels are
# still non-deterministic, so GPU runs may differ in the last decimals.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)  # seeds the CPU generator and every CUDA device

# Check GPU availability: use CUDA when present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU: NVIDIA GeForce RTX 3050 Laptop GPU


## 2. Load and Preprocess Data

In [2]:
# Load all CSV files from the CICIDS2018 raw directory
DATA_PATH = '../../data/raw/cicids2018/'
# Files above this size are skipped so the concatenated frame still fits in RAM.
MAX_FILE_SIZE_MB = 1000

# Process the smallest files first.
all_files = sorted(glob.glob(os.path.join(DATA_PATH, "*.csv")), key=os.path.getsize)

print(f"Found {len(all_files)} files.")
li = []

for filename in all_files:
    file_size_mb = os.path.getsize(filename) / (1024 * 1024)
    if file_size_mb > MAX_FILE_SIZE_MB:
        print(f"Skipping {os.path.basename(filename)} ({file_size_mb:.0f}MB - too large)")
        continue
    print(f"Loading {os.path.basename(filename)}...")
    try:
        li.append(pd.read_csv(filename, index_col=None, header=0, low_memory=True))
    except Exception as e:
        # Best effort: a single unreadable file is reported and skipped.
        print(f"Error loading {filename}: {e}")

# Fail with an actionable message instead of pandas' "No objects to concatenate"
# when the path is wrong or every file was skipped / failed to parse.
if not li:
    raise FileNotFoundError(
        f"No CSV files could be loaded from {os.path.abspath(DATA_PATH)} "
        f"(found {len(all_files)} candidate files)"
    )

# Concatenate
df = pd.concat(li, axis=0, ignore_index=True)
print(f"Loaded {len(df):,} samples from {len(li)} files")

# Garbage collection to free memory: drop the per-file frames now that they
# have been copied into df.
del li
gc.collect()

Found 10 files.
Loading 03-01-2018.csv...
Loading 02-28-2018.csv...
Loading 02-21-2018.csv...
Loading 02-16-2018.csv...
Loading 02-14-2018.csv...
Loading 02-22-2018.csv...
Loading 03-02-2018.csv...
Loading 02-15-2018.csv...
Loading 02-23-2018.csv...
Skipping 02-20-2018.csv (3867MB - too large)
Loaded 8,247,888 samples from 9 files
  Benign: 6,077,145
  Attack: 2,170,743


In [3]:
# Basic Cleaning
# 1. Strip whitespace from column names
#    so later lookups by name (e.g. df['Label']) do not depend on stray
#    spaces in the CSV header.
df.columns = df.columns.str.strip()

# 2. Replace Inf with NaN and drop NaNs
#    Turning +/-inf into NaN first lets a single dropna() remove every row that
#    has a missing or infinite value in any column. Both calls mutate df in place.
#    NOTE(review): replace() here matches float inf values; if a column is still
#    stored as text at this point, entries such as "Infinity" would presumably
#    not be caught by this step — confirm against the raw files.
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)
print(f"Samples after dropping NaN/Inf: {len(df):,}")
print("Remaining columns:", len(df.columns))

Samples after dropping NaN/Inf: 8,247,888
Remaining columns: 80


In [4]:
# Binary target: Benign -> 0, everything else -> 1 (Attack)
def create_binary_label(label):
    """Return 0 for a string label containing 'benign' (any case), otherwise 1.

    Non-string inputs (e.g. NaN or None) are treated as attacks and map to 1.
    """
    is_benign = isinstance(label, str) and 'BENIGN' in label.upper()
    return 0 if is_benign else 1

df['binary_label'] = df['Label'].apply(create_binary_label)

print("Binary class distribution:")
print(df['binary_label'].value_counts())

# Drop identifier / non-feature columns; names missing from this file layout
# are ignored.
drop_cols = ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol', 'Timestamp', 'Label']
df.drop(columns=drop_cols, inplace=True, errors='ignore')

# Convert all remaining columns to numeric; unparseable values become NaN.
for col in df.columns:
    if col != 'binary_label':
        df[col] = pd.to_numeric(df[col], errors='coerce')

# The conversion can introduce new non-finite values: unparseable text becomes
# NaN, and textual "Infinity"/"inf" entries become float inf. Map inf to NaN
# again so dropna() removes both and the scaler never sees non-finite input.
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

# Sample balanced data to manage memory. The per-class size is capped by the
# rarer class so sampling without replacement cannot fail and the result stays
# balanced.
SAMPLE_SIZE_PER_CLASS = 500000
class_counts = df['binary_label'].value_counts()
n_per_class = min(
    SAMPLE_SIZE_PER_CLASS,
    int(class_counts.get(0, 0)),
    int(class_counts.get(1, 0)),
)
if n_per_class == 0:
    raise ValueError("Both classes must be present after cleaning to build a balanced sample")
benign = df[df['binary_label'] == 0].sample(n=n_per_class, random_state=42)
attack = df[df['binary_label'] == 1].sample(n=n_per_class, random_state=42)
df = pd.concat([benign, attack], ignore_index=True)
print(f"Sampled to {len(df):,} samples")

# Separate features and target
y = df['binary_label'].values
X = df.drop(columns=['binary_label']).values

# Free memory: the frame and both per-class samples are no longer needed once
# X and y exist.
del df, benign, attack
gc.collect()

print(f"X shape: {X.shape}")

Binary class distribution:
0    6077145
1    2170743
Name: binary_label, dtype: int64
Sampled to 1,000,000 samples
X shape: (1000000, 76)


In [5]:
# 70 / 15 / 15 split into Train, Val, Test.
# Both stages are stratified so every part keeps the class ratio of the sample.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)
# Halve the 30% hold-out into validation and test parts.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

print(f"Train: {X_train.shape[0]:,}")
print(f"Val:   {X_val.shape[0]:,}")
print(f"Test:  {X_test.shape[0]:,}")

Train: 700,000
Val:   150,000
Test:  150,000


In [6]:
# Feature Scaling
# Mean and variance are estimated on the training split only and then applied
# unchanged to all three splits, so no validation/test statistics leak in.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [7]:
# Create DataLoaders
def create_loaders(X, y, batch_size=256, shuffle=True):
    """Wrap a feature matrix and label vector in a DataLoader for the 1D CNN.

    Args:
        X: array-like of shape (n_samples, n_features); converted to float32.
        y: array-like of shape (n_samples,) holding class indices; converted to int64.
        batch_size: number of samples per batch.
        shuffle: reshuffle the samples every epoch. Defaults to True (the
            previous, hard-coded behaviour); pass False for validation/test
            loaders when a stable sample order is wanted.

    Returns:
        A DataLoader yielding (features, labels) batches with features shaped
        (batch, 1, n_features).
    """
    # Reshape for CNN: (batch, features) -> (batch, 1, n_features), i.e. one input channel.
    # as_tensor performs the dtype conversion explicitly and also accepts tensors.
    X_tensor = torch.as_tensor(X, dtype=torch.float32).unsqueeze(1)
    y_tensor = torch.as_tensor(y, dtype=torch.long)

    dataset = TensorDataset(X_tensor, y_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Larger batch size for large dataset
BATCH_SIZE = 1024

# One loader per split, all built with the same batch size.
train_loader, val_loader, test_loader = (
    create_loaders(features, labels, BATCH_SIZE)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f"Train batches: {len(train_loader)}")

Train batches: 684


## 3. Define CNN Model

In [8]:
class CNNClassifier(nn.Module):
    """1D CNN over a single-channel feature vector.

    Three Conv1d -> BatchNorm1d -> ReLU stages (64, 128, 256 channels) are
    followed by adaptive average pooling to length 8 and a three-layer
    fully connected head that emits ``num_classes`` raw logits.

    Args:
        input_dim: number of input features. Accepted but not used by any
            layer: the adaptive pooling fixes the flattened size at 256 * 8.
        num_classes: number of output logits.
        dropout_rate: dropout probability after each hidden linear layer.
    """

    def __init__(self, input_dim, num_classes=2, dropout_rate=0.3):
        super().__init__()

        # Convolutional feature extractor; kernel 3 with padding 1 keeps the length.
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)

        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128)

        self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(256)

        # Pool every channel down to a fixed length of 8.
        self.adaptive_pool = nn.AdaptiveAvgPool1d(8)

        # Classification head on the flattened (256 channels x 8 positions) map.
        self.fc1 = nn.Linear(256 * 8, 256)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(256, 64)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(64, num_classes)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a (batch, 1, n_features) tensor to (batch, num_classes) logits."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.relu(norm(conv(x)))

        pooled = self.adaptive_pool(x)
        flat = pooled.view(pooled.size(0), -1)

        hidden = self.dropout1(self.relu(self.fc1(flat)))
        hidden = self.dropout2(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

# Take the feature count from the scaled training matrix, build the network
# and move its parameters to the selected device.
input_dim = X_train.shape[1]
model = CNNClassifier(input_dim=input_dim)
model = model.to(device)
print(model)

CNNClassifier(
  (conv1): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=(1,))
  (bn3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (adaptive_pool): AdaptiveAvgPool1d(output_size=8)
  (fc1): Linear(in_features=2048, out_features=256, bias=True)
  (dropout1): Dropout(p=0.3, inplace=False)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (dropout2): Dropout(p=0.3, inplace=False)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
  (relu): ReLU()
)


## 4. Training

In [9]:
# Focal loss: cross-entropy scaled by (1 - p_t)^gamma so that well-classified
# samples contribute less. Class weighting is applied only when alpha is given
# per class; a scalar alpha merely rescales the whole loss.
class FocalLoss(nn.Module):
    """Multi-class focal loss computed on raw logits.

    Args:
        alpha: either a scalar that multiplies every sample's loss, or a
            list/tuple/1-D tensor with one weight per class, in which case each
            sample is weighted by the entry of its target class.
        gamma: focusing exponent; 0 reduces the modulating factor to 1.
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        if isinstance(alpha, (list, tuple)):
            alpha = torch.tensor(alpha, dtype=torch.float32)
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss.

        Args:
            inputs: logits of shape (batch, num_classes).
            targets: integer class indices of shape (batch,).
        """
        ce_loss = nn.functional.cross_entropy(inputs, targets, reduction='none')
        # ce_loss = -log(p_t), so exp(-ce_loss) recovers the probability of the true class.
        pt = torch.exp(-ce_loss)

        alpha = self.alpha
        if torch.is_tensor(alpha) and alpha.dim() > 0:
            # Per-class weights: pick each sample's weight by its target class.
            alpha = alpha.to(device=ce_loss.device, dtype=ce_loss.dtype)[targets]

        focal_loss = alpha * (1 - pt) ** self.gamma * ce_loss
        return focal_loss.mean()

# Training schedule
EPOCHS = 20
EARLY_STOPPING_PATIENCE = 5

# Objective, optimiser and learning-rate schedule. The scheduler halves the
# learning rate after 3 epochs without improvement in the monitored value.
criterion = FocalLoss(alpha=0.25, gamma=2)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
)

In [10]:
def train_epoch(model, loader, criterion, optimizer, device=None):
    """Run one optimisation pass over ``loader``.

    Args:
        model: network to train; switched to training mode.
        loader: iterable of (features, labels) batches.
        criterion: callable returning a scalar batch-mean loss from (logits, labels).
        optimizer: optimizer updating ``model``'s parameters.
        device: device the batches are moved to. Defaults to the device of the
            model's parameters, so the function no longer depends on a
            notebook-level ``device`` variable.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    total_loss, correct, total = 0.0, 0, 0

    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = y_batch.size(0)
        total_loss += loss.item() * batch_size
        total += batch_size
        correct += (outputs.argmax(dim=1) == y_batch).sum().item()

    return total_loss / total, correct / total

def validate(model, loader, criterion, device=None):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: network to evaluate; switched to evaluation mode.
        loader: iterable of (features, labels) batches.
        criterion: callable returning a scalar batch-mean loss from (logits, labels).
        device: device the batches are moved to. Defaults to the device of the
            model's parameters, so the function no longer depends on a
            notebook-level ``device`` variable.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    total_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            # Weight the batch-mean loss by the batch size (the last batch may be smaller).
            batch_size = y_batch.size(0)
            total_loss += loss.item() * batch_size
            total += batch_size
            correct += (outputs.argmax(dim=1) == y_batch).sum().item()

    return total_loss / total, correct / total

In [11]:
best_val_loss = float('inf')
patience_counter = 0

# torch.save() does not create missing parent directories, so make sure the
# checkpoint folder exists before the first improvement is written.
BEST_MODEL_PATH = '../../results/models/best_cnn_cicids2018.pth'
os.makedirs(os.path.dirname(BEST_MODEL_PATH), exist_ok=True)

print("Starting training...")
for epoch in range(EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, criterion)

    # ReduceLROnPlateau is driven by the validation loss.
    scheduler.step(val_loss)

    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f} | Time: {time.time()-start_time:.1f}s")

    if val_loss < best_val_loss:
        # New best validation loss: checkpoint the weights and reset the patience counter.
        best_val_loss = val_loss
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        patience_counter = 0
    else:
        # No improvement: stop after EARLY_STOPPING_PATIENCE consecutive such epochs.
        patience_counter += 1
        if patience_counter >= EARLY_STOPPING_PATIENCE:
            print("Early stopping triggered.")
            break

Starting training...
Epoch 1/20 | Train Loss: 0.0089 Acc: 0.9540 | Val Loss: 0.0081 Acc: 0.9584 | Time: 64.8s
Epoch 2/20 | Train Loss: 0.0084 Acc: 0.9586 | Val Loss: 0.0080 Acc: 0.9603 | Time: 63.1s
Epoch 3/20 | Train Loss: 0.0082 Acc: 0.9594 | Val Loss: 0.0084 Acc: 0.9577 | Time: 63.1s
Epoch 4/20 | Train Loss: 0.0081 Acc: 0.9596 | Val Loss: 0.0078 Acc: 0.9610 | Time: 58.4s
Epoch 5/20 | Train Loss: 0.0080 Acc: 0.9598 | Val Loss: 0.0080 Acc: 0.9590 | Time: 70.5s
Epoch 6/20 | Train Loss: 0.0079 Acc: 0.9600 | Val Loss: 0.0078 Acc: 0.9608 | Time: 56.7s
Epoch 7/20 | Train Loss: 0.0078 Acc: 0.9604 | Val Loss: 0.0076 Acc: 0.9613 | Time: 63.5s
Epoch 8/20 | Train Loss: 0.0078 Acc: 0.9604 | Val Loss: 0.0076 Acc: 0.9613 | Time: 60.0s
Epoch 9/20 | Train Loss: 0.0077 Acc: 0.9608 | Val Loss: 0.0136 Acc: 0.9285 | Time: 89.4s
Epoch 10/20 | Train Loss: 0.0077 Acc: 0.9611 | Val Loss: 0.0076 Acc: 0.9599 | Time: 81.5s
Epoch 11/20 | Train Loss: 0.0077 Acc: 0.9615 | Val Loss: 0.0297 Acc: 0.8081 | Time: 59.0

## 5. Evaluation

In [12]:
# Load best model (the checkpoint with the lowest validation loss).
# map_location lets a checkpoint saved on a GPU be restored on whatever device
# this session uses, including a CPU-only machine.
model.load_state_dict(
    torch.load(
        '../../results/models/best_cnn_cicids2018.pth',
        map_location=device,
        weights_only=True,
    )
)
model.eval()

all_preds = []
all_labels = []

# Collect predictions and ground-truth labels batch by batch, in the same order.
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(y_batch.numpy())

print("Classification Report:")
print(classification_report(all_labels, all_preds, target_names=['Benign', 'Attack']))

# Rows are true classes, columns are predicted classes (0 = Benign, 1 = Attack).
cm = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:")
print(f"TN: {cm[0][0]:,} | FP: {cm[0][1]:,}")
print(f"FN: {cm[1][0]:,} | TP: {cm[1][1]:,}")

# Accuracy = correctly classified samples (matrix diagonal) / all samples.
accuracy = (cm[0][0] + cm[1][1]) / cm.sum()
print(f"\nTest Accuracy: {accuracy*100:.2f}%")

torch.cuda.empty_cache()
gc.collect()

Classification Report:
              precision    recall  f1-score   support

      Benign       0.94      0.99      0.97     75000
      Attack       0.99      0.94      0.96     75000

    accuracy                           0.96    150000
   macro avg       0.97      0.96      0.96    150000
weighted avg       0.97      0.96      0.96    150000

Confusion Matrix:
TN: 74,514 | FP: 486
FN: 4,863 | TP: 70,137

Test Accuracy: 96.43%


In [13]:
# Save results: write the classification report and the confusion matrix to a
# plain-text file next to the other experiment outputs.
with open('../../results/cnn_cicids2018_results.txt', 'w') as f:
    f.writelines([
        "CNN CICIDS2018 Results\n",
        "=" * 50 + "\n",
        classification_report(all_labels, all_preds, target_names=['Benign', 'Attack']),
        f"\nConfusion Matrix:\n{cm}\n",
    ])
print("Results saved!")

Results saved!
