# NFStream-Optimized Model Training

This notebook trains a model specifically optimized for NFStream feature extraction.

## Why This Notebook?
- Original model trained on CICFlowMeter features
- NFStream extracts features with different names/calculations
- This model is trained on NFStream-compatible features for better PCAP accuracy

## Approach
1. Load CICIDS2017 CSV data (with labels)
2. Map CICFlowMeter features → NFStream feature names
3. Train model on NFStream-named features
4. Test with actual PCAP extraction


In [None]:
# Imports and Setup
import pandas as pd
import numpy as np
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Project layout: every working folder hangs off the parent of the current
# working directory (the notebook is expected to run from a subfolder).
BASE_DIR = Path.cwd().parent
DATASET_DIR, MODELS_DIR, RESULTS_DIR, PCAP_DIR = (
    BASE_DIR / folder for folder in ('dataset', 'models', 'results', 'pcap')
)

# Echo the resolved locations so a wrong working directory is spotted early
print(f"Base directory: {BASE_DIR}")
print(f"Dataset directory: {DATASET_DIR}")
print(f"Models directory: {MODELS_DIR}")


## 1. Define Feature Mapping

Map CICFlowMeter (CICIDS2017) feature names to NFStream feature names.


In [None]:
# CICFlowMeter to NFStream Feature Mapping
# This maps CICIDS2017 CSV column names to NFStream attribute names.
# Keys are CSV headers after whitespace stripping; values are the names the
# model is trained on.
#
# NOTE(review): this table records NAME correspondence only. The NFStream
# targets for duration/IAT end in `_ms`; CICFlowMeter is documented as
# reporting those columns in microseconds -- confirm units before treating
# the two as interchangeable.

CICIDS_TO_NFSTREAM = {
    # Port and protocol
    'Destination Port': 'dst_port',
    
    # Duration
    'Flow Duration': 'bidirectional_duration_ms',
    
    # Packet counts
    'Total Fwd Packets': 'src2dst_packets',
    'Total Backward Packets': 'dst2src_packets',
    
    # Byte counts
    'Total Length of Fwd Packets': 'src2dst_bytes',
    'Total Length of Bwd Packets': 'dst2src_bytes',
    
    # Forward packet length stats
    'Fwd Packet Length Max': 'src2dst_max_ps',
    'Fwd Packet Length Min': 'src2dst_min_ps',
    'Fwd Packet Length Mean': 'src2dst_mean_ps',
    'Fwd Packet Length Std': 'src2dst_stddev_ps',
    
    # Backward packet length stats
    'Bwd Packet Length Max': 'dst2src_max_ps',
    'Bwd Packet Length Min': 'dst2src_min_ps',
    'Bwd Packet Length Mean': 'dst2src_mean_ps',
    'Bwd Packet Length Std': 'dst2src_stddev_ps',
    
    # Flow-level stats (bidirectional)
    'Min Packet Length': 'bidirectional_min_ps',
    'Max Packet Length': 'bidirectional_max_ps',
    'Packet Length Mean': 'bidirectional_mean_ps',
    'Packet Length Std': 'bidirectional_stddev_ps',
    
    # Inter-arrival times (flow level)
    'Flow IAT Mean': 'bidirectional_mean_piat_ms',
    'Flow IAT Std': 'bidirectional_stddev_piat_ms',
    'Flow IAT Max': 'bidirectional_max_piat_ms',
    'Flow IAT Min': 'bidirectional_min_piat_ms',
    
    # Forward IAT
    'Fwd IAT Total': 'src2dst_duration_ms',
    'Fwd IAT Mean': 'src2dst_mean_piat_ms',
    'Fwd IAT Std': 'src2dst_stddev_piat_ms',
    'Fwd IAT Max': 'src2dst_max_piat_ms',
    'Fwd IAT Min': 'src2dst_min_piat_ms',
    
    # Backward IAT
    'Bwd IAT Total': 'dst2src_duration_ms',
    'Bwd IAT Mean': 'dst2src_mean_piat_ms',
    'Bwd IAT Std': 'dst2src_stddev_piat_ms',
    'Bwd IAT Max': 'dst2src_max_piat_ms',
    'Bwd IAT Min': 'dst2src_min_piat_ms',
    
    # TCP Flags - Forward
    'Fwd PSH Flags': 'src2dst_psh_packets',
    'Fwd URG Flags': 'src2dst_urg_packets',
    
    # TCP Flags - Backward
    'Bwd PSH Flags': 'dst2src_psh_packets',
    'Bwd URG Flags': 'dst2src_urg_packets',
    
    # TCP Flags - Combined (we'll compute from src2dst + dst2src)
    # NOTE(review): the `*_flag_count` targets below do not appear in
    # NFSTREAM_AVAILABLE_FEATURES; they look like placeholders for
    # bidirectional totals rather than real NFStream attributes -- confirm.
    'FIN Flag Count': 'fin_flag_count',  # Derived
    'SYN Flag Count': 'syn_flag_count',  # Derived
    'RST Flag Count': 'rst_flag_count',  # Derived
    'PSH Flag Count': 'psh_flag_count',  # Derived
    'ACK Flag Count': 'ack_flag_count',  # Derived
    'URG Flag Count': 'urg_flag_count',  # Derived
}

# Features that NFStream can provide
# (flow attributes first, then features this notebook derives from them)
NFSTREAM_AVAILABLE_FEATURES = [
    'dst_port',
    'bidirectional_duration_ms',
    'src2dst_packets', 'dst2src_packets',
    'src2dst_bytes', 'dst2src_bytes',
    'src2dst_max_ps', 'src2dst_min_ps', 'src2dst_mean_ps', 'src2dst_stddev_ps',
    'dst2src_max_ps', 'dst2src_min_ps', 'dst2src_mean_ps', 'dst2src_stddev_ps',
    'bidirectional_min_ps', 'bidirectional_max_ps', 'bidirectional_mean_ps', 'bidirectional_stddev_ps',
    'bidirectional_mean_piat_ms', 'bidirectional_stddev_piat_ms', 'bidirectional_max_piat_ms', 'bidirectional_min_piat_ms',
    'src2dst_duration_ms', 'src2dst_mean_piat_ms', 'src2dst_stddev_piat_ms', 'src2dst_max_piat_ms', 'src2dst_min_piat_ms',
    'dst2src_duration_ms', 'dst2src_mean_piat_ms', 'dst2src_stddev_piat_ms', 'dst2src_max_piat_ms', 'dst2src_min_piat_ms',
    'src2dst_psh_packets', 'src2dst_urg_packets', 'src2dst_syn_packets', 'src2dst_fin_packets', 'src2dst_rst_packets', 'src2dst_ack_packets',
    'dst2src_psh_packets', 'dst2src_urg_packets', 'dst2src_syn_packets', 'dst2src_fin_packets', 'dst2src_rst_packets', 'dst2src_ack_packets',
    # Derived features we'll calculate
    'bidirectional_packets', 'bidirectional_bytes',
    'flow_bytes_per_second', 'flow_packets_per_second',
    'fwd_packets_per_second', 'bwd_packets_per_second',
    'packet_length_variance', 'down_up_ratio', 'average_packet_size',
]

# Quick size check of both tables
print(f"CICFlowMeter to NFStream mappings defined: {len(CICIDS_TO_NFSTREAM)}")
print(f"NFStream available features: {len(NFSTREAM_AVAILABLE_FEATURES)}")


## 2. Load CICIDS2017 Data and Transform to NFStream Features


In [None]:
# Load all CICIDS2017 CSV files
csv_files = sorted(DATASET_DIR.glob('*.csv'))
print(f"Found {len(csv_files)} CSV files")

# Stop early with an actionable message: with no inputs, pd.concat below would
# only raise a generic "No objects to concatenate" ValueError.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {DATASET_DIR}")

dataframes = []
for file in csv_files:
    print(f"Loading {file.name}...")
    df_temp = pd.read_csv(file, low_memory=False)
    # Strip whitespace around header names so later lookups by exact column
    # name (e.g. 'Label', 'Flow Duration') match
    df_temp.columns = df_temp.columns.str.strip()
    print(f"  Shape: {df_temp.shape}")
    dataframes.append(df_temp)

# Combine all dataframes (fresh 0..n-1 index so rows are uniquely addressable)
print("\nCombining all dataframes...")
df = pd.concat(dataframes, ignore_index=True)
print(f"Combined dataset shape: {df.shape}")
print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")


In [None]:
# Transform CICIDS2017 data to NFStream-compatible features
def transform_to_nfstream_features(df_cicids, time_divisor=1000.0):
    """
    Transform CICIDS2017 dataframe to NFStream-compatible feature names and format.

    Time-based columns are rescaled on the way through: CICFlowMeter reports
    'Flow Duration' and all IAT statistics in microseconds, while the NFStream
    attributes they are mapped to (``*_duration_ms``, ``*_piat_ms``) are in
    milliseconds. Without the conversion the model is trained on values 1000x
    larger than what NFStream supplies at inference time, and the per-second
    rates derived from the duration are off by the same factor.

    Parameters
    ----------
    df_cicids : pandas.DataFrame
        CICIDS2017 flows with whitespace-stripped CICFlowMeter column names.
        Columns that are absent are filled with 0.
    time_divisor : float, default 1000.0
        Factor dividing the CICFlowMeter time columns to obtain milliseconds
        (microseconds -> milliseconds). Pass 1.0 if the input is already in ms.

    Returns
    -------
    pandas.DataFrame
        One row per input row (same index), columns named after NFStream
        attributes plus the derived rate/size features, in a fixed order.

    Notes
    -----
    NOTE(review): CICFlowMeter packet-length columns and NFStream ``*_ps``
    attributes may account for headers differently -- confirm before relying
    on the size features across tools.
    """
    # Anchor the output on the input index so that scalar fallbacks (missing
    # columns) broadcast to a full column of zeros instead of an empty/NaN one.
    df_nf = pd.DataFrame(index=df_cicids.index)

    def cic(column):
        """Source column as-is, or scalar 0 when the CSV does not have it."""
        return df_cicids.get(column, 0)

    def cic_ms(column):
        """Source time column converted to milliseconds."""
        return cic(column) / time_divisor

    # Direct mappings
    df_nf['dst_port'] = cic('Destination Port')
    df_nf['bidirectional_duration_ms'] = cic_ms('Flow Duration')

    # Packet counts
    df_nf['src2dst_packets'] = cic('Total Fwd Packets')
    df_nf['dst2src_packets'] = cic('Total Backward Packets')
    df_nf['bidirectional_packets'] = df_nf['src2dst_packets'] + df_nf['dst2src_packets']

    # Byte counts
    df_nf['src2dst_bytes'] = cic('Total Length of Fwd Packets')
    df_nf['dst2src_bytes'] = cic('Total Length of Bwd Packets')
    df_nf['bidirectional_bytes'] = df_nf['src2dst_bytes'] + df_nf['dst2src_bytes']

    # Packet length statistics (forward, backward, bidirectional) -- unit-free copies
    for nf_name, cic_name in (
        ('src2dst_max_ps', 'Fwd Packet Length Max'),
        ('src2dst_min_ps', 'Fwd Packet Length Min'),
        ('src2dst_mean_ps', 'Fwd Packet Length Mean'),
        ('src2dst_stddev_ps', 'Fwd Packet Length Std'),
        ('dst2src_max_ps', 'Bwd Packet Length Max'),
        ('dst2src_min_ps', 'Bwd Packet Length Min'),
        ('dst2src_mean_ps', 'Bwd Packet Length Mean'),
        ('dst2src_stddev_ps', 'Bwd Packet Length Std'),
        ('bidirectional_min_ps', 'Min Packet Length'),
        ('bidirectional_max_ps', 'Max Packet Length'),
        ('bidirectional_mean_ps', 'Packet Length Mean'),
        ('bidirectional_stddev_ps', 'Packet Length Std'),
    ):
        df_nf[nf_name] = cic(cic_name)

    # Inter-arrival times and per-direction durations -- converted to ms
    for nf_name, cic_name in (
        ('bidirectional_mean_piat_ms', 'Flow IAT Mean'),
        ('bidirectional_stddev_piat_ms', 'Flow IAT Std'),
        ('bidirectional_max_piat_ms', 'Flow IAT Max'),
        ('bidirectional_min_piat_ms', 'Flow IAT Min'),
        ('src2dst_duration_ms', 'Fwd IAT Total'),
        ('src2dst_mean_piat_ms', 'Fwd IAT Mean'),
        ('src2dst_stddev_piat_ms', 'Fwd IAT Std'),
        ('src2dst_max_piat_ms', 'Fwd IAT Max'),
        ('src2dst_min_piat_ms', 'Fwd IAT Min'),
        ('dst2src_duration_ms', 'Bwd IAT Total'),
        ('dst2src_mean_piat_ms', 'Bwd IAT Mean'),
        ('dst2src_stddev_piat_ms', 'Bwd IAT Std'),
        ('dst2src_max_piat_ms', 'Bwd IAT Max'),
        ('dst2src_min_piat_ms', 'Bwd IAT Min'),
    ):
        df_nf[nf_name] = cic_ms(cic_name)

    # TCP flags available per direction
    df_nf['src2dst_psh_packets'] = cic('Fwd PSH Flags')
    df_nf['src2dst_urg_packets'] = cic('Fwd URG Flags')
    df_nf['dst2src_psh_packets'] = cic('Bwd PSH Flags')
    df_nf['dst2src_urg_packets'] = cic('Bwd URG Flags')

    # Combined flags: the CSV only has one count per flag, so the whole count
    # is stored under src2dst_* and the dst2src_* counterpart is fixed at 0.
    df_nf['src2dst_syn_packets'] = cic('SYN Flag Count')
    df_nf['src2dst_fin_packets'] = cic('FIN Flag Count')
    df_nf['src2dst_rst_packets'] = cic('RST Flag Count')
    df_nf['src2dst_ack_packets'] = cic('ACK Flag Count')
    for nf_name in ('dst2src_syn_packets', 'dst2src_fin_packets',
                    'dst2src_rst_packets', 'dst2src_ack_packets'):
        df_nf[nf_name] = 0  # Not available separately

    # Derived rates: duration in seconds, with zero-length flows clamped to
    # 1 ms so the divisions below never hit zero
    duration_s = (df_nf['bidirectional_duration_ms'] / 1000).replace(0, 0.001)
    df_nf['flow_bytes_per_second'] = df_nf['bidirectional_bytes'] / duration_s
    df_nf['flow_packets_per_second'] = df_nf['bidirectional_packets'] / duration_s
    df_nf['fwd_packets_per_second'] = df_nf['src2dst_packets'] / duration_s
    df_nf['bwd_packets_per_second'] = df_nf['dst2src_packets'] / duration_s

    # More derived features (zero denominators replaced by 1 to avoid inf/NaN)
    df_nf['packet_length_variance'] = df_nf['bidirectional_stddev_ps'] ** 2
    df_nf['down_up_ratio'] = df_nf['dst2src_packets'] / df_nf['src2dst_packets'].replace(0, 1)
    df_nf['average_packet_size'] = df_nf['bidirectional_bytes'] / df_nf['bidirectional_packets'].replace(0, 1)

    return df_nf

# Run the CICIDS2017 -> NFStream conversion on the combined frame and show
# what came out (shape first, then the full column list)
print("Transforming CICIDS2017 data to NFStream features...")
df_nfstream = transform_to_nfstream_features(df)

print(f"Transformed shape: {df_nfstream.shape}")
print(f"\nNFStream feature columns ({len(df_nfstream.columns)}):")
print(df_nfstream.columns.tolist())


## 3. Prepare Labels and Preprocess Data


In [None]:
# Label column and the minimum class size below which classes are pooled
label_col = 'Label'
rare_threshold = 100

# Work on a copy so the combined frame keeps its original labels
y_original = df[label_col].copy()
label_counts = y_original.value_counts()

print(f"Label distribution:")
print(label_counts)

# Classes with fewer than `rare_threshold` rows are too small to learn from
# individually; they are merged into a single 'Other' class
rare_classes = [name for name, count in label_counts.items() if count < rare_threshold]

print(f"\nRare classes (< {rare_threshold} samples): {rare_classes}")

# Final multiclass target
if rare_classes:
    y = y_original.replace(rare_classes, 'Other')
    print(f"\nAfter combining rare classes:")
    print(y.value_counts())
else:
    y = y_original.copy()


In [None]:
# Feature matrix: start from a copy so the transformed frame stays untouched
X = df_nfstream.copy()

print(f"Features shape: {X.shape}")

# Step 1: turn +/-inf into NaN so both are handled by the same imputation
print("Handling infinite values...")
X = X.replace([np.inf, -np.inf], np.nan)

# Step 2: impute each column that has gaps with that column's own median
print("Filling missing values...")
null_counts = X.isnull().sum()
for col in null_counts[null_counts > 0].index:
    X[col] = X[col].fillna(X[col].median())

# Step 3: report what is left (both counts should be 0) and the final shapes
print(f"Missing values: {X.isnull().sum().sum()}")
print(f"Infinite values: {np.isinf(X.select_dtypes(include=[np.number])).sum().sum()}")
print(f"\nFinal features shape: {X.shape}")
print(f"Final labels shape: {y.shape}")


## 4. Train-Test Split


In [None]:
from sklearn.model_selection import train_test_split

# 80/20 hold-out split; stratifying on y keeps the class proportions the same
# in both parts, and the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")
print(f"\nTraining set class distribution:")
print(y_train.value_counts())


## 5. Train NFStream-Optimized Random Forest Model


In [None]:
from sklearn.ensemble import RandomForestClassifier
import time

print("Training NFStream-Optimized Random Forest Classifier...")
print("="*60)

# Hyper-parameters gathered in one place; class_weight='balanced' is used
# because the label distribution is heavily skewed
rf_params = dict(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
    class_weight='balanced',
    verbose=1,
)
rf_nfstream = RandomForestClassifier(**rf_params)

# Fit and measure wall-clock training time
start_time = time.time()
rf_nfstream.fit(X_train, y_train)
training_time = time.time() - start_time

print(f"\n✅ Training completed in {training_time:.2f} seconds ({training_time/60:.2f} minutes)")


## 6. Evaluate Model


In [None]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)

# Predict on the held-out split
print("Making predictions on test set...")
y_pred = rf_nfstream.predict(X_test)

# Overall accuracy plus macro-averaged scores (every class counts equally);
# zero_division=0 scores a class with no predictions as 0 instead of warning
macro_options = {'average': 'macro', 'zero_division': 0}
accuracy = accuracy_score(y_test, y_pred)
precision_macro = precision_score(y_test, y_pred, **macro_options)
recall_macro = recall_score(y_test, y_pred, **macro_options)
f1_macro = f1_score(y_test, y_pred, **macro_options)

# Summary table
print("\n" + "="*60)
print("NFSTREAM-OPTIMIZED MODEL PERFORMANCE")
print("="*60)
print(f"Accuracy:        {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f"Precision (Macro): {precision_macro:.4f}")
print(f"Recall (Macro):    {recall_macro:.4f}")
print(f"F1-Score (Macro):  {f1_macro:.4f}")
print("="*60)

# Per-class breakdown
print("\nDetailed Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))


## 7. Save NFStream-Optimized Model


In [None]:
import joblib
from datetime import datetime

# Make sure the output folder exists, then resolve all artifact paths up front
MODELS_DIR.mkdir(exist_ok=True)
model_filename = MODELS_DIR / 'random_forest_nfstream_optimized.joblib'
feature_names_file = MODELS_DIR / 'feature_names_nfstream.joblib'
class_names_file = MODELS_DIR / 'class_names_nfstream.joblib'

# 1) The fitted classifier
joblib.dump(rf_nfstream, model_filename)
print(f"✅ Model saved to: {model_filename}")

# 2) Feature names in training column order (NFStream naming)
joblib.dump(list(X.columns), feature_names_file)
print(f"✅ Feature names saved to: {feature_names_file}")

# 3) Alphabetically sorted class labels
class_names = sorted(y.unique())
joblib.dump(class_names, class_names_file)
print(f"✅ Class names saved to: {class_names_file}")

# Short recap of what was persisted
print(f"\nFeatures ({len(X.columns)}): {list(X.columns)[:10]}...")
print(f"Classes ({len(class_names)}): {class_names}")


## 8. Test with Real PCAP File

Now let's test the NFStream-optimized model with actual PCAP extraction to verify it works correctly.


In [None]:
# Test with Monday PCAP file
from nfstream import NFStreamer

# Capture to test against; the rest of the cell is skipped when it is absent
pcap_file = PCAP_DIR / 'Monday-WorkingHours.pcap'
print(f"PCAP file: {pcap_file}")
print(f"File exists: {pcap_file.exists()}")

if pcap_file.exists():
    print(f"\nExtracting features from PCAP using NFStream...")
    
    # Flow attributes to read from every NFStream flow; these are the
    # non-derived feature names the model was trained on
    FLOW_ATTRIBUTES = [
        'dst_port', 'bidirectional_duration_ms',
        'src2dst_packets', 'dst2src_packets', 'bidirectional_packets',
        'src2dst_bytes', 'dst2src_bytes', 'bidirectional_bytes',
        'src2dst_max_ps', 'src2dst_min_ps', 'src2dst_mean_ps', 'src2dst_stddev_ps',
        'dst2src_max_ps', 'dst2src_min_ps', 'dst2src_mean_ps', 'dst2src_stddev_ps',
        'bidirectional_min_ps', 'bidirectional_max_ps', 'bidirectional_mean_ps', 'bidirectional_stddev_ps',
        'bidirectional_mean_piat_ms', 'bidirectional_stddev_piat_ms', 'bidirectional_max_piat_ms', 'bidirectional_min_piat_ms',
        'src2dst_duration_ms', 'src2dst_mean_piat_ms', 'src2dst_stddev_piat_ms', 'src2dst_max_piat_ms', 'src2dst_min_piat_ms',
        'dst2src_duration_ms', 'dst2src_mean_piat_ms', 'dst2src_stddev_piat_ms', 'dst2src_max_piat_ms', 'dst2src_min_piat_ms',
        'src2dst_psh_packets', 'src2dst_urg_packets', 'src2dst_syn_packets', 'src2dst_fin_packets', 'src2dst_rst_packets', 'src2dst_ack_packets',
        'dst2src_psh_packets', 'dst2src_urg_packets', 'dst2src_syn_packets', 'dst2src_fin_packets', 'dst2src_rst_packets', 'dst2src_ack_packets',
    ]
    
    # Extract flows. statistical_analysis=True is what the ps/piat/flag
    # statistics above depend on.
    # NOTE(review): splt_analysis=0 / n_dissections=0 presumably switch off
    # per-packet sequences and application dissection -- confirm in NFStream docs.
    streamer = NFStreamer(
        source=str(pcap_file),
        statistical_analysis=True,
        splt_analysis=0,
        n_dissections=0,
    )
    
    MAX_FLOWS = 50000  # Limit for quick testing
    flows_list = []
    
    print(f"Extracting up to {MAX_FLOWS:,} flows...")
    for i, flow in enumerate(streamer):
        # getattr's default already covers attributes the flow does not
        # expose, so no blanket `except` is needed -- a bare except here would
        # also swallow KeyboardInterrupt and hide genuine extraction errors.
        flows_list.append({attr: getattr(flow, attr, 0) for attr in FLOW_ATTRIBUTES})
        
        if (i + 1) % 10000 == 0:
            print(f"  Processed {i+1:,} flows...")
        
        if i + 1 >= MAX_FLOWS:
            break
    
    print(f"\n✅ Extracted {len(flows_list):,} flows")
    
    # Convert to DataFrame (one row per flow, one column per attribute)
    df_pcap = pd.DataFrame(flows_list)
    print(f"PCAP DataFrame shape: {df_pcap.shape}")


In [None]:
# Add derived features (same as during training)
if 'df_pcap' in dir() and not df_pcap.empty:
    print("Adding derived features...")
    
    # Match the training representation of SYN/FIN/RST/ACK: the training
    # transform stores the single CSV flag count under src2dst_* and fixes
    # dst2src_* at 0, whereas NFStream reports the two directions separately.
    # Fold the backward count into the forward column so the model sees the
    # same quantity it was trained on. (Re-running is safe: the backward
    # column is 0 after the first pass.)
    for flag in ('syn', 'fin', 'rst', 'ack'):
        fwd_col = f'src2dst_{flag}_packets'
        bwd_col = f'dst2src_{flag}_packets'
        df_pcap[fwd_col] = df_pcap[fwd_col] + df_pcap[bwd_col]
        df_pcap[bwd_col] = 0
    
    # Calculate derived features; zero-length flows are clamped to 1 ms so the
    # per-second rates stay finite
    duration_s = (df_pcap['bidirectional_duration_ms'] / 1000).replace(0, 0.001)
    df_pcap['flow_bytes_per_second'] = df_pcap['bidirectional_bytes'] / duration_s
    df_pcap['flow_packets_per_second'] = df_pcap['bidirectional_packets'] / duration_s
    df_pcap['fwd_packets_per_second'] = df_pcap['src2dst_packets'] / duration_s
    df_pcap['bwd_packets_per_second'] = df_pcap['dst2src_packets'] / duration_s
    df_pcap['packet_length_variance'] = df_pcap['bidirectional_stddev_ps'] ** 2
    df_pcap['down_up_ratio'] = df_pcap['dst2src_packets'] / df_pcap['src2dst_packets'].replace(0, 1)
    df_pcap['average_packet_size'] = df_pcap['bidirectional_bytes'] / df_pcap['bidirectional_packets'].replace(0, 1)
    
    # Ensure all required features exist (anything NFStream did not supply is
    # filled with 0)
    required_features = list(X.columns)
    for feat in required_features:
        if feat not in df_pcap.columns:
            df_pcap[feat] = 0
    
    # Select features in correct order (the model expects training column order)
    X_pcap = df_pcap[required_features].copy()
    
    # Handle infinite/missing values
    # NOTE(review): training imputes with column medians while this uses 0 --
    # confirm the difference is acceptable.
    X_pcap = X_pcap.replace([np.inf, -np.inf], np.nan)
    X_pcap = X_pcap.fillna(0)
    
    print(f"PCAP features shape: {X_pcap.shape}")
    print(f"Features match training: {list(X_pcap.columns) == required_features}")
elif 'df_pcap' in dir():
    # An empty capture has no columns to derive from; say so instead of
    # failing with a KeyError
    print("No flows were extracted from the PCAP; skipping feature preparation.")


In [None]:
# Classify the PCAP flows (only when the feature matrix was built)
if 'X_pcap' in dir():
    print("Making predictions on PCAP data...")
    pcap_predictions = rf_nfstream.predict(X_pcap)
    
    # Report header
    print("\n" + "="*60)
    print("PCAP ANALYSIS RESULTS (NFStream-Optimized Model)")
    print("="*60)
    print(f"Total flows analyzed: {len(pcap_predictions):,}")
    
    # How many flows landed in each predicted class, with percentages
    pred_counts = pd.Series(pcap_predictions).value_counts()
    print(f"\nPrediction Distribution:")
    for label, count in pred_counts.items():
        pct = count / len(pcap_predictions) * 100
        print(f"  {label}: {count:,} ({pct:.2f}%)")
    
    # The Monday capture is expected to be entirely BENIGN
    benign_count = (pcap_predictions == 'BENIGN').sum()
    print(f"\n{'✅' if benign_count == len(pcap_predictions) else '⚠️'} BENIGN traffic: {benign_count:,} ({benign_count/len(pcap_predictions)*100:.2f}%)")
    
    if benign_count != len(pcap_predictions):
        print(f"\n⚠️ Some flows classified as attacks: {len(pcap_predictions) - benign_count:,}")
        print("   (This could be false positives due to feature differences)")
    else:
        print("\n✅ SUCCESS! All Monday traffic correctly classified as BENIGN!")
        print("   The NFStream-optimized model is working correctly.")
    
    print("="*60)


## Summary

### What This Notebook Created:

1. **NFStream-Optimized Model** (`random_forest_nfstream_optimized.joblib`)
   - Trained on NFStream-compatible feature names
   - Intended for PCAP analysis via NFStream; so far checked only against the benign Monday capture (see Next Steps)

2. **Feature Names** (`feature_names_nfstream.joblib`)
   - List of features in NFStream format
   - Used by the feature extractor

3. **Class Names** (`class_names_nfstream.joblib`)
   - Attack type labels

### Usage:
```python
# Load model
import joblib
model = joblib.load('models/random_forest_nfstream_optimized.joblib')
features = joblib.load('models/feature_names_nfstream.joblib')

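# Extract flows from the PCAP with NFStream into a DataFrame `X_pcap` and add
# the same derived columns that were used in training.
# Selecting `features` ensures the column order matches training.
# Make predictions
predictions = model.predict(X_pcap[features])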
```

### Next Steps:
1. Update `src/predictor.py` to use the new NFStream-optimized model
2. Test with PCAP files containing attacks
3. Integrate into web application
