In [47]:
from joblib import load
import pandas as pd

# Load model, scaler, and features.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only load artifacts that come from a trusted source.
model = load("model.joblib")
scaler = load("scaler.joblib")   # skip if you didn't save scaler

# features.txt holds one feature name per line. Blank lines (for example an
# extra empty line at the end of the file) are skipped so that they do not
# end up as an empty-string feature name.
with open("features.txt") as f:
    features = [name for name in (line.strip() for line in f) if name]

print("Features order:", features)


Features order: ['Src Port', 'Dst Port', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'Bwd

In [48]:
# Read the new flow capture that is going to be classified
new_data = pd.read_csv("C:\\Users\\Asus\\Documents\\MMU\\degree sem 7 intern\\Reports\\ROS-SCRIPTS\\! important\\acmirf-new\\NavBot25.csv")

# Show the first ten names on each side for a quick visual comparison
print("Model expects:", features[:10], "...")
print("CSV has:", new_data.columns[:10].tolist(), "...")

# Set differences between the expected feature names and the CSV header
missing = set(features).difference(new_data.columns)
extra = set(new_data.columns).difference(features)

print("Missing in CSV:", missing)
print("Extra in CSV:", extra)


Model expects: ['Src Port', 'Dst Port', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean'] ...
CSV has: ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts'] ...
Missing in CSV: set()
Extra in CSV: {'Flow ID', 'Dst IP', 'Timestamp', 'Label', 'Protocol', 'Src IP'}


In [49]:
# Force reordering of columns to match training order.
# .loc raises a KeyError when a feature is absent from the CSV, whereas
# reindex(columns=...) would silently add it as an all-NaN column.
new_data = new_data.loc[:, features]


In [50]:
# Drop every column that is not a model feature, then lay the remaining
# columns out in the training order (reindex fills absent features with NaN).
new_data = (
    new_data
    .loc[:, new_data.columns.intersection(features)]
    .reindex(columns=features)
)

In [51]:
# Side-by-side view of the expected names, the actual names, and the frame size
print("Expected features:", features)
print("New data columns :", new_data.columns.tolist())
print("Shape of new_data:", new_data.shape)

Expected features: ['Src Port', 'Dst Port', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', '

In [52]:
# Independent copy holding exactly the feature columns, in feature order
new_data = new_data.loc[:, features].copy()


In [53]:
# Strip leading/trailing whitespace from the header names
new_data.columns = new_data.columns.str.strip()

# Select exactly the training features, in training order
new_data = new_data[features]

# Print both name lists and whether they are identical
print("Training features:", features)
print("New data columns:", new_data.columns.tolist())
print("Same?", new_data.columns.tolist() == features)


Training features: ['Src Port', 'Dst Port', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts', 'TotLen Bwd Pkts', 'Fwd Pkt Len Max', 'Fwd Pkt Len Min', 'Fwd Pkt Len Mean', 'Fwd Pkt Len Std', 'Bwd Pkt Len Max', 'Bwd Pkt Len Min', 'Bwd Pkt Len Mean', 'Bwd Pkt Len Std', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max', 'Flow IAT Min', 'Fwd IAT Tot', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Len', 'Bwd Header Len', 'Fwd Pkts/s', 'Bwd Pkts/s', 'Pkt Len Min', 'Pkt Len Max', 'Pkt Len Mean', 'Pkt Len Std', 'Pkt Len Var', 'FIN Flag Cnt', 'SYN Flag Cnt', 'RST Flag Cnt', 'PSH Flag Cnt', 'ACK Flag Cnt', 'URG Flag Cnt', 'CWE Flag Count', 'ECE Flag Cnt', 'Down/Up Ratio', 'Pkt Size Avg', 'Fwd Seg Size Avg', 'Bwd Seg Size Avg', 'Fwd Byts/b Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', '

In [54]:
# Compare the names stored on the scaler with the frame's current header
print("Scaler feature names:", list(scaler.feature_names_in_))
print("New data columns    :", new_data.columns.tolist())
print("Match?", new_data.columns.tolist() == list(scaler.feature_names_in_))

Scaler feature names: ['Dst Port', 'Src Port', 'Bwd Header Len', 'Bwd Pkts/s', 'Bwd IAT Tot', 'Bwd IAT Mean', 'Pkt Len Mean', 'Flow Pkts/s', 'Pkt Size Avg', 'Flow Duration', 'Pkt Len Std', 'Pkt Len Var', 'Flow IAT Mean', 'Bwd IAT Max', 'Subflow Bwd Byts', 'TotLen Bwd Pkts', 'Bwd Seg Size Avg', 'Bwd Pkt Len Mean', 'Flow IAT Max', 'Init Bwd Win Byts', 'Pkt Len Max', 'Flow Byts/s', 'Fwd Pkts/s', 'Fwd Header Len', 'Flow IAT Std', 'Subflow Bwd Pkts', 'Tot Bwd Pkts', 'Bwd IAT Min', 'Bwd Pkt Len Max', 'Bwd IAT Std', 'Tot Fwd Pkts', 'Subflow Fwd Pkts', 'Bwd Pkt Len Std', 'Flow IAT Min', 'Bwd Pkt Len Min', 'Fwd IAT Mean', 'Fwd IAT Tot', 'Down/Up Ratio', 'Idle Min', 'Idle Mean', 'Idle Max', 'Active Max', 'Fwd IAT Min', 'Active Mean', 'Fwd IAT Max', 'Active Min', 'SYN Flag Cnt', 'ACK Flag Cnt', 'Fwd IAT Std', 'Fwd Pkt Len Max', 'Fwd Pkt Len Mean', 'Fwd Seg Size Avg', 'Subflow Fwd Byts', 'TotLen Fwd Pkts', 'PSH Flag Cnt', 'Bwd PSH Flags', 'Idle Std', 'Active Std', 'Fwd Act Data Pkts', 'Fwd Pkt Len

In [55]:
# Re-select the columns using the names (and order) stored on the scaler
new_data = new_data[list(scaler.feature_names_in_)]


In [56]:
import pandas as pd
import numpy as np
from joblib import load

# === Load the fitted scaler and the trained model ===
scaler = load("scaler.joblib")
model = load("model.joblib")

# Column names recorded on the scaler (feature_names_in_), kept in that order
expected_features = [name for name in scaler.feature_names_in_]

def prepare_and_predict(csv_file):
    """Load a flow CSV, clean and scale its feature columns, and predict.

    Relies on the notebook-level names ``expected_features``, ``scaler`` and
    ``model``.

    Parameters
    ----------
    csv_file : path or file-like accepted by ``pd.read_csv``.

    Returns
    -------
    Whatever ``model.predict`` returns for the scaled rows.

    Raises
    ------
    KeyError
        If one or more of ``expected_features`` is absent from the CSV header.
    """
    # 1. Load CSV
    df = pd.read_csv(csv_file)

    # Header names can carry stray leading/trailing whitespace; normalise them
    # so they can be matched against the expected feature names.
    df.columns = [str(col).strip() for col in df.columns]

    # 2. Report how the header differs from what the scaler expects
    missing = set(expected_features) - set(df.columns)
    extra = set(df.columns) - set(expected_features)
    print("Missing in CSV:", missing)
    print("Extra in CSV:", extra)

    # Fail with an explicit message instead of a bare pandas indexing error.
    if missing:
        raise KeyError(f"CSV is missing required feature columns: {sorted(missing)}")

    df = df.loc[:, expected_features]  # keep only what we need, in correct order

    # 3. Clean data: +/-inf become NaN, then every NaN becomes 0
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(0)

    # 4. Scale
    X_scaled = scaler.transform(df)

    # 5. Predict
    preds = model.predict(X_scaled)
    return preds

# === Classify the new capture and show the raw predictions ===
predictions = prepare_and_predict(
    csv_file="C:\\Users\\Asus\\Documents\\MMU\\degree sem 7 intern\\Reports\\ROS-SCRIPTS\\! important\\acmirf-new\\NavBot25.csv",  # replace data here <-----------------------
)
print(predictions)


Missing in CSV: set()
Extra in CSV: {'Bwd Blk Rate Avg', 'Fwd Pkts/b Avg', 'Fwd Blk Rate Avg', 'Protocol', 'CWE Flag Count', 'Bwd Pkts/b Avg', 'Flow ID', 'Bwd Byts/b Avg', 'URG Flag Cnt', 'RST Flag Cnt', 'Fwd PSH Flags', 'Timestamp', 'Label', 'Bwd URG Flags', 'ECE Flag Cnt', 'Init Fwd Win Byts', 'Fwd Seg Size Min', 'Dst IP', 'Fwd Byts/b Avg', 'Fwd URG Flags', 'Src IP'}
[0 0 0 ... 7 7 7]


In [57]:
def prepare_and_predict(csv_file, save_cleaned=False):
    """Read a CSV, clean the expected feature columns, scale them and predict.

    Uses the notebook-level ``expected_features``, ``scaler`` and ``model``.
    When ``save_cleaned`` is true, the cleaned feature frame is also written
    to ``cleaned_for_model.csv`` (no index column) before scaling.

    Returns the result of ``model.predict`` on the scaled features.
    """
    raw_frame = pd.read_csv(csv_file)
    feature_frame = raw_frame.loc[:, expected_features]

    # +/-inf -> NaN, then every NaN -> 0
    cleaned = feature_frame.replace([np.inf, -np.inf], np.nan)
    cleaned = cleaned.fillna(0)

    if save_cleaned:
        cleaned.to_csv("cleaned_for_model.csv", index=False)
        print("Saved cleaned CSV as cleaned_for_model.csv")

    return model.predict(scaler.transform(cleaned))

# Run the pipeline and also keep a copy of the cleaned feature frame on disk
predictions = prepare_and_predict(
    "C:\\Users\\Asus\\Documents\\MMU\\degree sem 7 intern\\Reports\\ROS-SCRIPTS\\! important\\acmirf-new\\NavBot25.csv",
    save_cleaned=True,
)


Saved cleaned CSV as cleaned_for_model.csv


In [58]:
def prepare_and_predict(csv_file, save_cleaned=False):
    """Predict a label for every row of a CSV and return the decoded names.

    Uses the notebook-level ``expected_features``, ``scaler``, ``model`` and
    ``label_map``; all of them are looked up when the function is called.
    When ``save_cleaned`` is true, the cleaned feature frame is written to
    ``cleaned_for_model.csv`` (no index column).

    Returns a list with one entry per row: the ``label_map`` value for the
    predicted class, or ``"Unknown(<prediction>)"`` if it is not in the map.
    """
    model_input = (
        pd.read_csv(csv_file)
        .loc[:, expected_features]
        .replace([np.inf, -np.inf], np.nan)  # +/-inf -> NaN
        .fillna(0)                           # NaN -> 0
    )

    if save_cleaned:
        model_input.to_csv("cleaned_for_model.csv", index=False)
        print("Saved cleaned CSV as cleaned_for_model.csv")

    class_ids = model.predict(scaler.transform(model_input))

    # Translate each numeric prediction into its label
    decoded_preds = []
    for p in class_ids:
        decoded_preds.append(label_map.get(p, f"Unknown({p})"))
    return decoded_preds


In [59]:
# Numeric class id -> label name (example — adjust if your classes differ!)
# The ids are the list positions, starting at 0.
label_map = dict(enumerate([
    "BENIGN",
    "Botnet",
    "Brute Force",
    "DDoS",
    "DoS",
    "Infiltration",
    "PortScan",
    "Web Attack",
]))

def prepare_and_predict(csv_file, save_cleaned=False):
    """Classify every row of a flow CSV and return the decoded label names.

    Relies on the notebook-level names ``expected_features``, ``scaler``,
    ``model`` and ``label_map``.

    Parameters
    ----------
    csv_file : path or file-like accepted by ``pd.read_csv``.
    save_cleaned : bool, default False
        When true, the cleaned feature frame is written to
        ``cleaned_for_model.csv`` (no index column) before scaling.

    Returns
    -------
    list
        One entry per row: the ``label_map`` value for the predicted class,
        or ``"Unknown(<prediction>)"`` when the prediction is not in the map.

    Raises
    ------
    KeyError
        If one or more of ``expected_features`` is absent from the CSV header.
    """
    df = pd.read_csv(csv_file)

    # Header names can carry stray leading/trailing whitespace; normalise them
    # so they can be matched against the expected feature names.
    df.columns = [str(col).strip() for col in df.columns]

    # Fail with an explicit message instead of a bare pandas indexing error.
    present = set(df.columns)
    missing = [col for col in expected_features if col not in present]
    if missing:
        raise KeyError(f"CSV is missing required feature columns: {missing}")

    df = df.loc[:, expected_features]
    # +/-inf become NaN, then every NaN becomes 0
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    if save_cleaned:
        df.to_csv("cleaned_for_model.csv", index=False)
        print("Saved cleaned CSV as cleaned_for_model.csv")

    X_scaled = scaler.transform(df)
    preds = model.predict(X_scaled)

    # Convert numeric predictions into labels
    decoded_preds = [label_map.get(p, f"Unknown({p})") for p in preds]
    return decoded_preds


In [60]:
# Run the pipeline on the capture, keeping a copy of the cleaned features
predictions = prepare_and_predict(
    "C:\\Users\\Asus\\Documents\\MMU\\degree sem 7 intern\\Reports\\ROS-SCRIPTS\\! important\\acmirf-new\\NavBot25.csv",
    save_cleaned=True,
)

# Peek at the first ten decoded labels
print(predictions[:10])

# Tally how often each label was predicted
from collections import Counter
counts = Counter(predictions)

print("\nSummary of predictions:")
print(counts)

# Every label other than "BENIGN" is reported as malicious
if len(predictions) == counts["BENIGN"]:
    print("✅ All traffic is benign.")
elif not counts["BENIGN"]:
    print("⚠️ All traffic is malicious.")
else:
    print(f"Mix of benign and malicious: {counts['BENIGN']} benign, {len(predictions)-counts['BENIGN']} malicious")


Saved cleaned CSV as cleaned_for_model.csv
['BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN']

Summary of predictions:
Counter({'BENIGN': 63019, 'PortScan': 30336, 'Botnet': 29888, 'Web Attack': 29084, 'Brute Force': 25912, 'DDoS': 6135, 'DoS': 4715, 'Infiltration': 3124})
Mix of benign and malicious: 63019 benign, 129194 malicious


In [61]:
# Classify the capture again (the cleaned features are re-written to disk)
predictions = prepare_and_predict(
    csv_file="C:\\Users\\Asus\\Documents\\MMU\\degree sem 7 intern\\Reports\\ROS-SCRIPTS\\! important\\acmirf-new\\NavBot25.csv",
    save_cleaned=True,
)

# First ten decoded labels
print(predictions[:10])

# Per-label frequency of the predictions
from collections import Counter
counts = Counter()
counts.update(predictions)

print("\nSummary of predictions:")
print(counts)

# Overall verdict: anything that is not "BENIGN" counts as malicious
if counts["BENIGN"] == len(predictions):
    print("✅ All traffic is benign.")
else:
    if counts["BENIGN"] == 0:
        print("⚠️ All traffic is malicious.")
    else:
        print(f"Mix of benign and malicious: {counts['BENIGN']} benign, {len(predictions)-counts['BENIGN']} malicious")


Saved cleaned CSV as cleaned_for_model.csv
['BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN', 'BENIGN']

Summary of predictions:
Counter({'BENIGN': 63019, 'PortScan': 30336, 'Botnet': 29888, 'Web Attack': 29084, 'Brute Force': 25912, 'DDoS': 6135, 'DoS': 4715, 'Infiltration': 3124})
Mix of benign and malicious: 63019 benign, 129194 malicious
