In [1]:
# Quick test to verify model loading
import os
print("Current directory:", os.getcwd())
print("Files in directory:", os.listdir('.'))

# Test if model files exist
files_to_check = ['model.joblib', 'scaler.joblib', 'features.txt']
for file in files_to_check:
    if os.path.exists(file):
        print(f"✅ {file} exists")
    else:
        print(f"❌ {file} missing")

Current directory: c:\Users\B760M-ITX D4 WIFI\Documents\GitHub\ros-security\dataset\models\ac-mi-dt
Files in directory: ['acmidt.ipynb', 'datasetvalidation.ipynb', 'features.txt', 'model.joblib', 'scaler.joblib']
✅ model.joblib exists
✅ scaler.joblib exists
✅ features.txt exists


In [2]:
# Load model and test basic functionality
from joblib import load
import pandas as pd
import numpy as np

try:
    # Load model, scaler, and features
    model = load("model.joblib")
    scaler = load("scaler.joblib")
    
    with open("features.txt") as f:
        features = [line.strip() for line in f]
    
    print(f"✅ Model loaded successfully: {type(model)}")
    print(f"✅ Scaler loaded successfully: {type(scaler)}")
    print(f"✅ Features loaded: {len(features)} features")
    print(f"First 5 features: {features[:5]}")
    
except Exception as e:
    print(f"❌ Error loading models: {e}")

✅ Model loaded successfully: <class 'sklearn.tree._classes.DecisionTreeClassifier'>
✅ Scaler loaded successfully: <class 'sklearn.preprocessing._data.StandardScaler'>
✅ Features loaded: 78 features
First 5 features: ['Src Port', 'Dst Port', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts']


In [3]:
# Test with actual dataset
try:
    # Load the dataset
    dataset_path = "C:\\Users\\B760M-ITX D4 WIFI\\Downloads\\AC-MI-RF\\NavBot25.csv"
    print(f"Loading dataset from: {dataset_path}")
    
    df = pd.read_csv(dataset_path)
    print(f"✅ Dataset loaded: {df.shape}")
    print(f"Dataset columns (first 10): {list(df.columns)[:10]}")
    
    # Check feature compatibility
    expected_features = list(scaler.feature_names_in_)
    missing = set(expected_features) - set(df.columns)
    extra = set(df.columns) - set(expected_features)
    
    print(f"\\nFeature compatibility check:")
    print(f"Expected features: {len(expected_features)}")
    print(f"Dataset columns: {len(df.columns)}")
    print(f"Missing features: {len(missing)} - {list(missing)[:5] if missing else 'None'}")
    print(f"Extra columns: {len(extra)} - {list(extra)[:5] if extra else 'None'}")
    
except Exception as e:
    print(f"❌ Error loading dataset: {e}")

Loading dataset from: C:\Users\B760M-ITX D4 WIFI\Downloads\AC-MI-RF\NavBot25.csv
✅ Dataset loaded: (192213, 84)
Dataset columns (first 10): ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts']
\nFeature compatibility check:
Expected features: 63
Dataset columns: 84
Missing features: 0 - None
Extra columns: 21 - ['Dst IP', 'Fwd Byts/b Avg', 'Src IP', 'CWE Flag Count', 'Fwd PSH Flags']


In [4]:
# Complete prediction and analysis
from collections import Counter

# Define ROS security attack mapping
label_map = {
    0: "Normal",
    1: "DoS Attack", 
    2: "UnauthSub Attack",
    3: "SSH Bruteforce",
    4: "Pubflood",
    5: "Subflood", 
    6: "Reverse Shell",
    7: "Port Scanning Attack"
}

def predict_attacks(csv_file, sample_size=1000):
    """
    Predict attacks on dataset
    """
    try:
        # Load and prepare data
        df = pd.read_csv(csv_file)
        
        # Take a sample for faster processing if dataset is large
        if len(df) > sample_size:
            df_sample = df.sample(n=sample_size, random_state=42)
            print(f"Using sample of {sample_size} rows from {len(df)} total rows")
        else:
            df_sample = df
            print(f"Using all {len(df)} rows")
        
        # Keep only model features
        expected_features = list(scaler.feature_names_in_)
        df_clean = df_sample[expected_features].copy()
        
        # Clean data
        df_clean = df_clean.replace([np.inf, -np.inf], np.nan).fillna(0)
        
        # Scale features
        X_scaled = scaler.transform(df_clean)
        
        # Predict
        predictions = model.predict(X_scaled)
        
        # Convert to labels
        predicted_labels = [label_map.get(p, f"Unknown({p})") for p in predictions]
        
        return predicted_labels, len(df)
        
    except Exception as e:
        print(f"❌ Prediction error: {e}")
        return [], 0

# Run prediction
print("🔍 Starting ROS Security Analysis...")
predictions, total_samples = predict_attacks("C:\\Users\\B760M-ITX D4 WIFI\\Downloads\\AC-MI-RF\\NavBot25.csv")

if predictions:
    # Analyze results
    counts = Counter(predictions)
    sample_size = len(predictions)
    
    print(f"\\n📊 ROS Security Analysis Results:")
    print(f"Total samples in dataset: {total_samples:,}")
    print(f"Samples analyzed: {sample_size:,}")
    print(f"\\nDetected threats:")
    
    for attack_type, count in counts.most_common():
        percentage = (count / sample_size) * 100
        status = "🛡️" if attack_type == "Normal" else "🚨"
        print(f"{status} {attack_type}: {count:,} ({percentage:.2f}%)")
    
    # Security summary
    normal_count = counts.get("Normal", 0)
    attack_count = sample_size - normal_count
    
    print(f"\\n🎯 Security Summary:")
    if normal_count == sample_size:
        print("✅ All traffic is NORMAL - No threats detected!")
    elif normal_count == 0:
        print("🚨 ALL traffic contains ATTACKS - Critical security risk!")
    else:
        normal_pct = (normal_count/sample_size)*100
        attack_pct = (attack_count/sample_size)*100
        print(f"⚠️ Mixed traffic: {normal_pct:.1f}% normal, {attack_pct:.1f}% attacks")
        
        # Show most common attacks
        attack_types = {k: v for k, v in counts.items() if k != "Normal"}
        if attack_types:
            print(f"\\n🔥 Most common attacks:")
            for attack, count in sorted(attack_types.items(), key=lambda x: x[1], reverse=True)[:3]:
                pct = (count/sample_size)*100
                print(f"   • {attack}: {count:,} cases ({pct:.2f}%)")
else:
    print("❌ No predictions generated")

🔍 Starting ROS Security Analysis...
Using sample of 1000 rows from 192213 total rows
\n📊 ROS Security Analysis Results:
Total samples in dataset: 192,213
Samples analyzed: 1,000
\nDetected threats:
🛡️ Normal: 331 (33.10%)
🚨 DoS Attack: 159 (15.90%)
🚨 Port Scanning Attack: 149 (14.90%)
🚨 Reverse Shell: 148 (14.80%)
🚨 UnauthSub Attack: 139 (13.90%)
🚨 SSH Bruteforce: 34 (3.40%)
🚨 Pubflood: 26 (2.60%)
🚨 Subflood: 14 (1.40%)
\n🎯 Security Summary:
⚠️ Mixed traffic: 33.1% normal, 66.9% attacks
\n🔥 Most common attacks:
   • DoS Attack: 159 cases (15.90%)
   • Port Scanning Attack: 149 cases (14.90%)
   • Reverse Shell: 148 cases (14.80%)


In [None]:
from joblib import load
import pandas as pd

# Load model, scaler, and features
model = load("model.joblib")
scaler = load("scaler.joblib")   # skip if you didn't save scaler

with open("features.txt") as f:
    features = [line.strip() for line in f]

print("Features order:", features)

In [None]:
# Load your new CSV data
new_data = pd.read_csv("C:\\Users\\B760M-ITX D4 WIFI\\Documents\\GitHub\\ros-security\\dataset\\existing\\NavBot25.csv")

# Compare model features vs CSV columns
print("Model expects:", features[:10], "...")  # first 10 expected features
print("CSV has:", list(new_data.columns)[:10], "...")  # first 10 columns from CSV

missing = set(features) - set(new_data.columns)
extra = set(new_data.columns) - set(features)

print("Missing in CSV:", missing)
print("Extra in CSV:", extra)

In [None]:
# Force reordering of columns to match training order
new_data = new_data.reindex(columns=features)

In [None]:
# Keep only model's features (drop all others)
new_data = new_data.loc[:, new_data.columns.intersection(features)]

# Reorder columns to exactly match training
new_data = new_data.reindex(columns=features)

In [None]:
print("Expected features:", features)
print("New data columns :", list(new_data.columns))
print("Shape of new_data:", new_data.shape)

In [None]:
new_data = new_data[features].copy()

In [None]:
# Clean up column names (remove hidden spaces etc.)
new_data.columns = new_data.columns.str.strip()

# Now force subset to ONLY training features
new_data = new_data.loc[:, features]

# Confirm
print("Training features:", features)
print("New data columns:", list(new_data.columns))
print("Same?", features == list(new_data.columns))

In [None]:
print("Scaler feature names:", list(scaler.feature_names_in_))
print("New data columns    :", list(new_data.columns))
print("Match?", list(scaler.feature_names_in_) == list(new_data.columns))

In [None]:
new_data = new_data.loc[:, scaler.feature_names_in_]

In [None]:
import pandas as pd
import numpy as np
from joblib import load

# === Load model + scaler ===
model = load("model.joblib")
scaler = load("scaler.joblib")

# Expected features from scaler
expected_features = list(scaler.feature_names_in_)

def prepare_and_predict(csv_file):
    # 1. Load CSV
    df = pd.read_csv(csv_file)

    # 2. Keep only required columns (drop timestamps or extras)
    missing = set(expected_features) - set(df.columns)
    extra = set(df.columns) - set(expected_features)
    print("Missing in CSV:", missing)
    print("Extra in CSV:", extra)

    df = df.loc[:, expected_features]  # keep only what we need, in correct order

    # 3. Clean data
    df = df.replace([np.inf, -np.inf], np.nan)  # replace inf with NaN
    df = df.fillna(0)  # fill NaN with 0 (safe default, you can change strategy)

    # 4. Scale
    X_scaled = scaler.transform(df)

    # 5. Predict
    preds = model.predict(X_scaled)
    return preds

# === Run prediction on your new CSV ===
predictions = prepare_and_predict("C:\\Users\\B760M-ITX D4 WIFI\\Documents\\GitHub\\ros-security\\dataset\\existing\\NavBot25.csv") # replace data here <-----------------------
print(predictions)

In [None]:
def prepare_and_predict(csv_file, save_cleaned=False):
    df = pd.read_csv(csv_file)
    df = df.loc[:, expected_features]
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    if save_cleaned:
        df.to_csv("cleaned_for_model.csv", index=False)
        print("Saved cleaned CSV as cleaned_for_model.csv")

    X_scaled = scaler.transform(df)
    return model.predict(X_scaled)

predictions = prepare_and_predict("C:\\Users\\B760M-ITX D4 WIFI\\Documents\\GitHub\\ros-security\\dataset\\existing\\NavBot25.csv", save_cleaned=True)

In [None]:
def prepare_and_predict(csv_file, save_cleaned=False):
    df = pd.read_csv(csv_file)
    df = df.loc[:, expected_features]
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    if save_cleaned:
        df.to_csv("cleaned_for_model.csv", index=False)
        print("Saved cleaned CSV as cleaned_for_model.csv")

    X_scaled = scaler.transform(df)
    preds = model.predict(X_scaled)

    # Convert numeric predictions into labels
    decoded_preds = [label_map.get(p, f"Unknown({p})") for p in preds]
    return decoded_preds

In [None]:
# Define label mapping for ROS security attacks
label_map = {
    0: "Normal",
    1: "DoS Attack",
    2: "UnauthSub Attack",
    3: "SSH Bruteforce",
    4: "Pubflood",
    5: "Subflood",
    6: "Reverse Shell",
    7: "Port Scanning Attack"
}

def prepare_and_predict(csv_file, save_cleaned=False):
    df = pd.read_csv(csv_file)
    df = df.loc[:, expected_features]
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    if save_cleaned:
        df.to_csv("cleaned_for_model.csv", index=False)
        print("Saved cleaned CSV as cleaned_for_model.csv")

    X_scaled = scaler.transform(df)
    preds = model.predict(X_scaled)

    # Convert numeric predictions into labels
    decoded_preds = [label_map.get(p, f"Unknown({p})") for p in preds]
    return decoded_preds

In [None]:
predictions = prepare_and_predict("C:\\Users\\B760M-ITX D4 WIFI\\Documents\\GitHub\\ros-security\\dataset\\existing\\NavBot25.csv", save_cleaned=True)

# Print first 10 results
print(predictions[:10])

# Count normal vs attack types
from collections import Counter
counts = Counter(predictions)

print("\nSummary of predictions:")
print(counts)

if counts["Normal"] == len(predictions):
    print("✅ All traffic is normal.")
elif counts["Normal"] == 0:
    print("⚠️ All traffic contains attacks.")
else:
    print(f"Mix of normal and attack traffic: {counts['Normal']} normal, {len(predictions)-counts['Normal']} attacks")

In [None]:
predictions = prepare_and_predict("C:\\Users\\B760M-ITX D4 WIFI\\Documents\\GitHub\\ros-security\\dataset\\existing\\NavBot25.csv", save_cleaned=True)

# Print first 10 results
print("First 10 predictions:", predictions[:10])

# Count normal vs attack types
from collections import Counter
counts = Counter(predictions)

print("\nDetailed prediction summary:")
for attack_type, count in counts.items():
    percentage = (count / len(predictions)) * 100
    print(f"{attack_type}: {count} ({percentage:.2f}%)")

print(f"\nTotal samples analyzed: {len(predictions)}")

if counts["Normal"] == len(predictions):
    print("✅ All traffic is normal - No threats detected.")
elif counts["Normal"] == 0:
    print("🚨 All traffic contains attacks - High risk!")
else:
    normal_count = counts['Normal']
    attack_count = len(predictions) - normal_count
    print(f"⚠️ Mixed traffic detected: {normal_count} normal ({(normal_count/len(predictions)*100):.1f}%), {attack_count} attacks ({(attack_count/len(predictions)*100):.1f}%)")