In [1]:
# Sanity check: show where the kernel is running and what that directory holds.
import os

print("Current directory:", os.getcwd())
print("Files in directory:", os.listdir('.'))

# Report, one line per artifact, whether each file needed for inference is present.
files_to_check = ['model.joblib', 'scaler.joblib', 'features.txt']
for artifact in files_to_check:
    print(f"✅ {artifact} exists" if os.path.exists(artifact) else f"❌ {artifact} missing")

Current directory: c:\Users\B760M-ITX D4 WIFI\Documents\GitHub\ros-security\dataset\models\ac-mi-rf
Files in directory: ['acmirf.ipynb', 'datasetvalidation.ipynb', 'features.txt', 'model.joblib', 'readme.md', 'scaler.joblib', 'used_features.txt']
✅ model.joblib exists
✅ scaler.joblib exists
✅ features.txt exists


In [2]:
# Load the trained model, its scaler and the feature list from the working directory.
from joblib import load
import pandas as pd
import numpy as np

try:
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load artifacts that come from a trusted source.
    model = load("model.joblib")
    scaler = load("scaler.joblib")

    # One feature name per line. Blank lines (e.g. a trailing newline) are
    # skipped so they do not end up as empty-string feature names.
    # NOTE(review): the saved run reports 78 names here while the scaler reports
    # 63 input features; the later cells select columns via
    # scaler.feature_names_in_, not this list - confirm which file is current.
    with open("features.txt") as f:
        features = [name for name in (line.strip() for line in f) if name]

    print(f"✅ Model loaded successfully: {type(model)}")
    print(f"✅ Scaler loaded successfully: {type(scaler)}")
    print(f"✅ Features loaded: {len(features)} features")
    print(f"First 5 features: {features[:5]}")

except Exception as e:
    print(f"❌ Error loading models: {e}")
    # Re-raise: the following cells need `model` and `scaler`. Continuing after
    # a failed load would either hit a NameError later or silently reuse stale
    # objects left in the kernel by an earlier run.
    raise

✅ Model loaded successfully: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
✅ Scaler loaded successfully: <class 'sklearn.preprocessing._data.StandardScaler'>
✅ Features loaded: 78 features
First 5 features: ['Src Port', 'Dst Port', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts']


In [3]:
# Load the dataset and compare its columns with the features the scaler was fitted on.
try:
    # Load the dataset
    dataset_path = "C:\\Users\\B760M-ITX D4 WIFI\\Downloads\\AC-MI-RF\\NavBot25.csv"
    print(f"Loading dataset from: {dataset_path}")

    df = pd.read_csv(dataset_path)
    print(f"✅ Dataset loaded: {df.shape}")
    print(f"Dataset columns (first 10): {list(df.columns)[:10]}")

    # Check feature compatibility:
    #   missing - features the scaler expects that the dataset does not provide
    #   extra   - dataset columns the scaler was not fitted on
    expected_features = list(scaler.feature_names_in_)
    missing = set(expected_features) - set(df.columns)
    extra = set(df.columns) - set(expected_features)

    print(f"\nFeature compatibility check:")
    print(f"Expected features: {len(expected_features)}")
    print(f"Dataset columns: {len(df.columns)}")
    # Sets have no stable iteration order (string hashing is randomised per
    # process), so sort before slicing to show the same preview on every run.
    print(f"Missing features: {len(missing)} - {sorted(missing)[:5] if missing else 'None'}")
    print(f"Extra columns: {len(extra)} - {sorted(extra)[:5] if extra else 'None'}")

except Exception as e:
    print(f"❌ Error loading dataset: {e}")

Loading dataset from: C:\Users\B760M-ITX D4 WIFI\Downloads\AC-MI-RF\NavBot25.csv
✅ Dataset loaded: (192213, 84)
Dataset columns (first 10): ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol', 'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts']

Feature compatibility check:
Expected features: 63
Dataset columns: 84
Missing features: 0 - None
Extra columns: 21 - ['Init Fwd Win Byts', 'URG Flag Cnt', 'CWE Flag Count', 'Fwd URG Flags', 'Bwd Byts/b Avg']


In [4]:
# Complete prediction and analysis
from collections import Counter

# Numeric class predicted by the model -> human-readable ROS traffic label.
# The position in the list is the class index (0 = "Normal", 7 = "Port Scanning Attack").
label_map = dict(enumerate([
    "Normal",
    "DoS Attack",
    "UnauthSub Attack",
    "SSH Bruteforce",
    "Pubflood",
    "Subflood",
    "Reverse Shell",
    "Port Scanning Attack",
]))

def predict_attacks(csv_file, sample_size=1000, random_state=42):
    """
    Predict attack labels for the rows of a CSV file.

    Reads ``csv_file``, optionally down-samples it, keeps the columns listed in
    ``scaler.feature_names_in_``, replaces infinite and missing values with 0,
    scales the result with the module-level ``scaler`` and classifies it with
    the module-level ``model``. Each prediction is translated through
    ``label_map``; a value without an entry becomes ``"Unknown(<value>)"``.

    Args:
        csv_file: Path (or anything ``pd.read_csv`` accepts) of the dataset.
        sample_size: Maximum number of rows to analyse. When the file holds
            more rows, a random sample of this size is drawn. ``None``
            analyses every row.
        random_state: Seed passed to ``DataFrame.sample`` so the drawn sample
            is reproducible.

    Returns:
        A ``(predicted_labels, total_rows)`` tuple: the list of label strings
        for the analysed rows and the number of rows in the whole file.
        On any error the message is printed and ``([], 0)`` is returned.
    """
    try:
        # Load and prepare data
        df = pd.read_csv(csv_file)
        total_rows = len(df)

        # Take a sample for faster processing if the dataset is large;
        # sample_size=None disables sampling altogether.
        if sample_size is not None and total_rows > sample_size:
            df_sample = df.sample(n=sample_size, random_state=random_state)
            print(f"Using sample of {sample_size} rows from {total_rows} total rows")
        else:
            df_sample = df
            print(f"Using all {total_rows} rows")

        # Keep only model features, in the order the scaler was fitted on.
        expected_features = list(scaler.feature_names_in_)
        missing = [name for name in expected_features if name not in df_sample.columns]
        if missing:
            # Fail with a readable message instead of pandas' generic KeyError.
            raise ValueError(
                f"dataset is missing {len(missing)} required feature column(s): {missing[:5]}"
            )
        df_clean = df_sample[expected_features].copy()

        # Clean data: +/-inf is mapped to NaN first so a single fillna covers both.
        df_clean = df_clean.replace([np.inf, -np.inf], np.nan).fillna(0)

        # Scale features
        X_scaled = scaler.transform(df_clean)

        # Predict
        predictions = model.predict(X_scaled)

        # Convert to labels
        predicted_labels = [label_map.get(p, f"Unknown({p})") for p in predictions]

        return predicted_labels, total_rows

    except Exception as e:
        # Best-effort contract: callers test the returned list for emptiness.
        print(f"❌ Prediction error: {e}")
        return [], 0

# Run the classifier on the dataset and print a threat report.
print("🔍 Starting ROS Security Analysis...")
predictions, total_samples = predict_attacks("C:\\Users\\B760M-ITX D4 WIFI\\Downloads\\AC-MI-RF\\NavBot25.csv")

if not predictions:
    # predict_attacks returns an empty list when it failed
    print("❌ No predictions generated")
else:
    # Tally how often each label was predicted
    counts = Counter(predictions)
    sample_size = len(predictions)

    print(f"\n📊 ROS Security Analysis Results:")
    print(f"Total samples in dataset: {total_samples:,}")
    print(f"Samples analyzed: {sample_size:,}")
    print(f"\nDetected threats:")

    # One line per label, most frequent first
    for label, hits in counts.most_common():
        icon = "🛡️" if label == "Normal" else "🚨"
        share = hits / sample_size * 100
        print(f"{icon} {label}: {hits:,} ({share:.2f}%)")

    # Overall verdict: everything that is not "Normal" counts as an attack
    normal_count = counts.get("Normal", 0)
    attack_count = sample_size - normal_count

    print(f"\n🎯 Security Summary:")
    if attack_count == 0:
        print("✅ All traffic is NORMAL - No threats detected!")
    elif normal_count == 0:
        print("🚨 ALL traffic contains ATTACKS - Critical security risk!")
    else:
        normal_pct = normal_count / sample_size * 100
        attack_pct = attack_count / sample_size * 100
        print(f"⚠️ Mixed traffic: {normal_pct:.1f}% normal, {attack_pct:.1f}% attacks")

        # Top three attack labels; most_common() is already ordered by count,
        # so dropping "Normal" from it leaves the attacks ranked.
        attack_types = dict(counts)
        attack_types.pop("Normal", None)
        if attack_types:
            print(f"\n🔥 Most common attacks:")
            ranked_attacks = [pair for pair in counts.most_common() if pair[0] in attack_types]
            for name, cases in ranked_attacks[:3]:
                print(f"   • {name}: {cases:,} cases ({cases / sample_size * 100:.2f}%)")

🔍 Starting ROS Security Analysis...
Using sample of 1000 rows from 192213 total rows

📊 ROS Security Analysis Results:
Total samples in dataset: 192,213
Samples analyzed: 1,000

Detected threats:
🛡️ Normal: 333 (33.30%)
🚨 DoS Attack: 159 (15.90%)
🚨 Reverse Shell: 154 (15.40%)
🚨 Port Scanning Attack: 143 (14.30%)
🚨 UnauthSub Attack: 138 (13.80%)
🚨 SSH Bruteforce: 33 (3.30%)
🚨 Pubflood: 26 (2.60%)
🚨 Subflood: 14 (1.40%)

🎯 Security Summary:
⚠️ Mixed traffic: 33.3% normal, 66.7% attacks

🔥 Most common attacks:
   • DoS Attack: 159 cases (15.90%)
   • Reverse Shell: 154 cases (15.40%)
   • Port Scanning Attack: 143 cases (14.30%)
