<a href="https://colab.research.google.com/github/Sarika8310/AV-Intrusion-System/blob/main/AV_Intrusion_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🚗 PHASE 1 – Final Version (Cleaned for DoS + RPM datasets)
#
# Loads the two CAN-bus CSV captures, labels them per file, decodes the
# hexadecimal columns, drops malformed rows, then splits and standardizes.
# Exposes: df, X, y, scaler, X_scaled, X_train, X_test, y_train, y_test.

import re

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# A field counts as hexadecimal only if it consists solely of hex digits.
HEX_TOKEN = re.compile(r"[0-9a-fA-F]+")


def hex_column_to_numeric(series):
    """Decode a column of hexadecimal text into numbers.

    Args:
        series: pandas Series of strings; missing entries are NaN.

    Returns:
        A float64 Series aligned with ``series``. Entries that are missing or
        that contain anything other than hex digits become NaN.
    """
    # Decode each distinct token once instead of once per row.
    lookup = {
        token: int(token, 16)
        for token in series.dropna().unique()
        if isinstance(token, str) and HEX_TOKEN.fullmatch(token)
    }
    # Tokens absent from the lookup (i.e. invalid ones) map to NaN.
    return series.map(lookup).astype("float64")


# ✅ Step 1: Load CSVs
dos_path = "/content/DoS_dataset.csv"
rpm_path = "/content/RPM_dataset.csv"

# Read every field as text. With type inference, an all-digit field such as
# "10" can be parsed as decimal 10 and would then skip the hex decoding below,
# so the same byte could be encoded two different ways.
df_dos = pd.read_csv(dos_path, header=None, dtype=str)
df_rpm = pd.read_csv(rpm_path, header=None, dtype=str)

# ✅ Step 2: Add correct column headers (12 total)
columns = ['Time', 'ID', 'Len', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'Class']
df_dos.columns = columns
df_rpm.columns = columns

# ✅ Step 3: Label datasets manually
# NOTE(review): every row of a file receives the same label, overwriting
# whatever the 12th CSV column contained. Confirm both files are homogeneous;
# if that column carries a per-frame flag, it is being discarded here.
df_dos['Class'] = 1  # Attack
df_rpm['Class'] = 0  # Normal

# ✅ Step 4: Merge datasets
df = pd.concat([df_rpm, df_dos], ignore_index=True)

# ✅ Step 5: Drop 'Time' column (not useful)
df = df.drop(['Time'], axis=1)

# ✅ Step 6: Decode features — 'Len' is a decimal count, the rest are hex
df['Len'] = pd.to_numeric(df['Len'], errors='coerce')
for col in df.columns:
    if col not in ('Len', 'Class'):
        df[col] = hex_column_to_numeric(df[col])

# ✅ Step 7: Clean up — drop rows with any undecodable/missing feature.
# df itself is cleaned so that later cells sampling from it only see rows
# the model can actually score.
df = df.dropna().reset_index(drop=True)
X = df.drop('Class', axis=1)
y = df['Class']

# ✅ Step 8: Train-test split
# Split BEFORE scaling so test-set statistics never leak into the scaler.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ✅ Step 9: Normalize features (fit on the training portion only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # full scaled matrix; this cell has always exposed it

print("✅ Phase 1 complete — data is cleaned, encoded, scaled, and split.")
print("🟢 X_train shape:", X_train.shape)
print("🟢 y_train value counts:\n", y_train.value_counts())


✅ Phase 1 complete — data is cleaned, encoded, scaled, and split.
🟢 X_train shape: (1459944, 10)
🟢 y_train value counts:
 Class
0    1219207
1     240737
Name: count, dtype: int64


In [None]:
# 🚗 PHASE 2 – Random Forest Model
#
# Fits a random forest on the Phase 1 training split, reports hold-out
# metrics, and pickles the fitted estimator to the working directory.

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib  # for saving the model

# ✅ Step 1: Train model (fit() returns the estimator itself)
forest_params = {"n_estimators": 100, "random_state": 42, "n_jobs": -1}
rf_model = RandomForestClassifier(**forest_params).fit(X_train, y_train)

# ✅ Step 2: Predictions on the held-out split
y_pred = rf_model.predict(X_test)

# ✅ Step 3: Evaluation
acc = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", round(acc * 100, 2), "%")

for heading, report in (
    ("\n📊 Confusion Matrix:", confusion_matrix(y_test, y_pred)),
    ("\n📄 Classification Report:", classification_report(y_test, y_pred)),
):
    print(heading)
    print(report)

# ✅ Step 4: Save model (optional)
model_path = "random_forest_car_model.pkl"
joblib.dump(rf_model, model_path)
print("✅ Model saved as random_forest_car_model.pkl")


✅ Accuracy: 93.83 %

📊 Confusion Matrix:
[[301904   2847]
 [ 19672  40563]]

📄 Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.99      0.96    304751
           1       0.93      0.67      0.78     60235

    accuracy                           0.94    364986
   macro avg       0.94      0.83      0.87    364986
weighted avg       0.94      0.94      0.93    364986

✅ Model saved as random_forest_car_model.pkl


In [None]:
import joblib

# Save model and scaler
# The trained estimator is `rf_model` (Phase 2). The name `model` is only
# bound by later cells that reload the pickle, so using it here raises
# NameError on a fresh kernel, or silently saves a stale object otherwise.
joblib.dump(rf_model, "random_forest_car_model.pkl")
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [None]:
from google.colab import files

# Trigger a browser download for each saved artifact, model first.
for artifact in ("random_forest_car_model.pkl", "scaler.pkl"):
    files.download(artifact)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
import time
import joblib

# Step 1: Load the saved model
model = joblib.load("/content/random_forest_car_model.pkl")

# Step 2: Load the scaler that was fitted on the raw features in Phase 1.
# Fitting a fresh StandardScaler on X_train is wrong: X_train is already
# standardized, so that scaler is close to an identity transform and the raw
# sample below would reach the model effectively unscaled.
scaler = joblib.load("/content/scaler.pkl")

# Step 3: Sample 10 random rows from your full DataFrame (df)
# Seeded so a Restart & Run All reproduces the same frames.
sample = df.sample(10, random_state=42).drop(columns=['Class'])

# Step 4: Convert hex-like values to integers (in case you reloaded df)
for col in sample.columns:
    sample[col] = sample[col].apply(lambda x: int(str(x), 16)
                                    if isinstance(x, str) and all(c in '0123456789abcdefABCDEF' for c in str(x))
                                    else x)

# Rows that still hold non-numeric or missing values cannot be scored
# (Phase 1 dropped such rows before training), so skip them here as well.
sample = sample.apply(pd.to_numeric, errors='coerce').dropna()

# Step 5: Scale the sample
sample_scaled = scaler.transform(sample)

# Step 6: Simulate real-time prediction
print("🚗 Real-Time Intrusion Detection Simulation Started...\n")
for i, row in enumerate(sample_scaled):
    time.sleep(1)  # Simulate time delay like real CAN bus signals
    pred = model.predict([row])[0]
    status = "🚨 ATTACK DETECTED!" if pred == 1 else "✅ Normal Signal"
    print(f"[Frame {i+1}] Status: {status}")




🚗 Real-Time Intrusion Detection Simulation Started...

[Frame 1] Status: ✅ Normal Signal
[Frame 2] Status: ✅ Normal Signal
[Frame 3] Status: ✅ Normal Signal
[Frame 4] Status: ✅ Normal Signal
[Frame 5] Status: ✅ Normal Signal
[Frame 6] Status: 🚨 ATTACK DETECTED!
[Frame 7] Status: ✅ Normal Signal
[Frame 8] Status: 🚨 ATTACK DETECTED!
[Frame 9] Status: ✅ Normal Signal
[Frame 10] Status: 🚨 ATTACK DETECTED!


In [None]:
import pandas as pd
import time
import joblib
from datetime import datetime

# Load trained model
model = joblib.load("/content/random_forest_car_model.pkl")

# Load the scaler fitted on the raw features in Phase 1.
# Fitting a fresh StandardScaler on X_train is wrong: X_train is already
# standardized, so that scaler is close to an identity transform and the raw
# sample below would reach the model effectively unscaled.
scaler = joblib.load("/content/scaler.pkl")

# Sample 15 rows for simulation (seeded so re-runs log the same frames)
sample = df.sample(15, random_state=42).drop(columns=['Class'])

# Convert hex-like to int
for col in sample.columns:
    sample[col] = sample[col].apply(lambda x: int(str(x), 16)
                                    if isinstance(x, str) and all(c in '0123456789abcdefABCDEF' for c in str(x))
                                    else x)

# Rows that still hold non-numeric or missing values cannot be scored
# (Phase 1 dropped such rows before training), so skip them here as well.
sample = sample.apply(pd.to_numeric, errors='coerce').dropna()

# Scale input
sample_scaled = scaler.transform(sample)

# Prepare logging
log = []

print("🚗 Intrusion Detection System with Logging Started...\n")

for i, row in enumerate(sample_scaled):
    time.sleep(1)
    # One forest evaluation per frame: take the most probable class from the
    # probability vector instead of calling predict() and predict_proba().
    class_probs = model.predict_proba([row])[0]
    best = class_probs.argmax()
    # Map the position back to its label via classes_, rather than assuming
    # that a class label can be used as a column index.
    prediction = model.classes_[best]
    probability = class_probs[best]  # Confidence score

    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    status = "ATTACK" if prediction == 1 else "NORMAL"
    confidence = round(probability * 100, 2)

    print(f"[{timestamp}] Frame {i+1} ➜ {status} ({confidence}%)")

    # Log this frame
    log.append({
        "Timestamp": timestamp,
        "Frame": i + 1,
        "Status": status,
        "Confidence (%)": confidence
    })

# Save to CSV
log_df = pd.DataFrame(log)
log_df.to_csv("intrusion_log.csv", index=False)
print("\n✅ All detection results saved to 'intrusion_log.csv'")




🚗 Intrusion Detection System with Logging Started...

[2025-06-24 07:14:21] Frame 1 ➜ ATTACK (60.59%)
[2025-06-24 07:14:22] Frame 2 ➜ NORMAL (55.86%)
[2025-06-24 07:14:23] Frame 3 ➜ NORMAL (59.86%)
[2025-06-24 07:14:24] Frame 4 ➜ NORMAL (57.01%)
[2025-06-24 07:14:25] Frame 5 ➜ NORMAL (57.01%)
[2025-06-24 07:14:26] Frame 6 ➜ ATTACK (60.59%)
[2025-06-24 07:14:27] Frame 7 ➜ NORMAL (68.15%)
[2025-06-24 07:14:28] Frame 8 ➜ NORMAL (64.37%)
[2025-06-24 07:14:29] Frame 9 ➜ NORMAL (75.71%)
[2025-06-24 07:14:31] Frame 10 ➜ NORMAL (57.01%)
[2025-06-24 07:14:32] Frame 11 ➜ NORMAL (79.13%)
[2025-06-24 07:14:33] Frame 12 ➜ ATTACK (54.99%)
[2025-06-24 07:14:34] Frame 13 ➜ ATTACK (51.55%)
[2025-06-24 07:14:35] Frame 14 ➜ ATTACK (50.25%)
[2025-06-24 07:14:36] Frame 15 ➜ ATTACK (60.59%)

✅ All detection results saved to 'intrusion_log.csv'


In [None]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.46.0-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m44.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m47.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hI

2025-06-24 07:17:37.970 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [None]:
# Source of the Streamlit front end. It is written to app.py below and expects
# random_forest_car_model.pkl and scaler.pkl to sit next to it.
code = '''
import re

import joblib
import pandas as pd
import streamlit as st

# Column layout of the raw CAN captures and the subset the model consumes.
ALL_COLUMNS = ['Time', 'ID', 'Len', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'Class']
FEATURES = ['ID', 'Len', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8']
HEX_TOKEN = re.compile("[0-9a-fA-F]+")

# Page settings
st.set_page_config(page_title="AV Intrusion Detector", layout="centered")
st.title("🚗 Smart Car Intrusion Detection")
st.markdown("Upload CAN bus data to detect possible intrusions in real time.")


@st.cache_resource
def load_artifacts():
    """Load the trained model and its training-time scaler once per server process.

    The scaler must be the one saved during training. Refitting a new scaler
    on other data gives the model inputs on a different scale than it learned.
    """
    return joblib.load("random_forest_car_model.pkl"), joblib.load("scaler.pkl")


def hex_to_number(series):
    """Decode a Series of hexadecimal strings; anything else becomes NaN."""
    lookup = {
        token: int(token, 16)
        for token in series.dropna().unique()
        if isinstance(token, str) and HEX_TOKEN.fullmatch(token)
    }
    return series.map(lookup).astype("float64")


def read_can_csv(buffer):
    """Read an uploaded capture, with or without a header row.

    Every field is read as text so all-digit hex bytes are not mistaken for
    decimal numbers. Raises ValueError when the layout is not recognised.
    """
    raw = pd.read_csv(buffer, header=None, dtype=str)
    first_row = [str(value).strip() for value in raw.iloc[0]]
    if set(FEATURES).issubset(first_row):
        # The first line is a header naming the feature columns.
        raw = raw.iloc[1:].reset_index(drop=True)
        raw.columns = first_row
    elif raw.shape[1] == len(ALL_COLUMNS):
        raw.columns = ALL_COLUMNS
    elif raw.shape[1] == len(FEATURES):
        raw.columns = FEATURES
    else:
        raise ValueError(
            f"expected {len(ALL_COLUMNS)} or {len(FEATURES)} columns, got {raw.shape[1]}"
        )
    return raw[FEATURES]


def clean_features(frame):
    """Decode the feature columns and drop rows that cannot be decoded."""
    cleaned = frame.copy()
    cleaned['Len'] = pd.to_numeric(cleaned['Len'], errors='coerce')
    for col in FEATURES:
        if col != 'Len':
            cleaned[col] = hex_to_number(cleaned[col])
    return cleaned.dropna().astype("int64")


# Load model and scaler
model, scaler = load_artifacts()

# Upload CSV
uploaded_file = st.file_uploader("📤 Upload CAN Bus CSV File", type=["csv"])

if uploaded_file:
    try:
        df = clean_features(read_can_csv(uploaded_file))
    except ValueError as exc:
        # Covers empty files, parser errors and unrecognised layouts.
        st.error(f"Could not read the uploaded file: {exc}")
        st.stop()

    if df.empty:
        st.warning("No valid CAN frames found in the uploaded file.")
        st.stop()

    # Scale & Predict
    df_scaled = scaler.transform(df)
    predictions = model.predict(df_scaled)
    confidences = model.predict_proba(df_scaled)

    df['Status'] = ['ATTACK' if p == 1 else 'NORMAL' for p in predictions]
    df['Confidence (%)'] = [round(max(c) * 100, 2) for c in confidences]

    st.success("✅ Intrusion Detection Complete")
    st.dataframe(df)

    # Download option
    result_csv = df.to_csv(index=False).encode('utf-8')
    st.download_button("⬇️ Download Detection Results", result_csv, "intrusion_results.csv", "text/csv")

else:
    st.info("Upload a CSV file to begin.")

'''

# 🔽 Save as app.py
with open("app.py", "w") as f:
    f.write(code)

# 🔽 Download app.py to your computer
from google.colab import files
files.download("app.py")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>