<a href="https://colab.research.google.com/github/Nandita64/Auth/blob/main/DoS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas scikit-learn numpy



In [17]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# =========================
# Load Dataset
# =========================

# Define column names based on the expected HCRL format
# Timestamp, CAN_ID, DLC, DATA0, DATA1, DATA2, DATA3, DATA4, DATA5, DATA6, DATA7, Label
column_names = ['Timestamp', 'CAN_ID', 'DLC', 'DATA0', 'DATA1', 'DATA2', 'DATA3', 'DATA4', 'DATA5', 'DATA6', 'DATA7', 'Label']
data = pd.read_csv("data/DoS_dataset.csv", header=None, names=column_names)
#data = pd.read_csv("data/DoS_dataset.csv")
# Feature Engineering
data["inter_arrival_time"] = data["Timestamp"].diff().fillna(0)

if data["CAN_ID"].dtype == object:
    # Handle 'nan' values before converting to int from hex
    data["CAN_ID"] = data["CAN_ID"].apply(lambda x: int(str(x), 16) if pd.notna(x) else 0)

data["Label"] = data["Label"].apply(lambda x: 0 if x == "Normal" else 1)

features = ["inter_arrival_time", "CAN_ID"]
X = data[features]
y = data["Label"]

# =========================
# Random Forest (Supervised)
# =========================

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

rf_pred = rf.predict(X_test)

print("===== Random Forest Performance ====")
print("Accuracy :", round(accuracy_score(y_test, rf_pred) * 100, 2), "%")
print("Precision:", round(precision_score(y_test, rf_pred) * 100, 2), "%")
print("Recall   :", round(recall_score(y_test, rf_pred) * 100, 2), "%")
print("F1 Score :", round(f1_score(y_test, rf_pred) * 100, 2), "%")

# =========================
# Isolation Forest (Unsupervised)
# =========================

iso = IsolationForest(contamination=0.05, random_state=42)
iso.fit(X)

iso_pred = iso.predict(X)

# Convert IF output (-1 anomaly, 1 normal) to 1 attack, 0 normal
iso_pred = np.where(iso_pred == -1, 1, 0)

print("\n===== Isolation Forest Performance ====")
print("Accuracy :", round(accuracy_score(y, iso_pred) * 100, 2), "%")

# =========================
# Real-Time Detection Demo
# =========================

sample_data = [[0.0001, 419]] # Example sample data
sample = pd.DataFrame(sample_data, columns=features)

rf_result = rf.predict(sample)
iso_result = iso.predict(sample)

print("\n===== Real-Time Scan ====")

if rf_result[0] == 1:
    print("Random Forest: ALERT")
else:
    print("Random Forest: Normal")

if iso_result[0] == -1:
    print("Isolation Forest: ANOMALY")
else:
    print("Isolation Forest: Normal")

===== Random Forest Performance ====
Accuracy : 100.0 %
Precision: 100.0 %
Recall   : 100.0 %
F1 Score : 100.0 %

===== Isolation Forest Performance ====
Accuracy : 5.0 %

===== Real-Time Scan ====
Random Forest: ALERT
Isolation Forest: Normal
