# Week 6: Decision Trees and Random Forests

In this notebook, we apply decision tree and random forest classifiers to predict the severity level of cybersecurity incidents using network traffic data.


In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load data
# Keep only rows that have a severity label. The explicit .copy() yields an
# independent frame, so adding a column below does not write into a slice of
# the originally loaded data (which triggers SettingWithCopyWarning).
df = pd.read_csv("cybersecurity_attacks.csv")
df = df[df["Severity Level"].notna()].copy()

# Encode the severity labels as integers (Low=0 ... Critical=3).
severity_map = {"Low": 0, "Medium": 1, "High": 2, "Critical": 3}
df["Severity_Level_Num"] = df["Severity Level"].map(severity_map)

# Series.map() turns any label that is missing from severity_map into NaN.
# Fail here with an explicit message instead of passing NaN targets downstream.
unmapped_labels = df.loc[df["Severity_Level_Num"].isna(), "Severity Level"].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unmapped 'Severity Level' values: {sorted(map(str, unmapped_labels))}"
    )

# Select features
features = ["Packet Length", "Anomaly Scores", "Source Port", "Destination Port"]
X = df[features]
y = df["Severity_Level_Num"]

# Scale features
# Standardise each feature column; the scaler statistics here are computed
# from every row of X.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Split data
# Split the *unscaled* features so the scaler below only ever sees training
# rows. Fitting the scaler on the full dataset before splitting leaks
# test-set statistics into preprocessing. The row partition is the same as
# before because it depends only on the number of rows and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply that same
# transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Decision Tree
# max_depth=5 caps how deep the tree may grow; random_state makes the fit reproducible.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
acc_dt = accuracy_score(y_test, y_pred_dt)

# Random Forest
# 100 trees with the same depth cap as the single tree above, so the two
# models are compared under the same per-tree depth limit.
rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred_rf)


In [3]:
# Put the held-out accuracy of each classifier side by side in one table.
model_names = ["Decision Tree", "Random Forest"]
model_accuracies = [acc_dt, acc_rf]
results = pd.DataFrame({"Model": model_names, "Accuracy": model_accuracies})
results

Unnamed: 0,Model,Accuracy
0,Decision Tree,0.322625
1,Random Forest,0.32575
