In [ ]:
# AI-Based Intrusion Detection System - Beginner Version
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [ ]:
# Step 1: Read the KDDTrain+ CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer='../data/KDDTrain+.csv')
df.head(n=5)

In [ ]:
# Step 2: Encode categorical columns
# Each column gets its own LabelEncoder. Re-fitting one shared encoder
# overwrites its learned classes, so only the last column's mapping would
# survive and the earlier columns could not be decoded or re-applied to
# new data. The fitted encoders are kept in `encoders`, keyed by column name.
categorical_columns = ['protocol_type', 'service', 'flag']
encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    encoders[column] = encoder

# Separate the feature columns from the target column.
X = df.drop('label', axis=1)
y = df['label']

In [ ]:
# Step 3: Train-test split
# The split runs on the unscaled features so that the scaler below can be
# fitted on the training rows only; fitting it on the full dataset would
# leak test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 4: Scale features
# Fit the scaler on the training rows, then apply that same fitted
# transform to the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling, kept so the name
# X_scaled stays available to any code that refers to it.
X_scaled = scaler.transform(X)

In [ ]:
# Step 5: Train Random Forest model
# random_state seeds the forest's internal randomness so repeated runs
# produce the same model, in line with the seeded train-test split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [ ]:
# Step 6: Evaluate and visualize
y_pred = model.predict(X_test)

# Every class that occurs in the true or the predicted labels, in sorted
# order. Passing this list to confusion_matrix pins the row/column order,
# so the same list can safely be used as the heatmap tick labels.
class_labels = sorted(set(y_test) | set(y_pred))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(8,6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Classification Report Heatmap
report = classification_report(y_test, y_pred, output_dict=True)
# iloc[:-1] drops the final row of the report table (the support counts in
# scikit-learn's report layout); .T then puts one report entry per row.
report_scores = pd.DataFrame(report).iloc[:-1, :].T
plt.figure(figsize=(10,6))
sns.heatmap(report_scores, annot=True)
plt.title('Classification Report Metrics')
plt.show()

# Overall Accuracy
acc = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {acc*100:.2f}%")
# Reuse the matrix computed for the heatmap instead of recomputing it.
print(cm)
print(classification_report(y_test, y_pred))

In [ ]:
# Step 7: Save trained model
import os

# Single source of truth for the output location, so the path written and
# the path reported cannot drift apart.
model_path = '../models/intrusion_model.pkl'

# joblib.dump does not create missing directories; make sure the target
# folder exists before writing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# NOTE: only the classifier is persisted here. The model was trained on
# encoded and scaled features, so inputs at prediction time must go through
# the same encoding and scaling.
joblib.dump(model, model_path)
print(f"Model saved to {model_path}")