In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the dataset
file_path = "RT_IOT2022.csv"  # Update this if the file is in a different location
df = pd.read_csv(file_path)

# Drop unnecessary columns
df = df.drop(columns=["Unnamed: 0"], errors='ignore')
# Encode categorical features
label_encoder = LabelEncoder()
df["proto"] = label_encoder.fit_transform(df["proto"])
df["service"] = label_encoder.fit_transform(df["service"])
df["Attack_type"] = label_encoder.fit_transform(df["Attack_type"])  # Target variable

# Separate features and target
X = df.drop(columns=["Attack_type"])
y = df["Attack_type"]

# Normalize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print("Dataset loaded and preprocessed successfully!")


Dataset loaded and preprocessed successfully!


In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from imblearn.combine import SMOTETomek
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

# Apply Hybrid Sampling (SMOTE + Tomek Links)
smote_tomek = SMOTETomek()
X_train_bal, y_train_bal = smote_tomek.fit_resample(X_train, y_train)

# Normalize Features
scaler = StandardScaler()
X_train_bal = scaler.fit_transform(X_train_bal)
X_test_scaled = scaler.transform(X_test)

# Hyperparameters for teacher model
teacher_params = {
    "alpha": 0.1,  # Learning rate
    "gamma": 0.9,  # Discount factor
    "epsilon": 1.0,  # Exploration rate
    "epsilon_decay": 0.995,
    "epsilon_min": 0.01,
    "num_episodes": 1000,
    "bins": 10  # For state discretization
}

# Hyperparameters for student model (simpler/faster model)
student_params = {
    "alpha": 0.2,  # Higher learning rate for faster convergence
    "gamma": 0.8,  # Slightly lower discount factor for simpler behavior
    "epsilon": 0.5,  # Lower initial exploration (leveraging teacher knowledge)
    "epsilon_decay": 0.99,
    "epsilon_min": 0.05,
    "num_episodes": 500,  # Fewer episodes needed due to knowledge transfer
    "bins": 5  # Coarser state discretization for faster lookups
}

# Get number of actions from the training data
num_actions = len(np.unique(y_train_bal))

# Initialize Q-tables for teacher and student
Q_teacher = defaultdict(lambda: np.zeros(num_actions))
Q_student = defaultdict(lambda: np.zeros(num_actions))

# Function to discretize states for teacher
def discretize_state_teacher(state):
    """Convert continuous state to discrete state for teacher Q-table lookup"""
    discretized = tuple(np.floor(state * teacher_params["bins"]).astype(int))
    return discretized

# Function to discretize states for student (coarser discretization)
def discretize_state_student(state):
    """Convert continuous state to discrete state for student Q-table lookup"""
    discretized = tuple(np.floor(state * student_params["bins"]).astype(int))
    return discretized

# Epsilon-greedy action selection for teacher
def choose_action_teacher(state, epsilon):
    discretized_state = discretize_state_teacher(state)
    
    if np.random.rand() < epsilon:
        return random.choice(range(num_actions))
    else:
        return np.argmax(Q_teacher[discretized_state])

# Epsilon-greedy action selection for student
def choose_action_student(state, epsilon):
    discretized_state = discretize_state_student(state)
    
    if np.random.rand() < epsilon:
        return random.choice(range(num_actions))
    else:
        return np.argmax(Q_student[discretized_state])

# Training function for the teacher Q-learning model
def train_teacher_model():
    print("Training teacher Q-learning model...")
    epsilon = teacher_params["epsilon"]
    epsilon_history = []
    
    for episode in range(teacher_params["num_episodes"]):
        # Shuffle data at the beginning of each episode
        indices = np.arange(len(X_train_bal))
        np.random.shuffle(indices)
        X_shuffled = X_train_bal[indices]
        y_shuffled = y_train_bal[indices]
        
        for i in range(len(X_shuffled)):
            state = X_shuffled[i]
            discretized_state = discretize_state_teacher(state)
            action = choose_action_teacher(state, epsilon)
            
            # Reward Function
            if action == y_shuffled[i]:
                reward = 10 if action != 0 else 5
            else:
                reward = -10 if action != 0 else -5
            
            # Get next state
            next_state = X_shuffled[(i + 1) % len(X_shuffled)]
            discretized_next_state = discretize_state_teacher(next_state)
            
            # Q-learning update
            best_next_action = np.argmax(Q_teacher[discretized_next_state])
            td_target = reward + teacher_params["gamma"] * Q_teacher[discretized_next_state][best_next_action]
            td_error = td_target - Q_teacher[discretized_state][action]
            Q_teacher[discretized_state][action] += teacher_params["alpha"] * td_error
        
        # Store epsilon for plotting
        epsilon_history.append(epsilon)
        
        # Decay epsilon
        epsilon = max(teacher_params["epsilon_min"], epsilon * teacher_params["epsilon_decay"])
        
        if episode % 100 == 0:
            print(f"Episode {episode}/{teacher_params['num_episodes']}, Epsilon: {epsilon:.4f}")
    
    print("Teacher model training complete!")
    return epsilon_history

# Knowledge distillation function to transfer knowledge from teacher to student
def distill_knowledge_to_student():
    print("Performing knowledge distillation to student Q-learning model...")
    epsilon = student_params["epsilon"]
    distillation_history = []
    
    # Temperature parameter for softening Q-values
    temperature = 2.0
    
    for episode in range(student_params["num_episodes"]):
        episode_loss = 0
        
        # Shuffle data at the beginning of each episode
        indices = np.arange(len(X_train_bal))
        np.random.shuffle(indices)
        X_shuffled = X_train_bal[indices]
        
        for i in range(len(X_shuffled)):
            state = X_shuffled[i]
            teacher_state = discretize_state_teacher(state)
            student_state = discretize_state_student(state)
            
            # Get teacher's Q-values (soft targets)
            teacher_q_values = Q_teacher[teacher_state]
            
            # Convert to probabilities via softmax with temperature
            teacher_q = teacher_q_values / temperature
            exp_teacher_q = np.exp(teacher_q - np.max(teacher_q))  # For numerical stability
            teacher_probs = exp_teacher_q / np.sum(exp_teacher_q)
            
            # Student selects action (with some exploration)
            if np.random.rand() < epsilon:
                action = random.choice(range(num_actions))
            else:
                # Sometimes follow teacher's advice, sometimes use own policy
                if np.random.rand() < 0.7:  # 70% follow teacher
                    action = np.argmax(teacher_q_values)
                else:
                    action = np.argmax(Q_student[student_state])
            
            # Update student's Q-values to mimic teacher's Q-values
            for a in range(num_actions):
                # Calculate loss (MSE between teacher and student Q-values)
                target_q = teacher_q_values[a]
                current_q = Q_student[student_state][a]
                loss = (target_q - current_q) ** 2
                episode_loss += loss
                
                # Update student's Q-value toward teacher's Q-value
                if a == action:
                    # Stronger update for the selected action
                    Q_student[student_state][a] += student_params["alpha"] * (target_q - current_q)
                else:
                    # Weaker updates for non-selected actions (optional)
                    Q_student[student_state][a] += 0.01 * student_params["alpha"] * (target_q - current_q)
        
        # Store average loss for plotting
        distillation_history.append(episode_loss / len(X_shuffled))
        
        # Decay epsilon
        epsilon = max(student_params["epsilon_min"], epsilon * student_params["epsilon_decay"])
        
        if episode % 50 == 0:
            print(f"Distillation Episode {episode}/{student_params['num_episodes']}, Epsilon: {epsilon:.4f}, Loss: {distillation_history[-1]:.4f}")
    
    print("Knowledge distillation complete!")
    return distillation_history

# Evaluation function for Q-learning models
def evaluate_q_model(Q_table, discretize_fn, X_data, y_data, model_name):
    y_pred = []
    for state in X_data:
        discretized_state = discretize_fn(state)
        action = np.argmax(Q_table[discretized_state])
        y_pred.append(action)
    
    y_pred = np.array(y_pred)
    accuracy = accuracy_score(y_data, y_pred)
    precision = precision_score(y_data, y_pred, average="weighted")
    recall = recall_score(y_data, y_pred, average="weighted")
    f1 = f1_score(y_data, y_pred, average="weighted")
    cm = confusion_matrix(y_data, y_pred)
    
    print(f"\n{model_name} Evaluation:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    
    return {
        "Model": model_name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "Confusion Matrix": cm
    }

# Train teacher model
teacher_epsilon_history = train_teacher_model()

# Perform knowledge distillation
distillation_loss_history = distill_knowledge_to_student()

# Evaluate both models on test data
teacher_metrics = evaluate_q_model(Q_teacher, discretize_state_teacher, X_test_scaled, y_test, "Teacher Q-Learning")
student_metrics = evaluate_q_model(Q_student, discretize_state_student, X_test_scaled, y_test, "Student Q-Learning")

# Display comparative results in a DataFrame
results = [
    teacher_metrics,
    student_metrics
]
results_df = pd.DataFrame([
    {k: v for k, v in result.items() if k != "Confusion Matrix"} 
    for result in results
])
print("\nModel Comparison:")
print(results_df)

# Analyze model size and efficiency
teacher_size = len(Q_teacher)
student_size = len(Q_student)
size_reduction = (1 - student_size / teacher_size) * 100 if teacher_size > 0 else 0

print(f"\nModel Size Comparison:")
print(f"Teacher Q-table entries: {teacher_size}")
print(f"Student Q-table entries: {student_size}")
print(f"Size reduction: {size_reduction:.2f}%")

# Plotting
plt.figure(figsize=(15, 10))

# Plot epsilon decay during teacher training
plt.subplot(2, 2, 1)
plt.plot(teacher_epsilon_history)
plt.title("Teacher Model: Epsilon Decay")
plt.xlabel("Episodes")
plt.ylabel("Epsilon")

# Plot loss during knowledge distillation
plt.subplot(2, 2, 2)
plt.plot(distillation_loss_history)
plt.title("Knowledge Distillation: Loss Over Episodes")
plt.xlabel("Episodes")
plt.ylabel("Average Loss")

# Plot performance comparison
plt.subplot(2, 2, 3)
metrics = ["Accuracy", "Precision", "Recall", "F1 Score"]
x = np.arange(len(metrics))
width = 0.35

teacher_values = [teacher_metrics[m] for m in metrics]
student_values = [student_metrics[m] for m in metrics]

plt.bar(x - width/2, teacher_values, width, label='Teacher Q-Learning')
plt.bar(x + width/2, student_values, width, label='Student Q-Learning')
plt.xlabel('Metrics')
plt.ylabel('Score')
plt.title('Performance Comparison')
plt.xticks(x, metrics)
plt.legend()

# Plot comparison of Q-value distributions
plt.subplot(2, 2, 4)

# Extract Q-values from both models
teacher_q_values = np.array([np.max(Q_teacher[state]) for state in list(Q_teacher.keys())[:100]])
student_q_values = np.array([np.max(Q_student[state]) for state in list(Q_student.keys())[:100]])

plt.hist(teacher_q_values, alpha=0.5, label='Teacher Q-values')
plt.hist(student_q_values, alpha=0.5, label='Student Q-values')
plt.title('Q-value Distributions (Sample)')
plt.xlabel('Q-value')
plt.ylabel('Frequency')
plt.legend()

plt.tight_layout()
plt.show()

# Plot confusion matrices
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.imshow(teacher_metrics["Confusion Matrix"], cmap='Blues')
plt.title('Teacher Q-Learning Confusion Matrix')
plt.colorbar()
plt.xlabel('Predicted Label')
plt.ylabel('True Label')

plt.subplot(1, 2, 2)
plt.imshow(student_metrics["Confusion Matrix"], cmap='Blues')
plt.title('Student Q-Learning Confusion Matrix')
plt.colorbar()
plt.xlabel('Predicted Label')
plt.ylabel('True Label')



plt.tight_layout()
plt.show()

Training teacher Q-learning model...
Episode 0/1000, Epsilon: 0.9950
Episode 100/1000, Epsilon: 0.6027
Episode 200/1000, Epsilon: 0.3651
Episode 300/1000, Epsilon: 0.2212
Episode 400/1000, Epsilon: 0.1340
Episode 500/1000, Epsilon: 0.0812
Episode 600/1000, Epsilon: 0.0492
Episode 700/1000, Epsilon: 0.0298
Episode 800/1000, Epsilon: 0.0180
Episode 900/1000, Epsilon: 0.0109
Teacher model training complete!
Performing knowledge distillation to student Q-learning model...
Distillation Episode 0/500, Epsilon: 0.4950, Loss: 5451.1331
Distillation Episode 50/500, Epsilon: 0.2995, Loss: 231.6604
Distillation Episode 100/500, Epsilon: 0.1812, Loss: 74.0678
Distillation Episode 150/500, Epsilon: 0.1096, Loss: 31.2263
Distillation Episode 200/500, Epsilon: 0.0663, Loss: 16.5936
Distillation Episode 250/500, Epsilon: 0.0500, Loss: 11.0309
