# URL Security Classification using Q-Learning

This notebook provides an interactive way to run the complete pipeline.


In [1]:
# Import all necessary modules
import sys
from data_preparation import DataPreparator
from environment import URLSecurityEnvironment
from q_learning import QLearningAgent
from baseline import RuleBasedBaseline
from training import train_q_learning_agent
from evaluation import (evaluate_agent, evaluate_baseline, generate_all_visualizations, 
                       save_evaluation_results)
import numpy as np


## 1. Data Preparation


In [2]:
# Initialize data preparator
preparator = DataPreparator(dataset_path='dataset/csic_database.csv')

# Load data
data = preparator.load_data()

# Extract features
features = preparator.extract_features(data)

# Fit discretization
preparator.fit_discretization(features.drop('label', axis=1), method='binning')
num_states = preparator.get_num_states()
print(f"Total number of discrete states: {num_states}")

# Split data
train_data, val_data, test_data = preparator.split_data(features)


Loading dataset from dataset/csic_database.csv...
Loaded 61065 samples
Benign samples: 36000, Attack samples: 25065
Extracting features from URLs...
Feature extraction complete. Shape: (61065, 6)
Discretization fitted using binning method
Total number of discrete states: 162
Data split:
  Training: 42745 samples
  Validation: 9160 samples
  Test: 9160 samples


## 2. Baseline Approach


In [3]:
# Initialize baseline
baseline = RuleBasedBaseline(K=2)

# Tune threshold on validation set
baseline.tune_threshold(val_data.drop('label', axis=1), val_data['label'].values)


Tuned K to 0 with accuracy 0.6366


(0, np.float64(0.6365720524017467))

## 3. RL Environment Setup


In [4]:
# Create environment
env = URLSecurityEnvironment(train_data, preparator, episode_length=1000)
print(f"Environment created with episode length: {env.episode_length}")


Environment created with episode length: 1000


## 4. Q-Learning Agent Setup


In [5]:
# Initialize Q-learning agent
agent = QLearningAgent(
    num_states=num_states,
    num_actions=2,
    alpha=0.1,
    gamma=0.90,
    epsilon_start=1.0,
    epsilon_min=0.05,
    epsilon_decay=0.98
)
print(f"Q-learning agent initialized with {num_states} states")


Q-learning agent initialized with 162 states


## 5. Training


In [6]:
# Train the agent
num_episodes = 15
training_stats = train_q_learning_agent(env, agent, num_episodes=num_episodes, verbose=True)

# Save Q-table
agent.save_q_table('q_table.npy')
agent.save_agent('q_learning_agent.pkl')


Starting training for 15 episodes...
Initial epsilon: 1.0000
Episode 1/15 - Reward: -2709.50, Avg Reward (last 10): -2709.50, Epsilon: 0.9800
Episode 2/15 - Reward: -2411.50, Avg Reward (last 10): -2411.50, Epsilon: 0.9604
Episode 3/15 - Reward: -2404.00, Avg Reward (last 10): -2404.00, Epsilon: 0.9412
Episode 4/15 - Reward: -2591.00, Avg Reward (last 10): -2591.00, Epsilon: 0.9224
Episode 5/15 - Reward: -2574.00, Avg Reward (last 10): -2574.00, Epsilon: 0.9039
Episode 6/15 - Reward: -2633.50, Avg Reward (last 10): -2633.50, Epsilon: 0.8858
Episode 7/15 - Reward: -2569.00, Avg Reward (last 10): -2569.00, Epsilon: 0.8681
Episode 8/15 - Reward: -2762.50, Avg Reward (last 10): -2762.50, Epsilon: 0.8508
Episode 9/15 - Reward: -2358.00, Avg Reward (last 10): -2358.00, Epsilon: 0.8337
Episode 10/15 - Reward: -2451.50, Avg Reward (last 10): -2546.45, Epsilon: 0.8171
Episode 11/15 - Reward: -2497.50, Avg Reward (last 10): -2525.25, Epsilon: 0.8007
Episode 12/15 - Reward: -2218.50, Avg Reward (

## 6. Evaluation


In [7]:
# Evaluate RL agent
rl_results = evaluate_agent(agent, env, preparator, test_data, verbose=True)

# Evaluate baseline
baseline_results = evaluate_baseline(baseline, test_data, verbose=True)

# Generate visualizations
generate_all_visualizations(agent, rl_results, baseline_results, output_dir='plots')

# Save evaluation results
save_evaluation_results(rl_results, baseline_results, 'evaluation_results.json')



RL Agent Evaluation Results
Accuracy:  0.6810
Precision: 0.6852
Recall:    0.4122
F1-Score:  0.5148
Avg Reward: -1.7245

Confusion Matrix:
              Predicted
              ALLOW  BLOCK
Actual Benign  4688    712
Actual Attack  2210   1550

Baseline Evaluation Results
Accuracy:  0.6443
Precision: 0.6303
Recall:    0.3229
F1-Score:  0.4270
Avg Reward: -2.0913

Confusion Matrix:
              Predicted
              ALLOW  BLOCK
Actual Benign  4688    712
Actual Attack  2546   1214
Learning curve saved to plots\learning_curve.png
Confusion matrix saved to plots\confusion_matrix_rl.png
Confusion matrix saved to plots\confusion_matrix_baseline.png
Q-table has 162 states, showing first 100 for visualization
Q-table heatmap saved to plots\q_table_heatmap.png
Evaluation results saved to evaluation_results.json


## 7. Generate Report


In [8]:
# Import report generation function from main
from main import generate_report

# Generate report
generate_report(agent, preparator, rl_results, baseline_results, training_stats)
print("Report generated and saved to report.md")


Report generated and saved to report.md
Report generated and saved to report.md


## Summary

The complete pipeline has been executed. Check the output files:
- `q_table.npy`: Saved Q-table
- `q_learning_agent.pkl`: Saved agent
- `evaluation_results.json`: Evaluation metrics
- `plots/`: Visualizations
- `report.md`: Comprehensive report
