In [18]:
# Mount Google Drive in the Colab runtime and switch the working directory to
# the q_guard project folder, so the relative paths used by the cells below
# (data/..., artifacts/..., src.* modules) resolve against the project root.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/q_guard/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/q_guard


## 0. Install Dependencies (apt + pip)

In [None]:
! apt-get update
! apt-get install -y libzbar0

! pip install -q qrcode pyzbar

## 1. Dataset

In [None]:
# 1-1. Build the URL manifest (limit presets: Fast Test 2,000 | Normal 20,000 | Large 50,000)
# Reads the KISA, Kakao and normal-URL CSVs under data/raw and writes the
# manifest to data/processed; --normal_limit / --phish_limit below are set to
# the "Fast Test" preset.
# NOTE(review): with --split_by url the recorded audit reports zero url_norm
# overlap between splits but non-zero domain_reg overlap (train-val=38,
# train-test=49, val-test=19), i.e. the same registered domain appears in more
# than one split. If build_manifest offers a domain-level split, consider using
# it to avoid optimistic test metrics - confirm against the script's options.
! python -m src.data.build_manifest \
  --kisa_csv data/raw/kisa_db.csv \
  --kakao_csv data/raw/kakao_db.csv \
  --normal_csv data/raw/normal_urls.csv \
  --normal_limit 2000 \
  --phish_limit 2000 \
  --balance_ratio 1.0 \
  --dedup_by_url_norm \
  --split_by url \
  --out_dir data/processed \
  --seed 42

[STEP] build kisa...
Normalize URLs (kisa): 100% 27582/27582 [00:00<00:00, 90966.79row/s]
  kisa: 27567
[STEP] build kakao...
Extract URLs (kakao): 100% 19009/19009 [00:00<00:00, 119490.81row/s]
Normalize URLs (kakao): 100% 3045/3045 [00:00<00:00, 77932.02row/s]
  kakao: 3037
[STEP] build normal...
Normalize URLs (normal): 100% 2000/2000 [00:00<00:00, 142496.19row/s]
  normal: 2000 (limit=2000)
[STEP] merge...
[STEP] dedup by url_norm: 32604 -> 21505
[STEP] phish_limit=2000 -> phish=2000, benign=2484
[STEP] balance_ratio=1.0 -> phish=2000, benign=2000
Compute eTLD+1 (domain_reg): 100% 4000/4000 [00:00<00:00, 197576.59row/s]
[STEP] split...
[AUDIT] url_norm overlap train-val=0, train-test=0, val-test=0
[AUDIT] domain_reg overlap train-val=38, train-test=49, val-test=19
[OK] manifest written to: data/processed
  total: 4000 label1(phish): 2000 label0(benign): 2000
  train/val/test: 2800 400 800


In [None]:
# 1-2. Generate QR images (takes about 5-15 minutes)
# Renders one QR image per manifest row under data/qr_images and writes an
# updated manifest to data/processed/manifest_with_qr.csv.
! python -m src.qr.generate_qr \
  --manifest_path data/processed/manifest.csv \
  --out_dir data/qr_images \
  --update_manifest_out data/processed/manifest_with_qr.csv \
  --ecc H --box_size 10 --border 4

Generate QR: 100% 4000/4000 [01:17<00:00, 51.53qr/s, saved=4000]
Saved QR images under: data/qr_images
Wrote updated manifest: data/processed/manifest_with_qr.csv


In [None]:
# 1-3. Build the URL + QR dataset
# Uses manifest_with_qr.csv and the splits in data/processed to write
# train_with_qr.csv, val_with_qr.csv and test_with_qr.csv to data/processed.
! python -m src.data.attach_qr_paths \
  --manifest_with_qr data/processed/manifest_with_qr.csv \
  --splits_dir data/processed \
  --out_dir data/processed

Wrote: data/processed/train_with_qr.csv
Wrote: data/processed/val_with_qr.csv
Wrote: data/processed/test_with_qr.csv


## 2. Real-World Data Augmentation

In [None]:
# 2-1. Basic real-world QR data augmentation module
# NOTE(review): the recorded run of this cell failed with
# "ImportError: cannot import name 'ContextConfig' from 'src.qr.augmentations'".
# The fix belongs in src/qr/augment_qr.py or src/qr/augmentations.py (not
# visible from this notebook); re-run this cell once the import is resolved.
# NOTE(review): this cell and cell 2-2 both write to data/qr_images_aug -
# confirm whether they are alternatives or whether separate output directories
# are needed.
! python -m src.qr.augment_qr \
  --input_dir data/qr_images \
  --out_dir data/qr_images_aug \
  --n_per_image 2 \
  --strength light \
  --context_mode none \
  --seed 42

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/content/drive/MyDrive/q_guard/src/qr/augment_qr.py", line 34, in <module>
    from .augmentations import (
ImportError: cannot import name 'ContextConfig' from 'src.qr.augmentations' (/content/drive/MyDrive/q_guard/src/qr/augmentations.py)


In [None]:
# 2-2. Stronger augmentation module that includes "QR + real-world background compositing"
# NOTE(review): the recorded run of this cell failed with
# "ImportError: cannot import name 'ContextConfig' from 'src.qr.augmentations'".
# The fix belongs in src/qr/augment_qr.py or src/qr/augmentations.py (not
# visible from this notebook); re-run this cell once the import is resolved.
# NOTE(review): this cell uses the same --out_dir (data/qr_images_aug) as
# cell 2-1 - confirm whether the two runs are meant to share a directory.
! python -m src.qr.augment_qr \
  --input_dir data/qr_images \
  --out_dir data/qr_images_aug \
  --n_per_image 2 \
  --strength strong \
  --background_dir assets/backgrounds \
  --context_mode mix \
  --context_prob 0.75 \
  --output_size 512 \
  --decode_filter \
  --save_meta_csv data/processed/qr_aug_meta.csv \
  --seed 42

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/content/drive/MyDrive/q_guard/src/qr/augment_qr.py", line 34, in <module>
    from .augmentations import (
ImportError: cannot import name 'ContextConfig' from 'src.qr.augmentations' (/content/drive/MyDrive/q_guard/src/qr/augmentations.py)


## 3. Fusion Training Models

In [None]:
# 3-1. QR image + Context Branch model
# Trains on the *_with_qr.csv splits and writes to artifacts/models/qr.
# NOTE(review): --warn_fpr / --block_fpr presumably set the target
# false-positive rates used to derive the WARN and BLOCK thresholds - confirm
# in src.train.train_qr.
! python -m src.train.train_qr \
  --train_csv data/processed/train_with_qr.csv \
  --val_csv data/processed/val_with_qr.csv \
  --test_csv data/processed/test_with_qr.csv \
  --out_dir artifacts/models/qr \
  --augment_strength light \
  --use_context \
  --balance_sampler \
  --calibrate \
  --warn_fpr 0.01 \
  --block_fpr 0.001

Device: cuda
Train ep1: 100% 44/44 [02:40<00:00,  3.65s/batch, loss=0.641]
[VAL ep1] {'tn': 142, 'fp': 58, 'fn': 48, 'tp': 152, 'precision': 0.7238095238095203, 'recall_tpr': 0.7599999999999962, 'f1': 0.741463414633643, 'fpr': 0.28999999999999854, 'tpr': 0.7599999999999962, 'n': 400, 'roc_auc': 0.826275, 'pr_auc': 0.8577328562486742, 'ece': 0.09619327081367375}
Train ep2: 100% 44/44 [02:36<00:00,  3.55s/batch, loss=0.55]
[VAL ep2] {'tn': 200, 'fp': 0, 'fn': 126, 'tp': 74, 'precision': 0.9999999999999866, 'recall_tpr': 0.36999999999999816, 'f1': 0.5401459854010616, 'fpr': 0.0, 'tpr': 0.36999999999999816, 'n': 400, 'roc_auc': 0.8689749999999999, 'pr_auc': 0.8891987095781047, 'ece': 0.2980509625483682}
Train ep3: 100% 44/44 [02:34<00:00,  3.52s/batch, loss=0.526]
[VAL ep3] {'tn': 178, 'fp': 22, 'fn': 34, 'tp': 166, 'precision': 0.8829787234042507, 'recall_tpr': 0.8299999999999959, 'f1': 0.8556701030922796, 'fpr': 0.10999999999999946, 'tpr': 0.8299999999999959, 'n': 400, 'roc_auc': 0.93307

In [None]:
# 3-2. Fusion model (URL string + QR image + Context Branch)
# Trains on the *_with_qr.csv splits with gated fusion and writes to
# artifacts/models/fusion, which cells 3-3 and 5-2 read as --model_dir.
# NOTE(review): --warn_fpr / --block_fpr presumably set the target
# false-positive rates used to derive the WARN and BLOCK thresholds - confirm
# in src.train.train_fusion.
! python -m src.train.train_fusion \
  --train_csv data/processed/train_with_qr.csv \
  --val_csv data/processed/val_with_qr.csv \
  --test_csv data/processed/test_with_qr.csv \
  --out_dir artifacts/models/fusion \
  --augment_strength light \
  --use_context \
  --fusion_mode gated \
  --balance_sampler \
  --calibrate \
  --warn_fpr 0.01 \
  --block_fpr 0.001

Device: cuda
Train ep1: 100% 59/59 [02:28<00:00,  2.52s/batch, loss=0.645]
[VAL ep1] {'tn': 200, 'fp': 0, 'fn': 132, 'tp': 68, 'precision': 0.9999999999999853, 'recall_tpr': 0.3399999999999983, 'f1': 0.5074626865667816, 'fpr': 0.0, 'tpr': 0.3399999999999983, 'n': 400, 'roc_auc': 0.9708, 'pr_auc': 0.9786823685984515, 'ece': 0.2467148157954216}
Train ep2: 100% 59/59 [02:30<00:00,  2.54s/batch, loss=0.353]
[VAL ep2] {'tn': 198, 'fp': 2, 'fn': 14, 'tp': 186, 'precision': 0.9893617021276543, 'recall_tpr': 0.9299999999999954, 'f1': 0.9587628865974337, 'fpr': 0.00999999999999995, 'tpr': 0.9299999999999954, 'n': 400, 'roc_auc': 0.997175, 'pr_auc': 0.9969813790169781, 'ece': 0.09195077450480311}
Train ep3: 100% 59/59 [02:30<00:00,  2.54s/batch, loss=0.0799]
[VAL ep3] {'tn': 199, 'fp': 1, 'fn': 4, 'tp': 196, 'precision': 0.9949238578680153, 'recall_tpr': 0.9799999999999951, 'f1': 0.987405541561208, 'fpr': 0.004999999999999975, 'tpr': 0.9799999999999951, 'n': 400, 'roc_auc': 0.99975, 'pr_auc': 0.

In [None]:
# 3-3. Model Test Report
# Evaluates the fusion model from artifacts/models/fusion and saves the report
# under artifacts/reports/fusion_eval; the recorded run prints the resulting
# warn/block thresholds.
# NOTE(review): cells 4-2 and 5-2 read artifacts/reports/fusion_eval/thresholds.json,
# which is presumably produced by this step - run this cell before them.
! python -m src.eval.eval_fusion_operational \
  --val_csv data/processed/val_with_qr.csv \
  --test_csv data/processed/test_with_qr.csv \
  --model_dir artifacts/models/fusion \
  --out_dir artifacts/reports/fusion_eval \
  --fit_temperature_on_val \
  --warn_fpr 0.01 \
  --block_fpr 0.001 \
  --use_context

Device: cuda
[OK] Saved: artifacts/reports/fusion_eval
  mode: fit_temperature_on_val | fusion_mode: gated
  warn_thr: 0.21000675857067108 block_thr: 0.9885039925575256


## 4. Security-Game-Based Adversarial Training (Co-evolution Training)

- We formulate quishing detection as a co-evolutionary adversarial game, where a context-aware attacker and a defender are alternately optimized, enabling the detector to progressively adapt to increasingly realistic attack strategies.

In [None]:
# 4-1. Red Team (Attack) vs Blue Team (Defense) co-evolution training
# Writes checkpoints to artifacts/models/coevo_fusion_payload; cell 4-2 loads
# detector_round10.pt and attacker0_round10.pt from that directory, which
# matches --rounds 10 here.
# NOTE(review): the recorded output below this cell shows "Round 1/15" even
# though the command passes --rounds 10, so the saved output appears to come
# from a run with different settings (or the script overrides the value).
# Re-run and confirm before relying on the logged numbers.
! python -m src.train.train_coevolution \
  --train_csv data/processed/train_with_qr.csv \
  --val_csv data/processed/val_with_qr.csv \
  --out_dir artifacts/models/coevo_fusion_payload \
  --detector_mode fusion \
  --fusion_mode gated \
  --use_context \
  --background_dir assets/backgrounds \
  --rounds 10 \
  --attacker_pool 3 \
  --k_attack 10 \
  --k_defense 50 \
  --batch_size 64 \
  --image_size 224 \
  --balance_sampler \
  --decode_filter \
  --payload_match \
  --payload_col url_norm \
  --decode_subset 16 \
  --decode_resample 3 \
  --decode_min_keep 4

Device: cuda

[Round 1/15] difficulty_strength=0.35 mode=fusion use_context=True decode_filter=True payload_match=True
  [Attack 1/10] G2 keep=48 loss_attack=-0.6782 loss_det=0.7199
  [Attack 2/10] G1 keep=48 loss_attack=-0.6143 loss_det=0.6574
  [Attack 3/10] G2 keep=49 loss_attack=-0.6453 loss_det=0.6865
  [Attack 4/10] G2 keep=48 loss_attack=-0.6676 loss_det=0.7089
  [Attack 5/10] G0 keep=49 loss_attack=-0.6604 loss_det=0.7040
  [Attack 6/10] G0 keep=48 loss_attack=-0.6724 loss_det=0.7137
  [Attack 7/10] G1 keep=48 loss_attack=-0.6504 loss_det=0.6931
  [Attack 8/10] G1 keep=49 loss_attack=-0.6580 loss_det=0.6999
  [Attack 9/10] G2 keep=48 loss_attack=-0.6615 loss_det=0.7012
  [Attack 10/10] G2 keep=48 loss_attack=-0.6689 loss_det=0.7084
  [Defense 1/50] keep=48 loss=1.4024 clean=0.7004 adv=0.7071 rep=0.6800
  [Defense 2/50] keep=48 loss=1.3922 clean=0.6894 adv=0.6999 rep=0.7128
  [Defense 3/50] keep=48 loss=1.3888 clean=0.6985 adv=0.6922 rep=0.6821
  [Defense 4/50] keep=49 loss=1.37

In [None]:
# 4-2. Robustness curve + WARN/BLOCK rate, TPR/FPR
# Evaluates the round-10 co-evolution detector against the round-10 attacker0
# checkpoint over the attack strengths in --strength_grid and saves the curve
# CSV and plots under artifacts/reports/coevo_robustness.
# NOTE(review): --thresholds_json points at artifacts/reports/fusion_eval,
# i.e. thresholds computed in cell 3-3 for the model in artifacts/models/fusion,
# not for the co-evolution detector evaluated here - confirm this is intended.
# NOTE(review): training uses --attacker_pool 3 but only attacker0 is evaluated
# here - confirm whether the other attackers should be covered as well.
! python -m src.eval.eval_robustness_curve \
  --test_csv data/processed/test_with_qr.csv \
  --out_dir artifacts/reports/coevo_robustness \
  --detector_mode fusion \
  --fusion_mode gated \
  --use_context \
  --background_dir assets/backgrounds \
  --ckpt_detector artifacts/models/coevo_fusion_payload/detector_round10.pt \
  --ckpt_attacker artifacts/models/coevo_fusion_payload/attacker0_round10.pt \
  --thresholds_json artifacts/reports/fusion_eval/thresholds.json \
  --strength_grid 0,0.2,0.4,0.6,0.8,1.0

Device: cuda
[strength=0.00] acc=0.7812
[strength=0.20] acc=0.7612
[strength=0.40] acc=0.6787
[strength=0.60] acc=0.6262
[strength=0.80] acc=0.6012
[strength=1.00] acc=0.5975
Saved: artifacts/reports/coevo_robustness/robustness_curve.csv
AUC: 0.671375036239624
Plots: artifacts/reports/coevo_robustness/robustness_accuracy.png


## 5. Real-World Test

In [None]:
# 5-1. Generate 20 samples mapped to real URLs
# Writes samples/fusion_demo.csv (columns qr_path and url_norm in the recorded
# output), using data/processed/test_with_qr.csv as the QR-to-URL mapping.
! python scripts/make_fusion_demo_csv.py \
  --input_dir data/qr_images \
  --mapping_csv data/processed/test_with_qr.csv \
  --out_csv samples/fusion_demo.csv \
  --limit 20

[OK] Wrote: samples/fusion_demo.csv
[OK] Rows: 20
                                                             qr_path                   url_norm
/content/drive/MyDrive/q_guard/data/qr_images/1/e3880f63a7ed7ad7.png https://w3nu.yahwagsc.pro/
/content/drive/MyDrive/q_guard/data/qr_images/1/ddcac6ad475166e9.png        http://apiqido.top/
/content/drive/MyDrive/q_guard/data/qr_images/0/87b55241ffa82ded.png     https://1xbet-lua.top/
/content/drive/MyDrive/q_guard/data/qr_images/1/d6ac813bb19d6e3a.png     http://buly.kr/8IsrVQd
/content/drive/MyDrive/q_guard/data/qr_images/1/78301bf8a4a229fb.png https://05a9.yahwagsc.pro/
/content/drive/MyDrive/q_guard/data/qr_images/1/f347c8292d2c640a.png https://w6wm.yahwagsc.pro/
/content/drive/MyDrive/q_guard/data/qr_images/0/30668bf792181179.png           https://hhs.gov/
/content/drive/MyDrive/q_guard/data/qr_images/1/4b05b87275e159d2.png     https://s10.a1dv.skin/
/content/drive/MyDrive/q_guard/data/qr_images/0/3730134539027d44.png    https://storem

In [None]:
# 5-2. Classification Predict
# Runs the fusion model from artifacts/models/fusion on samples/fusion_demo.csv
# with the thresholds from artifacts/reports/fusion_eval/thresholds.json and
# saves predictions.csv under artifacts/demo/fusion_run1.
! python -m src.app.demo_fusion_predict \
  --input_csv samples/fusion_demo.csv \
  --model_dir artifacts/models/fusion \
  --thresholds_json artifacts/reports/fusion_eval/thresholds.json \
  --out_dir artifacts/demo/fusion_run1 \
  --fusion_mode gated \
  --use_context

[OK] Saved: artifacts/demo/fusion_run1/predictions.csv
[OK] use_context: True
