# Smart Lens v2 - Fine-Tune Existing Model with More Data

**Strategy: Transfer Learning from your trained best.pt**

This notebook does NOT start from scratch. It loads your trained model and fine-tunes it.

### Pipeline:
1. Mount Drive & upload best.pt
2. Download your existing dataset (Roboflow)
3. Download additional open-source datasets for each class
4. Merge all datasets into a unified YOLOv8 format (files get a per-source prefix; no duplicate-image detection is performed)
5. Fine-tune from best.pt with merged dataset
6. Evaluate & compare with v1
7. Export & download improved model

---
## Section 1: Environment Setup

In [None]:
# 1A: Install dependencies
!pip install -q ultralytics roboflow opencv-python-headless

import os, shutil, glob, yaml, json, random, time
from pathlib import Path
from datetime import datetime
from collections import Counter

import torch
# Report the runtime up front so a missing GPU is noticed before the training cell.
print(f'PyTorch: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    # The device-properties attribute is `total_memory` (bytes);
    # `total_mem` does not exist and raises AttributeError on GPU runtimes.
    print(f'VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')

In [None]:
# 1B: Mount Google Drive and make sure the project folders exist
from google.colab import drive

drive.mount('/content/drive')

DRIVE_DIR = '/content/drive/MyDrive/Smart-Lens-FYP'
# Project root first, then the models sub-folder used for backups and exports.
for _folder in (DRIVE_DIR, f'{DRIVE_DIR}/models'):
    os.makedirs(_folder, exist_ok=True)
print(f'Drive directory: {DRIVE_DIR}')

In [None]:
# 1C: Load your existing best.pt model (Drive backup first, manual upload otherwise)
from google.colab import files

MODEL_PATH = '/content/best.pt'
drive_model = f'{DRIVE_DIR}/models/best_v1.pt'
if os.path.exists(drive_model):
    # A backup on Drive takes precedence over any local copy at MODEL_PATH.
    shutil.copy(drive_model, MODEL_PATH)
    print(f'Loaded model from Drive: {drive_model}')
elif not os.path.exists(MODEL_PATH):
    print('Upload your best.pt file:')
    uploaded = files.upload()
    if not uploaded:
        # Fail here with a clear message instead of a confusing
        # "file not found" from the copy/getsize calls below.
        raise FileNotFoundError('No model file was uploaded; re-run this cell and select your best.pt')
    uploaded_names = list(uploaded)
    if len(uploaded_names) > 1:
        # Only one file can become MODEL_PATH; keep the others untouched
        # rather than moving each onto the same destination in turn.
        print(f'Multiple files uploaded; using the last one: {uploaded_names[-1]}')
    shutil.move(uploaded_names[-1], MODEL_PATH)
    shutil.copy(MODEL_PATH, drive_model)
    print(f'Saved backup to Drive: {drive_model}')

print(f'Model ready: {MODEL_PATH} ({os.path.getsize(MODEL_PATH)/1024/1024:.1f} MB)')

---
## Section 2: Download & Merge Datasets

### How to add additional datasets:
1. Go to universe.roboflow.com
2. Search for: gun detection, knife detection, fire detection, violence detection
3. Pick a dataset (1000+ images, good ratings)
4. Click Download Dataset > Format: YOLOv8 > show download code
5. Copy workspace, project, version from the code
6. Add to ADDITIONAL_DATASETS list in Cell 2B

Class IDs: 0=Fighting, 1=Fire, 2=Gun, 3=Knife

In [None]:
# 2A: Download your ORIGINAL dataset
from getpass import getpass

from roboflow import Roboflow

# The API key is read from the ROBOFLOW_API_KEY environment variable, or
# prompted for once, instead of being stored in the notebook. It is cached in
# os.environ so later cells in this session do not prompt again.
# NOTE: a key that was previously committed in this notebook should be
# treated as leaked and rotated in the Roboflow account settings.
_roboflow_key = os.environ.get('ROBOFLOW_API_KEY') or getpass('Roboflow API key: ')
if not _roboflow_key:
    raise ValueError('A Roboflow API key is required to download the dataset')
os.environ['ROBOFLOW_API_KEY'] = _roboflow_key

rf = Roboflow(api_key=_roboflow_key)
project = rf.workspace('fpy').project('smart-survellaince-lens-2')
version = project.version(1)
original_ds = version.download('yolov8', location='/content/datasets/original')
print('Original dataset downloaded')

In [None]:
# 2B: Download ADDITIONAL open-source datasets
# HOW TO ADD: Go to universe.roboflow.com, find a dataset,
# click Download > YOLOv8 > show download code,
# then copy workspace/project/version into an entry below.
#
# class_map: maps SOURCE class names to YOUR target class IDs
#   Fighting=0, Fire=1, Gun=2, Knife=3

ADDITIONAL_DATASETS = [
    # UNCOMMENT & EDIT with real values from Roboflow Universe:
    #
    # {
    #     'name': 'Gun Detection Dataset',
    #     'workspace': 'paste-workspace-from-download-code',
    #     'project': 'paste-project-from-download-code',
    #     'version': 1,
    #     'class_map': {'handgun': 2, 'gun': 2, 'pistol': 2, 'weapon': 2},
    # },
    # {
    #     'name': 'Knife Detection Dataset',
    #     'workspace': 'paste-workspace-from-download-code',
    #     'project': 'paste-project-from-download-code',
    #     'version': 1,
    #     'class_map': {'knife': 3, 'blade': 3},
    # },
    # {
    #     'name': 'Fire Smoke Detection',
    #     'workspace': 'paste-workspace-from-download-code',
    #     'project': 'paste-project-from-download-code',
    #     'version': 1,
    #     'class_map': {'fire': 1, 'smoke': 1, 'flame': 1},
    # },
]

# Entries that downloaded successfully, each with an added 'local_path' key.
downloaded_paths = []
# The Roboflow client is created lazily, once, so no API key is requested
# when ADDITIONAL_DATASETS is empty and it is not rebuilt per dataset.
rf2 = None
for i, ds_info in enumerate(ADDITIONAL_DATASETS):
    name = ds_info['name']
    print(f'\n[{i+1}/{len(ADDITIONAL_DATASETS)}] Downloading: {name}...')
    try:
        if rf2 is None:
            from getpass import getpass
            # Key comes from the environment or a one-time prompt rather than
            # being hard-coded; cached in os.environ for the rest of the session.
            _roboflow_key = os.environ.get('ROBOFLOW_API_KEY') or getpass('Roboflow API key: ')
            os.environ['ROBOFLOW_API_KEY'] = _roboflow_key
            rf2 = Roboflow(api_key=_roboflow_key)
        proj = rf2.workspace(ds_info['workspace']).project(ds_info['project'])
        ver = proj.version(ds_info['version'])
        dl_path = f'/content/datasets/extra_{i}'
        ver.download('yolov8', location=dl_path)
        # Append a copy so the configuration list above is not mutated.
        downloaded_paths.append({**ds_info, 'local_path': dl_path})
        n_imgs = len(glob.glob(f'{dl_path}/**/images/*', recursive=True))
        print(f'  Done: {n_imgs} images')
    except Exception as e:
        # Best effort: one failing dataset must not abort the others.
        print(f'  Failed: {e}')
        print(f'  Skipping...')

if not ADDITIONAL_DATASETS:
    print('No additional datasets configured.')
    print('Fine-tuning on original 1793 images only (still beneficial).')
    print('To add more data: see instructions above.')
else:
    print(f'\n{len(downloaded_paths)}/{len(ADDITIONAL_DATASETS)} datasets downloaded')

In [None]:
# 2B-ALT: Manual dataset upload (OPTIONAL)
import zipfile

UPLOAD_DATASETS = False  # Set True to upload ZIP datasets manually

if not UPLOAD_DATASETS:
    print('Manual upload disabled. Set UPLOAD_DATASETS=True to enable.')
else:
    print('Upload YOLOv8 dataset ZIP files:')
    uploaded = files.upload()
    for zip_name in uploaded:
        extract_path = f'/content/datasets/uploaded_{Path(zip_name).stem}'
        os.makedirs(extract_path, exist_ok=True)
        with zipfile.ZipFile(zip_name, 'r') as archive:
            archive.extractall(extract_path)
        extracted_images = glob.glob(f'{extract_path}/**/images/*', recursive=True)
        print(f'Extracted {len(extracted_images)} images to: {extract_path}')

        # Source class name -> target class ID, grouped by target class.
        # A fresh dict is built for every archive.
        alias_groups = [
            (2, ('gun', 'handgun', 'pistol', 'weapon')),
            (3, ('knife', 'blade')),
            (1, ('fire', 'smoke', 'flame')),
            (0, ('fighting', 'violence', 'fight')),
        ]
        upload_class_map = {}
        for target_id, aliases in alias_groups:
            for alias in aliases:
                upload_class_map[alias] = target_id

        # Registered in the same list as the Roboflow downloads so the merge
        # cell treats both kinds of source alike.
        downloaded_paths.append({
            'name': f'Uploaded: {zip_name}',
            'local_path': extract_path,
            'class_map': upload_class_map,
        })

In [None]:
# 2C: MERGE all datasets into one unified dataset
MERGED_DIR = '/content/merged_dataset'
# Start from an empty directory. Re-running this cell after cell 2D has moved
# files between splits would otherwise copy an image back into its source
# split while a copy still sits in another one (train/val leakage), and the
# freshly reset `stats` below would no longer match what is on disk.
shutil.rmtree(MERGED_DIR, ignore_errors=True)
for s in ['train','valid','test']:
    os.makedirs(f'{MERGED_DIR}/{s}/images', exist_ok=True)
    os.makedirs(f'{MERGED_DIR}/{s}/labels', exist_ok=True)

# Target class ID -> display name; label files use the integer IDs.
TARGET_CLASSES = {0: 'Fighting', 1: 'Fire', 2: 'Gun', 3: 'Knife'}
# Running totals filled in by copy_dataset() and the merge loop below.
stats = {'total_images': 0, 'per_class': Counter(), 'per_source': Counter()}

def copy_dataset(src_dir, prefix, class_remap=None, source_data_yaml=None):
    """Copy one YOLO-format dataset into MERGED_DIR, optionally remapping class IDs.

    Args:
        src_dir: Dataset root containing ``<split>/images`` and
            ``<split>/labels`` for any of the splits train/valid/test.
        prefix: Prepended to every copied file name (``<prefix>_<stem>``) so
            files from different sources do not overwrite each other.
        class_remap: Optional mapping of source class NAME -> target class ID.
            Names are compared case-insensitively; on duplicate keys after
            lower-casing the first entry wins.
        source_data_yaml: Optional path to the source data.yaml, used to
            translate source class IDs into names.

    Returns:
        Number of images copied.

    Side effects:
        Writes images/labels under MERGED_DIR and increments
        ``stats['per_class']`` for every label line kept.

    Notes:
        Images without a label file are skipped. Remapping only happens when
        both ``class_remap`` and class names from ``source_data_yaml`` are
        available; otherwise label files are copied verbatim. When remapping,
        label lines whose class cannot be mapped are dropped, and images left
        with no label lines are skipped.
    """
    # Source class ID -> class name, read from the source data.yaml if present.
    src_class_names = {}
    if source_data_yaml and os.path.exists(source_data_yaml):
        with open(source_data_yaml) as f:
            # `or {}`: an empty data.yaml parses to None and must not crash.
            names = (yaml.safe_load(f) or {}).get('names', [])
        if isinstance(names, list):
            src_class_names = dict(enumerate(names))
        elif isinstance(names, dict):
            src_class_names = names

    do_remap = bool(class_remap and src_class_names)
    remap_by_name = {}
    passthrough_ids = set()
    if do_remap:
        # Build the case-insensitive lookup once instead of scanning
        # class_remap for every label line.
        for key, target in class_remap.items():
            remap_by_name.setdefault(str(key).lower(), target)
        passthrough_ids = set(class_remap.values())

    copied = 0
    for split in ['train', 'valid', 'test']:
        img_dir = os.path.join(src_dir, split, 'images')
        lbl_dir = os.path.join(src_dir, split, 'labels')
        if not os.path.isdir(img_dir):
            continue
        for img_file in glob.glob(os.path.join(img_dir, '*')):
            stem = Path(img_file).stem
            ext = Path(img_file).suffix
            lbl_file = os.path.join(lbl_dir, f'{stem}.txt')
            if not os.path.exists(lbl_file):
                continue
            new_name = f'{prefix}_{stem}'
            new_img = os.path.join(MERGED_DIR, split, 'images', f'{new_name}{ext}')
            new_lbl = os.path.join(MERGED_DIR, split, 'labels', f'{new_name}.txt')

            if do_remap:
                remapped = []
                with open(lbl_file) as f:
                    for line in f:
                        parts = line.strip().split()
                        if len(parts) < 5:
                            continue
                        try:
                            src_id = int(parts[0])
                        except ValueError:
                            # Malformed class token: drop the line, as is
                            # already done for lines that are too short.
                            continue
                        # str(): YAML may parse unquoted class names as
                        # numbers, which have no .lower().
                        src_name = str(src_class_names.get(src_id, '')).lower()
                        tgt = remap_by_name.get(src_name)
                        # NOTE(review): when the name is not in class_remap,
                        # a source ID that happens to equal one of the target
                        # IDs is kept unchanged. This helps sources already
                        # using the target numbering but can mislabel an
                        # unrelated class that shares the ID - confirm intent.
                        if tgt is None and src_id in passthrough_ids:
                            tgt = src_id
                        if tgt is None:
                            continue
                        parts[0] = str(tgt)
                        remapped.append(' '.join(parts))
                        stats['per_class'][TARGET_CLASSES[tgt]] += 1
                if not remapped:
                    continue
                shutil.copy2(img_file, new_img)
                with open(new_lbl, 'w') as f:
                    f.write('\n'.join(remapped) + '\n')
            else:
                # No remapping information: copy labels verbatim and only
                # tally the class IDs found.
                shutil.copy2(img_file, new_img)
                shutil.copy2(lbl_file, new_lbl)
                with open(lbl_file) as f:
                    for line in f:
                        parts = line.strip().split()
                        if not parts:
                            continue
                        try:
                            cid = int(parts[0])
                        except ValueError:
                            continue
                        stats['per_class'][TARGET_CLASSES.get(cid, f'Unk_{cid}')] += 1
            copied += 1
    return copied

# Merge the original dataset first, then every extra source, and print a summary.
print('Copying original dataset...')
n = copy_dataset('/content/datasets/original', prefix='orig')
stats['per_source']['Original'] = n
stats['total_images'] += n
print(f'  {n} images from original dataset')

for ds_idx, ds_info in enumerate(downloaded_paths):
    name = ds_info['name']
    path = ds_info['local_path']
    remap = ds_info['class_map']
    # The file prefix must be unique per source. A truncated name alone can
    # collide (e.g. "Uploaded: guns_v1.zip" / "Uploaded: guns_v2.zip") and
    # silently overwrite files, so the list index is included; characters
    # that are not alphanumeric (such as ':') are replaced with '_'.
    slug = ''.join(ch if ch.isalnum() else '_' for ch in name.lower())[:15]
    pfx = f'ds{ds_idx}_{slug}'
    dy = os.path.join(path, 'data.yaml')
    print(f'\nMerging: {name}...')
    if os.path.exists(dy):
        with open(dy) as f:
            # `or {}`: an empty data.yaml parses to None.
            print(f'  Source classes: {(yaml.safe_load(f) or {}).get("names", "?")}')
    n = copy_dataset(path, prefix=pfx, class_remap=remap, source_data_yaml=dy)
    # += so two sources sharing a display name are summed, not overwritten.
    stats['per_source'][name] += n
    stats['total_images'] += n
    print(f'  {n} images merged')

print(f'\n{"="*60}')
print(f'MERGE COMPLETE - Total: {stats["total_images"]} images')
print(f'{"="*60}')
for src, count in stats['per_source'].items():
    print(f'  {src}: {count}')
print('Per class:')
for cls, count in stats['per_class'].most_common():
    print(f'  {cls}: {count}')

In [None]:
# 2D: Re-split and create data.yaml
def _move_to_split(img_path, split):
    """Move an image and its label file (if one exists) into MERGED_DIR/<split>."""
    img = Path(img_path)
    # Build the label path from the stem. str.replace(ext, '.txt') rewrites
    # every occurrence of the extension inside the path and corrupts the
    # path entirely when the image has no extension.
    lbl = img.parent.parent / 'labels' / f'{img.stem}.txt'
    shutil.move(str(img), f'{MERGED_DIR}/{split}/images/{img.name}')
    if lbl.exists():
        shutil.move(str(lbl), f'{MERGED_DIR}/{split}/labels/{lbl.name}')


all_train = glob.glob(f'{MERGED_DIR}/train/images/*')
all_valid = glob.glob(f'{MERGED_DIR}/valid/images/*')
all_test = glob.glob(f'{MERGED_DIR}/test/images/*')
print(f'Before: train={len(all_train)} valid={len(all_valid)} test={len(all_test)}')

total = len(all_train) + len(all_valid) + len(all_test)
tv = int(total * 0.15)  # target validation size (15%)
tt = int(total * 0.05)  # target test size (5%)
# Only re-split when the validation set is under half of its target size.
if len(all_valid) < tv * 0.5:
    print('Redistributing...')
    # Pool everything into train, then carve out new valid/test sets.
    for img in all_valid + all_test:
        _move_to_split(img, 'train')
    # Sort before shuffling: glob order depends on the filesystem, so the
    # fixed seed alone does not give the same split on every run.
    ai = sorted(glob.glob(f'{MERGED_DIR}/train/images/*'))
    random.seed(42); random.shuffle(ai)
    for imgs, sp in [(ai[:tv], 'valid'), (ai[tv:tv+tt], 'test')]:
        for img in imgs:
            _move_to_split(img, sp)

ft = len(glob.glob(f'{MERGED_DIR}/train/images/*'))
fv = len(glob.glob(f'{MERGED_DIR}/valid/images/*'))
fte = len(glob.glob(f'{MERGED_DIR}/test/images/*'))
print(f'Final: train={ft} valid={fv} test={fte} total={ft+fv+fte}')

data_yaml = {'names': ['Fighting','Fire','Gun','Knife'], 'nc': 4,
    'train': f'{MERGED_DIR}/train/images',
    'val': f'{MERGED_DIR}/valid/images',
    'test': f'{MERGED_DIR}/test/images'}
data_yaml_path = f'{MERGED_DIR}/data.yaml'
with open(data_yaml_path, 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)
print(f'data.yaml created: {data_yaml_path}')

---
## Section 3: Fine-Tune Model from best.pt

Key differences from training from scratch:
- model = YOLO('best.pt') instead of YOLO('yolov8s.pt')
- Lower learning rate (0.0005 vs 0.001)
- Fewer epochs (100 vs 200)
- Less aggressive augmentation

In [None]:
# 3A: Configure fine-tuning
STRATEGY = 'full'  # 'full' or 'freeze_backbone'

# None means "do not pass a freeze argument"; the training cell drops None values.
_freeze_layers = 10 if STRATEGY == 'freeze_backbone' else None

# Key order is kept stable because this dict is later dumped to
# training_config.json as-is.
FINETUNE_CONFIG = dict(
    # checkpoint to start from and dataset definition
    model=MODEL_PATH,
    data=data_yaml_path,
    # run length / hardware
    epochs=100,
    patience=30,
    batch=16,
    imgsz=640,
    device=0,
    # optimiser and schedule
    optimizer='AdamW',
    lr0=0.0005,
    lrf=0.01,
    weight_decay=0.0005,
    warmup_epochs=3,
    # augmentation settings
    hsv_h=0.015,
    hsv_s=0.5,
    hsv_v=0.3,
    degrees=5.0,
    translate=0.15,
    scale=0.4,
    shear=2.0,
    flipud=0.0,
    fliplr=0.5,
    mosaic=1.0,
    mixup=0.1,
    copy_paste=0.1,
    erasing=0.3,
    close_mosaic=15,
    # strategy-dependent and regularisation
    freeze=_freeze_layers,
    dropout=0.05,
    # outputs
    save=True,
    save_period=10,
    plots=True,
    project='/content/runs',
    name='smart_lens_v2',
    exist_ok=True,
)

_summary_fields = [
    f'Strategy: {STRATEGY}',
    f'LR: {FINETUNE_CONFIG["lr0"]}',
    f'Epochs: {FINETUNE_CONFIG["epochs"]}',
]
print(' | '.join(_summary_fields))

In [None]:
# 3B: START FINE-TUNING
from ultralytics import YOLO

model = YOLO(FINETUNE_CONFIG['model'])
print(f'Model loaded | Classes: {model.names}')
print('Starting fine-tuning...\n')

start_time = time.time()
# 'model' was already consumed by the constructor above, and entries set to
# None (e.g. 'freeze' for the 'full' strategy) are left out of the call.
train_args = {}
for cfg_key, cfg_value in FINETUNE_CONFIG.items():
    if cfg_key == 'model' or cfg_value is None:
        continue
    train_args[cfg_key] = cfg_value
results = model.train(**train_args)
elapsed_seconds = time.time() - start_time
training_time = elapsed_seconds / 60  # minutes
print(f'\nFine-tuning complete! Time: {training_time:.1f} minutes')

---
## Section 4: Evaluate & Compare

In [None]:
# 4A: Validate fine-tuned model
from ultralytics import YOLO
best_v2_path = '/content/runs/smart_lens_v2/weights/best.pt'
model_v2 = YOLO(best_v2_path)
metrics_v2 = model_v2.val(data=data_yaml_path, split='val')

print(f'\n{"="*60}')
print(f'  v2 Metrics: mAP50={metrics_v2.box.map50:.4f} | mAP50-95={metrics_v2.box.map:.4f}')
print(f'  Precision={metrics_v2.box.mp:.4f} | Recall={metrics_v2.box.mr:.4f}')
# Per-class AP50. `ap50` has one entry per class listed in `ap_class_index`
# (classes that occur in the validation data), so it must be paired with that
# index rather than with the position in TARGET_CLASSES; otherwise a class
# missing from validation shifts every later value onto the wrong name.
for cls_idx, cls_ap50 in zip(metrics_v2.box.ap_class_index, metrics_v2.box.ap50):
    cn = TARGET_CLASSES.get(int(cls_idx), f'Unk_{int(cls_idx)}')
    print(f'    {cn}: {cls_ap50:.4f}')
print(f'{"="*60}')

In [None]:
# 4B: Compare v1 vs v2
# v1 baseline numbers are hard-coded.
# NOTE(review): they were presumably measured on v1's own validation split,
# while v2 is scored on the merged/re-split data, so treat the delta as
# indicative only - confirm, or re-validate v1 on data_yaml_path.
v1 = {'mAP50': 0.7255, 'mAP50_95': 0.3322, 'precision': 0.8410, 'recall': 0.6213}
v2 = {'mAP50': metrics_v2.box.map50, 'mAP50_95': metrics_v2.box.map,
      'precision': metrics_v2.box.mp, 'recall': metrics_v2.box.mr}

print(f'\n{"Metric":<15} {"v1":>10} {"v2":>10} {"Delta":>10}')
print('-'*50)
for k in v1:
    d = v2[k] - v1[k]
    # The '+' flag keeps the sign attached to the number and the column at a
    # fixed width of 10, matching the header.
    print(f'{k:<15} {v1[k]:>10.4f} {v2[k]:>10.4f} {d:>+10.4f}')

if v2['mAP50'] > v1['mAP50']:
    print('\nv2 is BETTER than v1!')
else:
    print('\nv2 similar/lower - add more diverse data for bigger gains')

---
## Section 5: Export & Save

In [None]:
# 5A: Export ONNX + Save to Drive
model_v2.export(format='onnx', simplify=True)
ts = datetime.now().strftime('%Y%m%d_%H%M')
odir = f'{DRIVE_DIR}/models/smart_lens_v2_{ts}'
os.makedirs(odir, exist_ok=True)

# (source path, file name inside the timestamped Drive folder)
_artifacts = [
    (best_v2_path, 'best.pt'),
    (best_v2_path.replace('.pt','.onnx'), 'best.onnx'),
    ('/content/runs/smart_lens_v2/weights/last.pt', 'last.pt'),
]
for _src_path, _out_name in _artifacts:
    shutil.copy(_src_path, f'{odir}/{_out_name}')

# Summary of the run, stored next to the weights.
_box = metrics_v2.box
md = {}
md['mAP50'] = float(_box.map50)
md['mAP50_95'] = float(_box.map)
md['precision'] = float(_box.mp)
md['recall'] = float(_box.mr)
md['training_time_min'] = round(training_time, 1)
md['base'] = 'smart_lens_v1'
md['strategy'] = STRATEGY
md['total_images'] = stats['total_images']
md['classes'] = list(TARGET_CLASSES.values())

with open(f'{odir}/metrics.json', 'w') as f:
    json.dump(md, f, indent=2)
with open(f'{odir}/training_config.json', 'w') as f:
    # default=str stringifies any value json cannot serialise directly.
    json.dump(FINETUNE_CONFIG, f, indent=2, default=str)

print(f'Saved to: {odir}/')
for fn in os.listdir(odir):
    sz = os.path.getsize(f'{odir}/{fn}') / 1024 / 1024
    print(f'  {fn} ({sz:.1f} MB)')

In [None]:
# 5B: Download to local machine
from google.colab import files

# The copy saved to Drive in the previous cell is the one offered for download.
_download_target = f'{odir}/best.pt'
files.download(_download_target)
print('Place in: Smart-Lens-FYP/trained_models/smart_lens_v2_XXXX/best.pt')

---
## Section 6: Quick Test

In [None]:
import matplotlib.pyplot as plt

# Show predictions for up to eight test images in a 2x4 grid.
test_imgs = glob.glob(f'{MERGED_DIR}/test/images/*')[:8]
fig, axes = plt.subplots(2, 4, figsize=(20, 10))
panels = axes.flatten()
for idx in range(len(panels)):
    ax = panels[idx]
    if idx < len(test_imgs):
        img_path = test_imgs[idx]
        prediction = model_v2.predict(img_path, conf=0.4, verbose=False)[0]
        # Channel axis is reversed before display
        # (presumably BGR -> RGB for matplotlib - confirm).
        ax.imshow(prediction.plot()[:, :, ::-1])
        ax.set_title(Path(img_path).stem[:30], fontsize=8)
    # Panels without an image stay blank; every panel hides its axes.
    ax.axis('off')
plt.suptitle('Smart Lens v2 - Test Predictions', fontsize=16)
plt.tight_layout()
plt.savefig(f'{odir}/test_predictions.png', dpi=150)
plt.show()

---
## Done! Next Steps

1. Download best.pt (above or from Google Drive)
2. Place in Smart-Lens-FYP/trained_models/smart_lens_v2_XXXX/
3. Test: python smart_lens_v2.py --source 0 --model trained_models/smart_lens_v2_XXXX/best.pt

### To train v3 with more data:
1. Go to universe.roboflow.com
2. Find gun/knife/fire/fighting datasets
3. Click Download > YOLOv8 > show download code
4. Add to Cell 2B ADDITIONAL_DATASETS list
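5. Upload v2 best.pt as starting model (Cell 1C loads `models/best_v1.pt` from Drive whenever that file exists, so replace or remove it first)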
6. Re-run this notebook