In [1]:
# notebooks/colab.ipynb
import os
# Change to the MILS_HW2 directory first
os.chdir('../MILS_HW2')

# Cell 1: Setup and downloads
!pip install -r requirements.txt
# Download all datasets
!python scripts/download_imagenette_cls.py
!python scripts/download_coco_det.py
!python scripts/download_voc_seg.py

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Processing /rapids/aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (from -r requirements.txt (line 4))
[31mERROR: Could not install packages due to an OSError: [Errno 2] No such file or directory: '/rapids/aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl'
[0m[31m
[0mTraining subset size: 240
Validation subset size: 60
Downloading training set...
Downloading COCO annotations...
Extracting annotations...
loading annotations into memory...
Done (t=8.30s)
creating index...
index created!
Downloading 240 images for train set...
Downloaded 10/240 images
Downloaded 20/240 images
Downloaded 30/240 images
Downloaded 40/240 images
Downloaded 50/240 images
Downloaded 60/240 images
Downloaded 70/240 images
Downloaded 80/240 images
Downloaded 90/240 images
Downloaded 100/240 images
Downloaded 110/240 images
Downloaded 120/240 images
Downloaded 130/240 images
Downloaded 140/2

In [2]:
# Cell 2: Verify downloads
# Add a verification step
print("Verifying downloads...")
import torch
data_paths = {
    'seg': './data/VOCdevkit/VOC2012',
    'det': './data/coco_subset',
    'cls': './data/imagenette2-160'
}

for task, path in data_paths.items():
    if os.path.exists(path):
        print(f"{task} dataset found at {path}")
    else:
        print(f"WARNING: {task} dataset not found at {path}")
        
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Verifying downloads...
seg dataset found at ./data/VOCdevkit/VOC2012
det dataset found at ./data/coco_subset
cls dataset found at ./data/imagenette2-160
Using device: cuda


In [3]:
# Cell 3: Model and Data initialization 
from src.models.unified_model import UnifiedModel
from src.datasets.data_loaders import create_dataloaders
from src.training.loss_functions import MultiTaskLoss, UncertaintyWeightedLoss
from configs.config import Config  # 使用Config類

# 初始化配置
config = Config()  # 創建Config實例，不是模組

# 初始化損失函數
criterion = MultiTaskLoss()
print("Loss functions initialized")

# 初始化模型
model = UnifiedModel().to(device)
print(f"Total parameters: {sum(p.numel() for p in model.parameters())/1e6:.1f}M")

total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params/1e6:.2f}M")
assert total_params <= 8e6, f"Parameter count {total_params/1e6:.2f}M exceeds 8M limit!"

# 創建數據載入器 (根據你的下載檔案)
print("Loading datasets...")
dataloaders = create_dataloaders(
    batch_size=config.batch_size,
    num_workers=config.num_workers
)
print("dataloaders", dataloaders)

# 準備datasets字典給trainer使用 (只用train set)
datasets = {
    'seg': dataloaders['seg']['train'],
    'seg_val': dataloaders['seg']['val'],
    'det': dataloaders['det']['train'],
    'det_val': dataloaders['det']['val'],
    'cls': dataloaders['cls']['train'],
    'cls_val': dataloaders['cls']['val']
}
print("datasets:\n", datasets)
print("Datasets loaded successfully!")
print(f"Detection batches: {len(datasets['det'])}")
print(f"Segmentation batches: {len(datasets['seg'])}")
print(f"Classification batches: {len(datasets['cls'])}")

Loss functions initialized
Total parameters: 3.2M
Total parameters: 3.15M
Loading datasets...




loading annotations into memory...
Done (t=8.52s)
creating index...
index created!
loading annotations into memory...
Done (t=1.08s)
creating index...
index created!
dataloaders {'seg': {'train': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf190>, 'val': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf310>}, 'cls': {'train': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bfc10>, 'val': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf9a0>}, 'det': {'train': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf6d0>, 'val': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf520>}}
datasets:
 {'seg': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf190>, 'seg_val': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf310>, 'det': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf6d0>, 'det_val': <torch.utils.data.dataloader.DataLoader object at 0x7f868f7bf520>, 'cls': <torch.utils.data.dat

In [4]:
# Cell 4: Three-stage training
from src.training.trainer import MultiTaskTrainer
trainer = MultiTaskTrainer(model, datasets, config, criterion=criterion)

# Stage 1: Segmentation baseline
print("=== Stage 1: Segmentation Only ===")
seg_performance = trainer.train_stage1_segmentation(epochs=20)
print(f"Segmentation mIoU baseline: {seg_performance['miou']:.4f}")

# Debug: Check dataloader lengths and try to fetch a batch
print('Detection train batches:', len(datasets['det']))
print('Classification train batches:', len(datasets['cls']))
    
# Stage 2: Detection with EWC
print("=== Stage 2: Detection Only ===")
det_performance = trainer.train_stage2_detection(epochs=15)
seg_drop = trainer.evaluate_forgetting('segmentation')
print(f"Detection mAP baseline: {det_performance['map']:.4f}")
print(f"Segmentation mIoU drop: {seg_drop:.2f}%")

# Stage 3: Classification with replay
print("=== Stage 3: Classification Only ===")
cls_performance = trainer.train_stage3_classification(epochs=15)
final_performance = trainer.evaluate_all_tasks()

# # Validate forgetting constraint
# success = trainer.validate_forgetting_constraint()
# print(f"Forgetting constraint satisfied: {success}")
# 檢查5%性能下降約束
for task, drop in final_performance['drops'].items():
    print(f"{task} performance drop: {drop:.2f}%")
    assert drop <= 5.0, f"{task} drop {drop:.2f}% exceeds 5% limit!"

TypeError: MultiTaskTrainer.__init__() got an unexpected keyword argument 'criterion'

In [None]:
# Cell 5: Final evaluation
# !python scripts/eval.py --weights checkpoints/final_model.pt --dataroot data --tasks all
model.eval()
dummy_input = torch.randn(1, 3, 512, 512).to(device)

# 預熱
for _ in range(10):
    with torch.no_grad():
        _ = model(dummy_input)

# 測試推理速度
start_time = time.time()
for _ in range(100):
    with torch.no_grad():
        outputs = model(dummy_input)
avg_time = (time.time() - start_time) / 100 * 1000  # ms

print(f"Average inference time: {avg_time:.2f}ms")
assert avg_time <= 150, f"Inference time {avg_time:.2f}ms exceeds 150ms limit!"

In [None]:
# Cell 6: Save model
torch.save(model.state_dict(), 'mils_hw2.pt')
print("Model saved successfully!")

import json
results = {
    'final_performance': final_performance,
    'parameter_count': total_params,
    'inference_time_ms': avg_time
}

with open('results.json', 'w') as f:
    json.dump(results, f, indent=2)