In [5]:
import json
import random as rd
import os
import wandb

from src.globals import *
from src.register_datasets import register_datasets, register_by_ids
from src.pipeline_runner import run_pipeline
from src.train import do_train
from src.test import do_test

from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.modeling import build_model

In [6]:

# Setup detectron2 logger
from detectron2.utils.logger import setup_logger

# Logger handed to every do_train/do_test call in this notebook; log records are also written to ./log/main.log.
logger = setup_logger(output="./log/main.log")
# Level 0 is logging.NOTSET: on a non-root logger this does not mean "log everything" but
# "defer to the parent logger's effective level".
# NOTE(review): if the intent was to see all messages, an explicit DEBUG level is needed — confirm.
logger.setLevel(0)

# Active Learning

## Random Benchmark
First of all, we want to benchmark our models when the training data is chosen randomly.

In [7]:
# Register the project's datasets before any cell looks them up by name.
# NOTE(review): presumably this populates detectron2's DatasetCatalog (queried below) — confirm in src.register_datasets.
register_datasets()

In [None]:
%%capture
# Read the full training annotation file into memory.
with open(PATH_TRAIN_FULL_JSON) as train_json_file:
    train_dict = json.load(train_json_file)

# Collect the image id of every record in the full training dataset.
full_train_records = DatasetCatalog.get(TRAIN_DATASET_FULL)
image_ids_full = [record["image_id"] for record in full_train_records]





# Start from detectron2's defaults and overlay the COCO Mask R-CNN R50-FPN 3x recipe.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

cfg.DATASETS.TEST = (VALIDATION_DATASET_SLIM,)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2  # images per training batch
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 300    # placeholder; the active-learning loop overrides this every round
# Disable learning-rate warm-up. The option lives under SOLVER: assigning a top-level
# cfg.WARMUP_ITERS only creates an unused key and leaves detectron2's default warm-up active.
cfg.SOLVER.WARMUP_ITERS = 0
cfg.SOLVER.STEPS = []        # no LR decay steps here; the active-learning loop sets them every round
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # RoI-head samples per image (detectron2 default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.OUTPUT_DIR = "./output/al_tester"
# Uncomment to start from previously trained weights in OUTPUT_DIR instead of the model zoo:
#cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

# Instantiate the model described by cfg.
# NOTE(review): detectron2's build_model does not itself load cfg.MODEL.WEIGHTS; unless do_test/do_train
# load a checkpoint internally, the baseline below evaluates unloaded parameters — confirm.
model = build_model(cfg)
# Start a Weights & Biases run; sync_tensorboard=True asks wandb to mirror TensorBoard event logs.
wandb.init(project="activeCell-ACDC", sync_tensorboard=True)
# Baseline evaluation before any active-learning round has trained the model.
result = do_test(cfg, model, logger)
# The cell runs under %%capture, so this print is captured rather than shown.
print(result)

# Random-sampling benchmark: start from a seeded random subset of the training images, then
# grow it by up to 10 random images per round (at most 7 rounds), retraining and evaluating
# after each round.
rd.seed(0)
# Clamp the initial draw so datasets with fewer than 11 images do not raise ValueError.
sample_ids = rd.sample(image_ids_full, min(len(image_ids_full), 11))
# Build the remaining pool in dataset order. Going through list(set(...)) gives an order that
# is not guaranteed stable between runs, which would defeat the fixed seed above.
selected_ids = set(sample_ids)
missing_ids = [image_id for image_id in image_ids_full if image_id not in selected_ids]

# Evaluation result of every round, in training order.
results = []

for i in range(7):

    # Stop early once every image is already part of the training sample.
    if not missing_ids:
        break

    num_new_samples = min(len(missing_ids), 10)
    sample_ids += rd.sample(missing_ids, num_new_samples)
    print(sample_ids)
    selected_ids = set(sample_ids)
    missing_ids = [image_id for image_id in image_ids_full if image_id not in selected_ids]
    # NOTE(review): the same name is passed every round; presumably register_by_ids
    # re-registers or returns a fresh dataset name — confirm in src.register_datasets.
    dataset_name = register_by_ids("test_sample", sample_ids)

    cfg.DATASETS.TRAIN = (dataset_name,)
    # Scale the schedule with the sample size: 20 iterations per selected image,
    # with a single LR step at the halfway point.
    cfg.SOLVER.MAX_ITER = len(sample_ids) * 20
    cfg.SOLVER.STEPS = [len(sample_ids) * 10]

    # The first round passes resume=False; later rounds pass resume=True.
    do_train(cfg, model, logger, resume=i > 0)

    result = do_test(cfg, model, logger)
    print(result)
    results.append(result)
    


Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.



In [None]:
# Display the value returned by the most recent do_test call (the last completed round, or the baseline if no round ran).
result

In [None]:
# The 'segm' -> 'AP' entry of each round's evaluation result, in training order.
[round_result['segm']['AP'] for round_result in results]