# COMPRESSED OBJECT DETECTION

**Extension of pruning and quantization to the task of object detection**

In this work, we extend pruning, a compression technique that discards unnecessary model connections, and weight-sharing techniques to the task of object detection. With our approach we are able to compress a state-of-the-art object detection model by 30.0% without a loss in performance. We also show that our compressed model can be easily initialized with existing pre-trained weights, and thus is able to fully utilize published state-of-the-art model zoos.

In [2]:
## Imports
import numpy as np
import os
import torch
from datetime import date, time, datetime
from zipfile import ZipFile
import cv2

import utils

import detectron2
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer

import xml.etree.ElementTree as ET
from typing import List, Tuple, Union
from fvcore.common.file_io import PathManager
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

# Section 1. MODEL TRAINING

### Random seed
Set the random seed for both torch and numpy in order to make the results reproducible

In [3]:
def set_seed(seed):
    """Seed every random number generator in use so runs are repeatable.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    import random  # local import: not part of the notebook's import cell

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, which
    # undermines the deterministic flag above, so it is switched off as well.
    torch.backends.cudnn.benchmark = False


set_seed(42)

### Datasets
Register the dataset in a format that Detectron2 understands

In [4]:
## Dataset
# Root folder of the dataset on disk.
dirname = './dataset/data/'

# Detection labels. A label's position in this tuple is used as its
# category id when the annotations are loaded.
# NOTE(review): 'hyna' presumably mirrors the spelling used in the annotation
# files -- confirm before correcting it to 'hyena'.
CLASS_NAMES = (
    'giraffe', 'person', 'zebra', 'elephant', 'impala', 'monkey', 'lion',
    'leopard', 'crocodile', 'buffalo', 'hyna', 'bird', 'gorilla',
)

def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
    """
    Load Pascal VOC detection annotations to Detectron2 format.
        Args:
            dirname: Contain "annotations", "images", "train.txt", "valid.txt"
            split (str): one of "train", "valid"
            class_names: list or tuple of class names
        Returns:
            list[dict]: one record per id listed in "<split>.txt", each with
            the keys "file_name", "image_id", "height", "width" and
            "annotations" (a list of dicts with "category_id", "bbox" and
            "bbox_mode").
        Raises:
            ValueError: if an annotated object name is not in class_names.
    """

    with PathManager.open(os.path.join(dirname, split + ".txt")) as f:
        # The `np.str` alias no longer exists in current NumPy; the builtin
        # `str` is the same dtype. `ndmin=1` keeps the result iterable when
        # the split file lists a single id (loadtxt would otherwise return a
        # 0-d array).
        fileids = np.loadtxt(f, dtype=str, ndmin=1)

    # Needs to read many small annotation files. Makes sense at local
    annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "annotations/"))
    dicts = []
    for fileid in fileids:
        anno_file = os.path.join(annotation_dirname, fileid + ".xml")
        jpeg_file = os.path.join(dirname, "images/", fileid + ".jpg")

        with PathManager.open(anno_file) as f:
            tree = ET.parse(f)

        r = {
            "file_name": jpeg_file,
            "image_id": fileid,
            "height": int(tree.findall("./size/height")[0].text),
            "width": int(tree.findall("./size/width")[0].text),
        }
        instances = []

        for obj in tree.findall("object"):
            cls = obj.find("name").text
            bbox = obj.find("bndbox")
            bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
            # Shift the top-left corner by one pixel.
            # NOTE(review): presumably the annotations use 1-based pixel
            # coordinates as in Pascal VOC -- confirm for this dataset.
            bbox[0] -= 1.0
            bbox[1] -= 1.0
            instances.append(
                # The category id is the label's position in class_names.
                {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
            )
        r["annotations"] = instances
        dicts.append(r)
    return dicts


def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES):
    """Register one dataset split with the Detectron2 dataset and metadata catalogs.

    Args:
        name: key under which the split is registered in both catalogs.
        dirname: dataset root folder handed to ``load_voc_instances``.
        split: split name handed to ``load_voc_instances``.
        year: stored as-is in the metadata.
        class_names: label names; stored in the metadata as ``thing_classes``
            and handed to ``load_voc_instances``.
    """
    # A zero-argument callable is registered, so the annotation files are only
    # parsed when that callable is invoked, not at registration time.
    DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split, class_names))
    MetadataCatalog.get(name).set(
        thing_classes=list(class_names), dirname=dirname, year=year, split=split
    )

In [5]:

# Register the training split, then fetch its records and metadata.
# Each split must be registered before it is looked up in the catalogs.
register_pascal_voc('train_data', dirname, split = 'train', year='2020', class_names=CLASS_NAMES)
train_dataset_dicts = DatasetCatalog.get('train_data')
train_metadata=MetadataCatalog.get('train_data')
# Same for the validation split.
register_pascal_voc('valid_data', dirname, split = 'valid', year='2020', class_names=CLASS_NAMES)
valid_dataset_dicts = DatasetCatalog.get('valid_data')
valid_metadata=MetadataCatalog.get('valid_data')

In [6]:
#valid_dataset_dicts[0]

In [6]:
# Let's look at some of the training images: draw the ground-truth
# annotations on a few samples and save the rendered pictures.

images_index = [0,7,78,156,444]
output_dir = './temp_files/train_images/'
# cv2.imwrite returns False instead of raising when the target folder does
# not exist, so make sure it is there before writing.
os.makedirs(output_dir, exist_ok=True)
for index in images_index:
    record = train_dataset_dicts[index]
    im_path = record["file_name"]
    img = cv2.imread(im_path)
    # OpenCV loads images as BGR; the channel flip hands RGB to the visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(record)
    name = os.path.basename(im_path)
    # Flip the channels back before handing the picture to OpenCV for writing.
    my_im = vis.get_image()[:, :, ::-1]
    filename = output_dir + name
    cv2.imwrite(filename, my_im)

### Model
I will be using faster_rcnn_R_50_FPN_3x, initialized with the COCO pre-trained weights

In [7]:
import utils
def faster_rcnn_R_50_FPN_3x(init_weights_flag = True, path='./chek_points'):
    """
    Fine-tune Faster R-CNN (R50-FPN, 3x config) on "train_data" and build a predictor.

    Args:
        init_weights_flag: value stored on the config as ``with_coco_init``.
            NOTE(review): this function always points MODEL.WEIGHTS at the
            COCO checkpoint and never reads the flag itself -- confirm that
            something downstream consumes ``with_coco_init``.
        path: base directory; ``utils.model_dir()`` is appended to it to form
            the output directory of this run.

    Returns:
        Tuple ``(predictor, cfg, trainer)``: a DefaultPredictor configured to
        read ``model_final.pth`` from the output directory, the config, and
        the trainer that ran the training.

    Training setup: 2000 iterations, base learning rate 0.02, 8 images per
    batch and 256 sampled regions per image.
    """
    config_name = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
    output_dir = path + utils.model_dir()
    utils.handle_dirs(output_dir)

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_name))

    # Data: train on the registered training split, no evaluation during training.
    cfg.DATASETS.TRAIN = ("train_data",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 4

    # Model: start from the COCO checkpoint of the same architecture.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13

    # Solver.
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = 2000

    cfg.with_coco_init = init_weights_flag

    # Each run writes to its own output directory.
    os.makedirs(output_dir, exist_ok=True)
    cfg.OUTPUT_DIR = output_dir

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

    # Switch the config to inference: final weights, score threshold and
    # the validation split.
    cfg.MODEL.WEIGHTS = os.path.join(output_dir, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.67
    cfg.DATASETS.TEST = ("valid_data", )

    predictor = DefaultPredictor(cfg)
    return predictor, cfg, trainer
#coco_init_predictor, coco_init_cfg, coco_init_trainer = faster_rcnn_R_50_FPN_3x()

In [8]:
#pred, conff, treen = faster_rcnn_R_50_FPN_3x()

#### Load checkpoints

In [7]:
def load_checkpoints(path, filename):
    """
    Rebuild the Faster R-CNN setup around a saved checkpoint.
    @parameter: path==>directory where the checkpoints are stored; it is also used as the output directory
    @parameter: filename==>file name of the checkpoint inside path, it is a .pth file
    @return: tuple (predictor, cfg, trainer)

    """
    weights_file = os.path.join(path,filename)

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
    cfg.OUTPUT_DIR = path

    # Data.
    cfg.DATASETS.TRAIN = ("train_data",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 4

    # Model: weights come from the checkpoint file.
    cfg.MODEL.WEIGHTS = weights_file
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 13

    # Solver values; no training is started in this function.
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.SOLVER.BASE_LR = 0.02
    cfg.SOLVER.MAX_ITER = 10

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)

    # NOTE(review): the trainer is built before the score threshold is set,
    # so presumably only the predictor's model sees the 0.67 threshold --
    # confirm this is intended.
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.67
    cfg.DATASETS.TEST = ("valid_data", )

    predictor = DefaultPredictor(cfg)
    return predictor, cfg, trainer

In [1]:
# Placeholder: replace with the directory that holds the trained checkpoint.
CHECKPOINT_DIR = "path where your checkpoint are stored"
# File name of the checkpoint inside CHECKPOINT_DIR.
CHECKPOINT = "model_final.pth"
# Build the predictor, config and trainer from that checkpoint.
predictor, cfg_file, trainer_file = load_checkpoints(CHECKPOINT_DIR, CHECKPOINT)

### Model evaluation

In [24]:
######## ---------Final results--------------
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
import pandas as pd
import csv
def model_evaluator(cfg,model,name='FULL_PRECISION',pruned=''):
    """Save the model weights, evaluate on "valid_data" and dump the bbox metrics.

    Everything is written below ``./RESULTS/<name><pruned>``: the state dict
    goes to ``checkpoints/model.pth`` and the bounding-box metrics to
    ``evaluations.csv`` (one ``key,value`` line per metric).

    Args:
        cfg: config handed to the evaluator and to the test data loader.
        model: model to save and evaluate.
        name: first part of the results folder name.
        pruned: suffix appended to ``name`` in the results folder name.

    Returns:
        The results returned by ``inference_on_dataset``.
    """
    save_dir = './RESULTS/'+ name + pruned
    utils.handle_dirs(save_dir)
    checkpoint_dir = save_dir + '/checkpoints'
    utils.handle_dirs(checkpoint_dir)
    torch.save(model.state_dict(), checkpoint_dir + '/model.pth')

    evaluator = COCOEvaluator("valid_data", cfg, False, output_dir=save_dir)
    val_loader = build_detection_test_loader(cfg, "valid_data")
    # A single pass is enough: this call both runs the model over the loader
    # and returns the metrics, so running it twice only doubles the cost.
    evaluations = inference_on_dataset(model, val_loader, evaluator)

    bbox_metrics = evaluations['bbox']
    with open(save_dir+'/evaluations.csv', 'w') as f:
        for key, value in bbox_metrics.items():
            f.write("%s,%s\n"%(key,value))
    return evaluations

In [1]:
#model_evaluator(cfg_file, trainer_file.model,name='L1',pruned='_100_percent_pruned')

In [14]:
# from detectron2.evaluation import COCOEvaluator, inference_on_dataset
# from detectron2.data import build_detection_test_loader
# save_dir = './chek_points' + utils.model_dir('/evaluation')
# utils.handle_dirs(save_dir)
# evaluator = COCOEvaluator("valid_data", coco_init_cfg, False, output_dir=save_dir)
# val_loader = build_detection_test_loader(coco_init_cfg, "valid_data")
# inference_on_dataset(coco_init_trainer.model, val_loader, evaluator)

### Flops count

In [15]:
#valid_metadata
from detectron2.utils.analysis import flop_count_operators
def mean_flops_counter(model,images_dict):
    """
    This function counts the flops of a model and returns their mean over the given dataset.
    @parameter: model==>this is the model whose flops you want to count
    @parameter: images_dict==>input images in detectron2 format (list[dict]); each dict needs a "file_name" entry
    @return: mean of the per-image 'conv' flop counts
    @raises: ValueError if images_dict yields no images, FileNotFoundError if an image cannot be read

    """
    conv_flops = []
    for record in images_dict:
        pixels = cv2.imread(record["file_name"])
        if pixels is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError("could not read image: %s" % record["file_name"])
        # Work on a shallow copy so the caller's dataset dicts do not end up
        # holding one image tensor each after this function returns.
        model_input = dict(record)
        # HWC array -> CHW tensor.
        model_input["image"] = torch.Tensor(pixels).permute(2,0,1)
        flops_per_image = flop_count_operators(model, [model_input])
        conv_flops.append(flops_per_image['conv'])

    if not conv_flops:
        raise ValueError("images_dict is empty: cannot average flops over zero images")

    return sum(conv_flops)/len(conv_flops)

In [17]:
# model = trainer_file.model
# mean_flops_counter(model,valid_dataset_dicts)

# Section 2. MODEL PRUNING

In [27]:
# Model held by the trainer that load_checkpoints returned; the cells below
# prune its backbone layers.
model = trainer_file.model

In [28]:
# (module, parameter name) pairs for pruning: the 'weight' of conv1, conv2 and
# conv3 of every block in backbone stages res2..res5, ordered by stage, then
# block, then conv. Walking the stages instead of indexing each block by hand
# keeps the list in step with the number of blocks the backbone really has.
parameters_to_prune = tuple(
    (getattr(block, conv_name), 'weight')
    for stage_name in ('res2', 'res3', 'res4', 'res5')
    for block in getattr(model.backbone.bottom_up, stage_name)
    for conv_name in ('conv1', 'conv2', 'conv3')
)

#### Prune the parameters

In [29]:
import torch.nn.utils.prune as prune
# Prune across all listed tensors at once: the 20% of weights with the
# smallest absolute value are selected from the pooled set, not per layer.
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2,
)

#### After removing the re-parametrization

In [None]:
# Make the pruning permanent on exactly the tensors that were pruned.
# Iterating over parameters_to_prune keeps this step and the pruning step
# from drifting apart.
for pruned_module, param_name in parameters_to_prune:
    prune.remove(pruned_module, param_name)