In [1]:
import os
import sys
sys.path.append(os.path.abspath('../jetson-inference/python/training/detection/ssd'))
print(sys.path)


['/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '', '/usr/local/lib/python3.6/dist-packages', '/usr/local/lib/python3.6/dist-packages/torchvision-0.10.0a0+300a8a4-py3.6-linux-aarch64.egg', '/usr/local/lib/python3.6/dist-packages/Pillow-8.3.1-py3.6-linux-aarch64.egg', '/usr/local/lib/python3.6/dist-packages/torchaudio-0.9.0a0+33b2469-py3.6-linux-aarch64.egg', '/usr/lib/python3/dist-packages', '/usr/lib/python3.6/dist-packages', '/usr/local/lib/python3.6/dist-packages/IPython/extensions', '/root/.ipython', '/jetson-inference/python/training/detection/ssd']


In [8]:
import logging
import argparse
import datetime
import itertools
import torch

from torch.utils.data import DataLoader, ConcatDataset
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR

from vision.utils.misc import Timer, freeze_net_layers, store_labels
from vision.ssd.ssd import MatchPrior
from vision.ssd.vgg_ssd import create_vgg_ssd
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite
from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite
from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite
from vision.datasets.voc_dataset import VOCDataset
from vision.datasets.open_images import OpenImagesDataset
from vision.nn.multibox_loss import MultiboxLoss
from vision.ssd.config import vgg_ssd_config
from vision.ssd.config import mobilenetv1_ssd_config
from vision.ssd.config import squeezenet_ssd_config
from vision.ssd.data_preprocessing import TrainAugmentation, TestTransform




In [12]:
import torch
from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor
from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor
from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor
from vision.datasets.voc_dataset import VOCDataset
from vision.datasets.open_images import OpenImagesDataset
from vision.utils import box_utils, measurements
from vision.utils.misc import str2bool, Timer
import argparse
import pathlib
import numpy as np
import logging
import sys
from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor


class MeanAPEvaluator:
    """
    Mean Average Precision (mAP) evaluator
    """
    def __init__(self, dataset, net, arch='mb1-ssd', eval_dir='models/eval_results', 
                 nms_method='hard', iou_threshold=0.5, use_2007_metric=True, device='cuda:0'):
                 
        self.dataset = dataset
        self.net = net
        self.iou_threshold = iou_threshold
        self.use_2007_metric = use_2007_metric

        self.eval_path = pathlib.Path(eval_dir)
        self.eval_path.mkdir(exist_ok=True)
    
        self.true_case_stat, self.all_gb_boxes, self.all_difficult_cases = self.group_annotation_by_class(self.dataset)
        
        if arch == 'vgg16-ssd':
            self.predictor = create_vgg_ssd_predictor(net, nms_method=nms_method, device=device)
        elif arch == 'mb1-ssd':
            self.predictor = create_mobilenetv1_ssd_predictor(net, nms_method=nms_method, device=device)
        elif arch == 'mb1-ssd-lite':
            self.predictor = create_mobilenetv1_ssd_lite_predictor(net, nms_method=nms_method, device=device)
        elif arch == 'sq-ssd-lite':
            self.predictor = create_squeezenet_ssd_lite_predictor(net,nms_method=nms_method, device=device)
        elif arch == 'mb2-ssd-lite':
            self.predictor = create_mobilenetv2_ssd_lite_predictor(net, nms_method=nms_method, device=device)
        else:
            raise ValueError(f"Invalid network architecture type '{arch}' - it should be one of:  vgg16-ssd, mb1-ssd, mb1-ssd-lite, mb2-ssd-lite, sq-ssd-lite")

    def compute(self):
        is_test = self.net.is_test
        self.net.is_test = True
        
        results = []

        for i in range(len(self.dataset)):
            logging.debug(f"evaluating average precision   image {i} / {len(self.dataset)}")
            image = self.dataset.get_image(i)
            boxes, labels, probs = self.predictor.predict(image)
            indexes = torch.ones(labels.size(0), 1, dtype=torch.float32) * i
            results.append(torch.cat([
                indexes.reshape(-1, 1),
                labels.reshape(-1, 1).float(),
                probs.reshape(-1, 1),
                boxes + 1.0  # matlab's indexes start from 1
            ], dim=1))
            
        results = torch.cat(results)
        self.net.is_test = is_test
        
        for class_index, class_name in enumerate(self.dataset.class_names):
            if class_index == 0: continue  # ignore background
            prediction_path = self.eval_path / f"det_test_{class_name}.txt"
            with open(prediction_path, "w") as f:
                sub = results[results[:, 1] == class_index, :]
                for i in range(sub.size(0)):
                    prob_box = sub[i, 2:].numpy()
                    image_id = self.dataset.ids[int(sub[i, 0])]
                    print(
                        image_id + "\t" + " ".join([str(v) for v in prob_box]).replace(" ", "\t"),
                        file=f
                    )
        aps = []
        
        for class_index, class_name in enumerate(self.dataset.class_names):
            if class_index == 0:
                continue
            prediction_path = self.eval_path / f"det_test_{class_name}.txt"
            ap = self.compute_average_precision_per_class(
                self.true_case_stat[class_index],
                self.all_gb_boxes[class_index],
                self.all_difficult_cases[class_index],
                prediction_path,
                self.iou_threshold,
                self.use_2007_metric
            )
            aps.append(ap)

        return sum(aps)/len(aps), aps
      
    def log_results(self, mean_ap, class_ap, prefix=''):
        logging.info(f"{prefix}Average Precision Per-class:")
        
        for i in range(len(class_ap)):
            logging.info(f"    {self.dataset.class_names[i+1]}: {class_ap[i]}")
            
        logging.info(f"{prefix}Mean Average Precision (mAP):  {mean_ap}")
        
    def group_annotation_by_class(self, dataset):
        true_case_stat = {}
        all_gt_boxes = {}
        all_difficult_cases = {}
        for i in range(len(dataset)):
            image_id, annotation = dataset.get_annotation(i)
            gt_boxes, classes, is_difficult = annotation
            gt_boxes = torch.from_numpy(gt_boxes)
            for i, difficult in enumerate(is_difficult):
                class_index = int(classes[i])
                gt_box = gt_boxes[i]
                if not difficult:
                    true_case_stat[class_index] = true_case_stat.get(class_index, 0) + 1

                if class_index not in all_gt_boxes:
                    all_gt_boxes[class_index] = {}
                if image_id not in all_gt_boxes[class_index]:
                    all_gt_boxes[class_index][image_id] = []
                all_gt_boxes[class_index][image_id].append(gt_box)
                if class_index not in all_difficult_cases:
                    all_difficult_cases[class_index]={}
                if image_id not in all_difficult_cases[class_index]:
                    all_difficult_cases[class_index][image_id] = []
                all_difficult_cases[class_index][image_id].append(difficult)

        for class_index in all_gt_boxes:
            for image_id in all_gt_boxes[class_index]:
                all_gt_boxes[class_index][image_id] = torch.stack(all_gt_boxes[class_index][image_id])
        for class_index in all_difficult_cases:
            for image_id in all_difficult_cases[class_index]:
                all_gt_boxes[class_index][image_id] = all_gt_boxes[class_index][image_id].clone().detach() #torch.tensor(all_gt_boxes[class_index][image_id])
        return true_case_stat, all_gt_boxes, all_difficult_cases


    def compute_average_precision_per_class(self, num_true_cases, gt_boxes, difficult_cases,
                                            prediction_file, iou_threshold, use_2007_metric):
        with open(prediction_file) as f:
            image_ids = []
            boxes = []
            scores = []
            for line in f:
                t = line.rstrip().split("\t")
                image_ids.append(t[0])
                scores.append(float(t[1]))
                box = torch.tensor([float(v) for v in t[2:]]).unsqueeze(0)
                box -= 1.0  # convert to python format where indexes start from 0
                boxes.append(box)
            scores = np.array(scores)
            sorted_indexes = np.argsort(-scores)
            boxes = [boxes[i] for i in sorted_indexes]
            image_ids = [image_ids[i] for i in sorted_indexes]
            true_positive = np.zeros(len(image_ids))
            false_positive = np.zeros(len(image_ids))
            matched = set()
            for i, image_id in enumerate(image_ids):
                box = boxes[i]
                if image_id not in gt_boxes:
                    false_positive[i] = 1
                    continue

                gt_box = gt_boxes[image_id]
                ious = box_utils.iou_of(box, gt_box)
                max_iou = torch.max(ious).item()
                max_arg = torch.argmax(ious).item()
                if max_iou > iou_threshold:
                    if difficult_cases[image_id][max_arg] == 0:
                        if (image_id, max_arg) not in matched:
                            true_positive[i] = 1
                            matched.add((image_id, max_arg))
                        else:
                            false_positive[i] = 1
                else:
                    false_positive[i] = 1

        true_positive = true_positive.cumsum()
        false_positive = false_positive.cumsum()
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / num_true_cases
        if use_2007_metric:
            return measurements.compute_voc2007_average_precision(precision, recall)
        else:
            return measurements.compute_average_precision(precision, recall)



In [28]:
DATA_DIR='/nvdli-nano/data/objectdetection/images/fruit'
MODEL_DIR='/nvdli-nano/data/objectdetection/model/fruit'
DEFAULT_PRETRAINED_MODEL='../jetson-inference/python/training/detection/ssd/models/mobilenet-v1-ssd-mp-0_675.pth'
CHECK_POINT='../jetson-inference/python/training/detection/ssd/models/'

import os
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)
                
logging.basicConfig(stream=sys.stdout, level=getattr(logging, 'INFO', logging.INFO), format='%(asctime)s - %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
                    
tensorboard = SummaryWriter(log_dir=os.path.join(CHECK_POINT, "tensorboard", f"{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"))
timer = Timer()


In [32]:
NUM_EPOCHS = 10
BATCH = 4
DEBUG_STEPS = 10
NUM_WORKERS = 2
VALIDATION_EPOCHS=1
LR = 0.01
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4
GAMA = 0.1
BASE_NET_LR = 0.001
T_MAX = 100
DEVICE='cuda'

In [14]:
!python3 ../jetson-inference/python/training/detection/ssd/open_images_downloader.py --max-image=2500 --class-names "Apple,Orange,Banana,Strawberry" --data={DATA_DIR}

2024-02-16 17:06:42 - Download https://storage.googleapis.com/openimages/2018_04/class-descriptions-boxable.csv.
2024-02-16 17:06:43 - Requested 4 classes, found 4 classes
2024-02-16 17:06:43 - Download https://storage.googleapis.com/openimages/2018_04/train/train-annotations-bbox.csv.
2024-02-16 17:07:29 - Read annotation file /nvdli-nano/data/objectdetection/images/fruit/train-annotations-bbox.csv
2024-02-16 17:09:40 - Available train images:  3683
2024-02-16 17:09:40 - Available train boxes:   18935

2024-02-16 17:09:40 - Download https://storage.googleapis.com/openimages/2018_04/validation/validation-annotations-bbox.csv.
2024-02-16 17:09:41 - Read annotation file /nvdli-nano/data/objectdetection/images/fruit/validation-annotations-bbox.csv
2024-02-16 17:09:42 - Available validation images:  186
2024-02-16 17:09:42 - Available validation boxes:   598

2024-02-16 17:09:42 - Download https://storage.googleapis.com/openimages/2018_04/test/test-annotations-bbox.csv.
2024-02-16 17:09:45

In [17]:
def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    net.train(True)
    
    train_loss = 0.0
    train_regression_loss = 0.0
    train_classification_loss = 0.0
    
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    
    num_batches = 0
    
    for i, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        confidence, locations = net(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_regression_loss += regression_loss.item()
        train_classification_loss += classification_loss.item()
        
        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()

        if i and i % debug_steps == 0:
            avg_loss = running_loss / debug_steps
            avg_reg_loss = running_regression_loss / debug_steps
            avg_clf_loss = running_classification_loss / debug_steps
            logging.info(
                f"Epoch: {epoch}, Step: {i}/{len(loader)}, " +
                f"Avg Loss: {avg_loss:.4f}, " +
                f"Avg Regression Loss {avg_reg_loss:.4f}, " +
                f"Avg Classification Loss: {avg_clf_loss:.4f}"
            )
            running_loss = 0.0
            running_regression_loss = 0.0
            running_classification_loss = 0.0

        num_batches += 1
        
    train_loss /= num_batches
    train_regression_loss /= num_batches
    train_classification_loss /= num_batches
    
    logging.info(
        f"Epoch: {epoch}, " +
        f"Training Loss: {train_loss:.4f}, " +
        f"Training Regression Loss {train_regression_loss:.4f}, " +
        f"Training Classification Loss: {train_classification_loss:.4f}"
    )
     
    tensorboard.add_scalar('Loss/train', train_loss, epoch)
    tensorboard.add_scalar('Regression Loss/train', train_regression_loss, epoch)
    tensorboard.add_scalar('Classification Loss/train', train_classification_loss, epoch)

In [18]:
def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    num = 0
    for _, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        num += 1

        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
    
    return running_loss / num, running_regression_loss / num, running_classification_loss / num

In [19]:
create_net = create_mobilenetv1_ssd
config = mobilenetv1_ssd_config

In [20]:
train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)
target_transform = MatchPrior(config.priors, config.center_variance,
                              config.size_variance, 0.5)
test_transform = TestTransform(config.image_size, config.image_mean, config.image_std)

In [22]:
logging.info("Prepare training datasets.")
datasets = []
dataset = OpenImagesDataset(DATA_DIR, transform=train_transform, target_transform=target_transform,dataset_type="train", balance_data=True)
label_file = os.path.join(CHECK_POINT, "labels.txt")
store_labels(label_file, dataset.class_names)
logging.info(dataset)
num_classes = len(dataset.class_names)       
datasets.append(dataset)

# create training dataset
logging.info(f"Stored labels into file {label_file}.")
train_dataset = ConcatDataset(datasets)
logging.info("Train dataset size: {}".format(len(train_dataset)))
train_loader = DataLoader(train_dataset, BATCH, num_workers=NUM_WORKERS, shuffle=True)

2024-02-16 17:29:17 - Prepare training datasets.
2024-02-16 17:29:17 - loading annotations from: /nvdli-nano/data/objectdetection/images/fruit/sub-train-annotations-bbox.csv
2024-02-16 17:29:17 - annotations loaded from:  /nvdli-nano/data/objectdetection/images/fruit/sub-train-annotations-bbox.csv
num images:  2046
2024-02-16 17:29:23 - balancing data
2024-02-16 17:29:23 - Dataset Summary:Number of Images: 1616
Minimum Number of Images for a Class: 412
Label Distribution:
	Apple: 1596
	Banana: 933
	Orange: 2949
	Strawberry: 2726
2024-02-16 17:29:23 - Stored labels into file ../jetson-inference/python/training/detection/ssd/models/labels.txt.
2024-02-16 17:29:23 - Train dataset size: 1616


In [34]:
logging.info("Prepare Validation datasets.")
val_dataset = OpenImagesDataset(DATA_DIR, transform=test_transform, target_transform=target_transform, dataset_type="test")
logging.info(val_dataset)
logging.info("Validation dataset size: {}".format(len(val_dataset)))
val_loader = DataLoader(val_dataset, BATCH,
                        num_workers=NUM_WORKERS,
                        shuffle=False)

# create the network
logging.info("Build network.")
net = create_net(num_classes)
min_loss = -10000.0
last_epoch = -1

params = [
            {'params': net.base_net.parameters(), 'lr': base_net_lr},
            {'params': itertools.chain(
                net.source_layer_add_ons.parameters(),
                net.extras.parameters()
            ), 'lr': extra_layers_lr},
            {'params': itertools.chain(
                net.regression_headers.parameters(),
                net.classification_headers.parameters()
            )}
        ]

2024-02-16 17:34:58 - Prepare Validation datasets.
2024-02-16 17:34:58 - loading annotations from: /nvdli-nano/data/objectdetection/images/fruit/sub-test-annotations-bbox.csv
2024-02-16 17:34:58 - annotations loaded from:  /nvdli-nano/data/objectdetection/images/fruit/sub-test-annotations-bbox.csv
num images:  350
2024-02-16 17:34:59 - Dataset Summary:Number of Images: 350
Minimum Number of Images for a Class: -1
Label Distribution:
	Apple: 210
	Banana: 90
	Orange: 551
	Strawberry: 421
2024-02-16 17:34:59 - Validation dataset size: 350
2024-02-16 17:34:59 - Build network.


In [35]:
timer.start("Load Model")
base_net_lr = BASE_NET_LR
extra_layers_lr = None
logging.info(f"Init from pretrained SSD")
if not os.path.exists(DEFAULT_PRETRAINED_MODEL):
    os.system(f"wget --quiet --show-progress --progress=bar:force:noscroll --no-check-certificate https://nvidia.box.com/shared/static/djf5w54rjvpqocsiztzaandq1m3avr7c.pth -O {DEFAULT_PRETRAINED_MODEL}")
net.init_from_pretrained_ssd(DEFAULT_PRETRAINED_MODEL)

logging.info(f'Took {timer.end("Load Model"):.2f} seconds to load the model.')
net.to(DEVICE)

# define loss function and optimizer
criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3,
                         center_variance=0.1, size_variance=0.2, device=DEVICE)

optimizer = torch.optim.SGD(params, lr=LR, momentum=MOMENTUM,
                            weight_decay=WEIGHT_DECAY)

logging.info(f"Learning rate: {LR}, Base net learning rate: {base_net_lr}, "
             + f"Extra Layers learning rate: {extra_layers_lr}.")
logging.info("Uses CosineAnnealingLR scheduler.")

scheduler = CosineAnnealingLR(optimizer, T_MAX, last_epoch=last_epoch)
                        
logging.info(f"Start training from epoch {last_epoch + 1}.")



2024-02-16 17:35:02 - Init from pretrained SSD
2024-02-16 17:35:02 - Took 0.15 seconds to load the model.
2024-02-16 17:35:02 - Learning rate: 0.01, Base net learning rate: 0.001, Extra Layers learning rate: None.
2024-02-16 17:35:02 - Uses CosineAnnealingLR scheduler.
2024-02-16 17:35:02 - Start training from epoch 0.


In [38]:
for epoch in range(last_epoch + 1, NUM_EPOCHS):
        train(train_loader, net, criterion, optimizer, device=DEVICE, debug_steps=DEBUG_STEPS, epoch=epoch)
        scheduler.step()
        
        if epoch % VALIDATION_EPOCHS == 0 or epoch == NUM_EPOCHS - 1:
            val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE)
            
            logging.info(
                f"Epoch: {epoch}, " +
                f"Validation Loss: {val_loss:.4f}, " +
                f"Validation Regression Loss {val_regression_loss:.4f}, " +
                f"Validation Classification Loss: {val_classification_loss:.4f}"
            )
                    
            tensorboard.add_scalar('Loss/val', val_loss, epoch)
            tensorboard.add_scalar('Regression Loss/val', val_regression_loss, epoch)
            tensorboard.add_scalar('Classification Loss/val', val_classification_loss, epoch)
    
            
            mean_ap, class_ap = eval.compute()
            eval.log_results(mean_ap, class_ap, f"Epoch: {epoch}, ")

            tensorboard.add_scalar('Mean Average Precision/val', mean_ap, epoch)

            for i in range(len(class_ap)):
                tensorboard.add_scalar(f"Class Average Precision/{eval_dataset.class_names[i+1]}", class_ap[i], epoch)
    
            model_path = os.path.join(CHECK_POINT, f"MOBILE-SSD1-Epoch-{epoch}-Loss-{val_loss}.pth")
            net.save(model_path)
            logging.info(f"Saved model {model_path}")

logging.info("Task done, exiting program.")
tensorboard.close()



TypeError: bad operand type for unary -: 'NoneType'