In [1]:
!pip install imgaug
!pip install terminaltables
!pip install torchsummary



In [2]:
from __future__ import division

import os
import argparse
import tqdm

import torch
from torch.utils.data import DataLoader
import torch.optim as optim

from pytorchyolo.models import load_model
from pytorchyolo.utils.logger import Logger
from pytorchyolo.utils.utils import to_cpu, load_classes, print_environment_info, provide_determinism, worker_seed_set
from pytorchyolo.utils.datasets import ListDataset
from pytorchyolo.utils.augmentations import AUGMENTATION_TRANSFORMS
# from pytorchyolo.utils.transforms import DEFAULT_TRANSFORMS
from pytorchyolo.utils.parse_config import parse_data_config
from pytorchyolo.utils.loss import compute_loss
from pytorchyolo.test import _evaluate, _create_validation_data_loader

from terminaltables import AsciiTable

from torchsummary import summary

In [3]:


def _create_data_loader(img_path, batch_size, img_size, n_cpu, multiscale_training=False):
    """Build the shuffled DataLoader used for training.

    The images listed in ``img_path`` are wrapped in a ``ListDataset`` that
    applies ``AUGMENTATION_TRANSFORMS``; batches are assembled with the
    dataset's own ``collate_fn``.

    :param img_path: Path to file containing all paths to training images.
    :type img_path: str
    :param batch_size: Number of images per batch
    :type batch_size: int
    :param img_size: Size of each image dimension for yolo
    :type img_size: int
    :param n_cpu: Number of worker processes used for batch generation
    :type n_cpu: int
    :param multiscale_training: Forwarded to the dataset's ``multiscale`` option
    :type multiscale_training: bool
    :return: DataLoader over the training dataset
    :rtype: DataLoader
    """
    train_set = ListDataset(
        img_path,
        img_size=img_size,
        multiscale=multiscale_training,
        transform=AUGMENTATION_TRANSFORMS,
    )

    # Loader settings are collected first so the construction reads as one call.
    loader_options = dict(
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_cpu,
        pin_memory=True,
        collate_fn=train_set.collate_fn,
        worker_init_fn=worker_seed_set,  # per-worker seeding hook from pytorchyolo utils
    )
    return DataLoader(train_set, **loader_options)

In [4]:
# Training options. Every option has a default so the notebook can run without
# any command-line arguments.
# File paths are relative to the working directory (the --model default used to
# be an absolute path into one user's home directory; it now follows the same
# convention as --data).
parser = argparse.ArgumentParser(description="Trains the YOLO model.")
parser.add_argument("-m", "--model", type=str, default="config/yolov3.cfg", help="Path to model definition file (.cfg)")
parser.add_argument("-d", "--data", type=str, default="config/coco.data", help="Path to data config file (.data)")
parser.add_argument("-e", "--epochs", type=int, default=10, help="Number of epochs")
parser.add_argument("-v", "--verbose", action='store_true', help="Makes the training more verbose")
parser.add_argument("--n_cpu", type=int, default=8, help="Number of cpu threads to use during batch generation")
parser.add_argument("--pretrained_weights", type=str, help="Path to checkpoint file (.weights or .pth). Starts training from checkpoint model")
parser.add_argument("--checkpoint_interval", type=int, default=1, help="Interval of epochs between saving model weights")
parser.add_argument("--evaluation_interval", type=int, default=1, help="Interval of epochs between evaluations on validation set")
parser.add_argument("--multiscale_training", action="store_true", help="Allow multi-scale training")
parser.add_argument("--iou_thres", type=float, default=0.5, help="Evaluation: IOU threshold required to qualify as detected")
parser.add_argument("--conf_thres", type=float, default=0.1, help="Evaluation: Object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.5, help="Evaluation: IOU threshold for non-maximum suppression")
parser.add_argument("--logdir", type=str, default="logs", help="Directory for training log files (e.g. for TensorBoard)")
# Trailing ';' keeps the notebook from echoing the returned argparse action as cell output.
parser.add_argument("--seed", type=int, default=-1, help="Makes results reproducible. Set -1 to disable.");


_StoreAction(option_strings=['--seed'], dest='seed', nargs=None, const=None, default=-1, type=<class 'int'>, choices=None, help='Makes results reproducable. Set -1 to disable.', metavar=None)

In [5]:
# Parse the options declared on `parser`. parse_known_args() returns unrecognised
# arguments separately (in `unknown`) instead of exiting with an error.
# NOTE(review): presumably chosen because, under Jupyter, sys.argv carries the
# kernel launcher's own flags - confirm. `args` is read as a module-level global by run().
args, unknown = parser.parse_known_args()


In [6]:
def _build_optimizer(model):
    """Create the optimizer selected by the model's cfg hyperparameters.

    :param model: Model exposing ``parameters()`` and a ``hyperparams`` mapping with
        the keys 'optimizer', 'learning_rate', 'decay' and (for sgd) 'momentum'.
    :return: Optimizer over the parameters that require gradients
    :rtype: torch.optim.Optimizer
    :raises ValueError: If the configured optimizer is not None, "adam" or "sgd".
    """
    hyperparams = model.hyperparams
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer_name = hyperparams['optimizer']

    if optimizer_name in [None, "adam"]:
        return optim.Adam(
            params,
            lr=hyperparams['learning_rate'],
            weight_decay=hyperparams['decay'],
        )
    if optimizer_name == "sgd":
        return optim.SGD(
            params,
            lr=hyperparams['learning_rate'],
            weight_decay=hyperparams['decay'],
            momentum=hyperparams['momentum'])

    # Fail before training starts; merely printing here left `optimizer` undefined
    # and the run crashed later on its first use.
    raise ValueError(
        f"Unknown optimizer {optimizer_name!r}. Please choose between (adam, sgd).")


def _scheduled_learning_rate(hyperparams, batches_done):
    """Compute the learning rate to use after ``batches_done`` batches.

    :param hyperparams: Mapping with the keys 'learning_rate', 'burn_in' and 'lr_steps'
        ('lr_steps' is iterated as (threshold, factor) pairs).
    :param batches_done: Number of batches processed so far
    :type batches_done: int
    :return: Learning rate for the next optimizer step
    """
    lr = hyperparams['learning_rate']
    if batches_done < hyperparams['burn_in']:
        # Burn in: scale linearly from 0 up to the configured rate
        lr *= (batches_done / hyperparams['burn_in'])
    else:
        # Apply the factor of every step whose threshold has been passed
        for threshold, value in hyperparams['lr_steps']:
            if batches_done > threshold:
                lr *= value
    return lr


def run():
    """Train the YOLO model described by the module-level ``args`` namespace.

    Steps performed:

    * optionally seed for determinism (``args.seed != -1``),
    * create the "output" and "checkpoints" directories,
    * load the data config, class names and the model,
    * build the training and validation data loaders and the optimizer,
    * for every epoch: train with gradient accumulation, log losses and the
      learning rate through ``Logger``, save a checkpoint every
      ``args.checkpoint_interval`` epochs and evaluate every
      ``args.evaluation_interval`` epochs.

    :raises ValueError: If the model cfg names an optimizer other than adam/sgd.
    """
    if args.seed != -1:
        provide_determinism(args.seed)

    logger = Logger(args.logdir)  # Tensorboard logger

    # Create output directories if missing
    os.makedirs("output", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    # Get data configuration
    data_config = parse_data_config(args.data)
    train_path = data_config["train"]
    valid_path = data_config["valid"]
    class_names = load_classes(data_config["names"])
    # Batches are moved to this device below.
    # NOTE(review): the model itself is not moved here; assumes load_model already
    # places it on the matching device - confirm.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # ############
    # Create model
    # ############

    model = load_model(args.model, args.pretrained_weights)

    # Print model
    if args.verbose:
        summary(model, input_size=(3, model.hyperparams['height'], model.hyperparams['height']))

    # NOTE: fixed mini-batch of one image; this deliberately overrides the
    # cfg-derived value (hyperparams 'batch' // 'subdivisions').
    mini_batch_size = 1

    # #################
    # Create Dataloader
    # #################

    # Load training dataloader
    dataloader = _create_data_loader(
        train_path,
        mini_batch_size,
        model.hyperparams['height'],
        args.n_cpu,
        args.multiscale_training)

    # Load validation dataloader
    validation_dataloader = _create_validation_data_loader(
        valid_path,
        mini_batch_size,
        model.hyperparams['height'],
        args.n_cpu)

    # ################
    # Create optimizer
    # ################

    optimizer = _build_optimizer(model)

    for epoch in range(args.epochs):

        print("\n---- Training Model ----")

        model.train()  # Set model to training mode

        for batch_i, (_, imgs, targets) in enumerate(tqdm.tqdm(dataloader, desc=f"Training Epoch {epoch}")):
            # Global batch counter across epochs
            batches_done = len(dataloader) * epoch + batch_i

            imgs = imgs.to(device, non_blocking=True)
            targets = targets.to(device)

            outputs = model(imgs)

            loss, loss_components = compute_loss(outputs, targets, model)

            # Gradients accumulate across batches until the optimizer step below
            loss.backward()

            ###############
            # Run optimizer
            ###############

            # Step only every 'subdivisions' batches (gradient accumulation)
            if batches_done % model.hyperparams['subdivisions'] == 0:
                # Adapt learning rate according to the cfg (burn-in, then lr steps)
                lr = _scheduled_learning_rate(model.hyperparams, batches_done)
                # Log the learning rate
                logger.scalar_summary("train/learning_rate", lr, batches_done)
                # Set learning rate
                for g in optimizer.param_groups:
                    g['lr'] = lr

                # Run optimizer
                optimizer.step()
                # Reset gradients
                optimizer.zero_grad()

            # ############
            # Log progress
            # ############
            if args.verbose:
                print(AsciiTable(
                    [
                        ["Type", "Value"],
                        ["IoU loss", float(loss_components[0])],
                        ["Object loss", float(loss_components[1])],
                        ["Class loss", float(loss_components[2])],
                        ["Loss", float(loss_components[3])],
                        ["Batch loss", to_cpu(loss).item()],
                    ]).table)

            # Tensorboard logging
            tensorboard_log = [
                ("train/iou_loss", float(loss_components[0])),
                ("train/obj_loss", float(loss_components[1])),
                ("train/class_loss", float(loss_components[2])),
                ("train/loss", to_cpu(loss).item())]
            logger.list_of_scalars_summary(tensorboard_log, batches_done)

            # Running count of images processed by the model
            model.seen += imgs.size(0)

        # #############
        # Save progress
        # #############

        # Save model to checkpoint file
        if epoch % args.checkpoint_interval == 0:
            checkpoint_path = f"checkpoints/yolov3_ckpt_{epoch}.pth"
            print(f"---- Saving checkpoint to: '{checkpoint_path}' ----")
            torch.save(model.state_dict(), checkpoint_path)

        # ########
        # Evaluate
        # ########

        if epoch % args.evaluation_interval == 0:
            print("\n---- Evaluating Model ----")
            # Evaluate the model on the validation set
            metrics_output = _evaluate(
                model,
                validation_dataloader,
                class_names,
                img_size=model.hyperparams['height'],
                iou_thres=args.iou_thres,
                conf_thres=args.conf_thres,
                nms_thres=args.nms_thres,
                verbose=args.verbose
            )

            # _evaluate may return None, in which case nothing is logged
            if metrics_output is not None:
                precision, recall, AP, f1, ap_class = metrics_output
                evaluation_metrics = [
                    ("validation/precision", precision.mean()),
                    ("validation/recall", recall.mean()),
                    ("validation/mAP", AP.mean()),
                    ("validation/f1", f1.mean())]
                logger.list_of_scalars_summary(evaluation_metrics, epoch)


In [7]:
run()

Training Epoch 0:   0%|          | 0/27 [00:00<?, ?it/s]


---- Training Model ----


Training Epoch 0: 100%|██████████| 27/27 [00:10<00:00,  2.56it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_0.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:  30%|██▉       | 8/27 [00:04<00:13,  1.41it/s]



Validating:  48%|████▊     | 13/27 [00:06<00:08,  1.68it/s]



Validating:  67%|██████▋   | 18/27 [00:08<00:04,  1.84it/s]



Validating: 100%|██████████| 27/27 [00:12<00:00,  2.16it/s]
Computing AP: 100%|██████████| 39/39 [00:00<00:00, 2521.70it/s]
Training Epoch 1:   0%|          | 0/27 [00:00<?, ?it/s]

---- mAP 0.00040 ----

---- Training Model ----


Training Epoch 1: 100%|██████████| 27/27 [00:10<00:00,  2.61it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_1.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:  30%|██▉       | 8/27 [00:03<00:12,  1.56it/s]



Validating:  93%|█████████▎| 25/27 [00:11<00:01,  1.49it/s]



Validating: 100%|██████████| 27/27 [00:11<00:00,  2.32it/s]
Computing AP: 100%|██████████| 39/39 [00:00<00:00, 2668.96it/s]
Training Epoch 2:   0%|          | 0/27 [00:00<?, ?it/s]

---- mAP 0.00049 ----

---- Training Model ----


Training Epoch 2: 100%|██████████| 27/27 [00:10<00:00,  2.52it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_2.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:   7%|▋         | 2/27 [00:01<00:17,  1.43it/s]



Validating:  22%|██▏       | 6/27 [00:03<00:12,  1.62it/s]



Validating:  93%|█████████▎| 25/27 [00:11<00:01,  1.62it/s]



Validating: 100%|██████████| 27/27 [00:12<00:00,  2.20it/s]
Computing AP: 100%|██████████| 39/39 [00:00<00:00, 2537.35it/s]
Training Epoch 3:   0%|          | 0/27 [00:00<?, ?it/s]

---- mAP 0.00039 ----

---- Training Model ----


Training Epoch 3: 100%|██████████| 27/27 [00:10<00:00,  2.60it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_3.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:  11%|█         | 3/27 [00:01<00:14,  1.65it/s]



Validating:  26%|██▌       | 7/27 [00:03<00:12,  1.59it/s]



Validating:  78%|███████▊  | 21/27 [00:09<00:03,  1.70it/s]



Validating: 100%|██████████| 27/27 [00:12<00:00,  2.23it/s]
Computing AP: 100%|██████████| 39/39 [00:00<00:00, 2733.95it/s]
Training Epoch 4:   0%|          | 0/27 [00:00<?, ?it/s]

---- mAP 0.00004 ----

---- Training Model ----


Training Epoch 4: 100%|██████████| 27/27 [00:10<00:00,  2.47it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_4.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:  11%|█         | 3/27 [00:01<00:14,  1.64it/s]



Validating:  44%|████▍     | 12/27 [00:05<00:08,  1.73it/s]



Validating:  78%|███████▊  | 21/27 [00:09<00:03,  1.69it/s]



Validating:  93%|█████████▎| 25/27 [00:11<00:01,  1.61it/s]



Validating: 100%|██████████| 27/27 [00:12<00:00,  2.18it/s]
Computing AP: 100%|██████████| 39/39 [00:00<00:00, 2557.58it/s]
Training Epoch 5:   0%|          | 0/27 [00:00<?, ?it/s]

---- mAP 0.00039 ----

---- Training Model ----


Training Epoch 5: 100%|██████████| 27/27 [00:10<00:00,  2.51it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_5.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:  30%|██▉       | 8/27 [00:03<00:11,  1.65it/s]



Validating:  44%|████▍     | 12/27 [00:05<00:09,  1.60it/s]



Validating:  59%|█████▉    | 16/27 [00:07<00:06,  1.61it/s]



Validating:  74%|███████▍  | 20/27 [00:09<00:04,  1.62it/s]



Validating:  93%|█████████▎| 25/27 [00:11<00:01,  1.73it/s]



Validating: 100%|██████████| 27/27 [00:12<00:00,  2.18it/s]
Computing AP: 100%|██████████| 39/39 [00:00<00:00, 2683.50it/s]
Training Epoch 6:   0%|          | 0/27 [00:00<?, ?it/s]

---- mAP 0.00002 ----

---- Training Model ----


Training Epoch 6: 100%|██████████| 27/27 [00:09<00:00,  2.77it/s]


---- Saving checkpoint to: 'checkpoints/yolov3_ckpt_6.pth' ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


---- Evaluating Model ----


Validating:   0%|          | 0/27 [00:00<?, ?it/s]


KeyboardInterrupt: 