# Import the project

In [1]:
from detect import *

from torchsummary import summary

# Some functions

In [2]:
def _draw_and_save_output_images(img_detections, imgs, img_size, output_path, classes):
    """Draws detections in output images and stores them.

    :param img_detections: List of detections
    :type img_detections: [Tensor]
    :param imgs: List of paths to image files
    :type imgs: [str]
    :param img_size: Size of each image dimension for yolo
    :type img_size: int
    :param output_path: Path of output directory
    :type output_path: str
    :param classes: List of class names
    :type classes: [str]
    """

    # Iterate through images and save plot of detections
    for (image_path, detections) in zip(imgs, img_detections):
        print(f"Image {image_path}:")
        _draw_and_save_output_image(
            image_path, detections, img_size, output_path, classes)


def _draw_and_save_output_image(image_path, detections, img_size, output_path, classes):
    """Draws detections in output image and stores this.

    :param image_path: Path to input image
    :type image_path: str
    :param detections: List of detections on image
    :type detections: [Tensor]
    :param img_size: Size of each image dimension for yolo
    :type img_size: int
    :param output_path: Path of output directory
    :type output_path: str
    :param classes: List of class names
    :type classes: [str]
    """
    # Create plot
    img = np.array(Image.open(image_path))
    plt.figure()
    fig, ax = plt.subplots(1)
    ax.imshow(img)
    # Rescale boxes to original image
    detections = rescale_boxes(detections, img_size, img.shape[:2])
    unique_labels = detections[:, -1].cpu().unique()
    n_cls_preds = len(unique_labels)
    # Bounding-box colors
    cmap = plt.get_cmap("tab20b")
    colors = [cmap(i) for i in np.linspace(0, 1, n_cls_preds)]
    bbox_colors = random.sample(colors, n_cls_preds)
    for x1, y1, x2, y2, conf, cls_pred in detections:

        print(f"\t+ Label: {classes[int(cls_pred)]} | Confidence: {conf.item():0.4f}")

        box_w = x2 - x1
        box_h = y2 - y1

        color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
        # Create a Rectangle patch
        bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none")
        # Add the bbox to the plot
        ax.add_patch(bbox)
        # Add label
        plt.text(
            x1,
            y1,
            s=f"{classes[int(cls_pred)]}: {conf:.2f}",
            color="white",
            verticalalignment="top",
            bbox={"color": color, "pad": 0})

    # Save generated image with detections
    plt.axis("off")
    plt.gca().xaxis.set_major_locator(NullLocator())
    plt.gca().yaxis.set_major_locator(NullLocator())
    filename = os.path.basename(image_path).split(".")[0]
    output_path = os.path.join(output_path, f"{filename}.png")
    plt.savefig(output_path, bbox_inches="tight", pad_inches=0.0)
    plt.close()


def _create_data_loader(img_path, batch_size, img_size, n_cpu):
    """Creates a DataLoader for inferencing.

    :param img_path: Path to file containing all paths to validation images.
    :type img_path: str
    :param batch_size: Size of each image batch
    :type batch_size: int
    :param img_size: Size of each image dimension for yolo
    :type img_size: int
    :param n_cpu: Number of cpu threads to use during batch generation
    :type n_cpu: int
    :return: Returns DataLoader
    :rtype: DataLoader
    """
    dataset = ImageFolder(
        img_path,
        transform=transforms.Compose([DEFAULT_TRANSFORMS, Resize(img_size)]))
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=n_cpu,
        pin_memory=True)
    return dataloader

# Arguments

In [3]:
parser = argparse.ArgumentParser(description="Detect objects on images.")
parser.add_argument("-m", "--model", type=str, default="../config/yolomask.cfg", help="Path to model definition file (.cfg)")
parser.add_argument("-w", "--weights", type=str, default="../weights/darknet53.conv.74", help="Path to weights or checkpoint file (.weights or .pth)")
parser.add_argument("-i", "--images", type=str, default="../data/samples", help="Path to directory with images to inference")
parser.add_argument("-c", "--classes", type=str, default="../data/coco.names", help="Path to classes label file (.names)")
parser.add_argument("-o", "--output", type=str, default="../output_mask", help="Path to output directory")
parser.add_argument("-b", "--batch_size", type=int, default=1, help="Size of each image batch")
parser.add_argument("--img_size", type=int, default=416, help="Size of each image dimension for yolo")
parser.add_argument("--n_cpu", type=int, default=8, help="Number of cpu threads to use during batch generation")
parser.add_argument("--conf_thres", type=float, default=0.5, help="Object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression")
args, unknown = parser.parse_known_args()
print(f"Command line arguments: {args}")

Command line arguments: Namespace(model='../config/yolomask.cfg', weights='../weights/darknet53.conv.74', images='../data/samples', classes='../data/coco.names', output='../output_mask', batch_size=1, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.4)


In [4]:
model_path      = args.model

img_path        = args.images
classes         = load_classes(args.classes)
output_path     = args.output
batch_size      = args.batch_size
img_size        = args.img_size
n_cpu           = args.n_cpu
conf_thres      = args.conf_thres
nms_thres       = args.nms_thres

# Model summary

In [None]:
weights_path    = "../weights/yolov3.weights"

In [64]:
dataloader = _create_data_loader(img_path, batch_size, img_size, n_cpu)
model = load_model(model_path, weights_path)

RuntimeError: shape '[256, 128, 3, 3]' is invalid for input of size 196607

Esto da un error ya que al trata de cargar los pesos de yolov3 no concuerda el tamano porque estos no tienen las mascaras.


Por lo tanto se tiene que cargar el modelo solo con los pesos de la darknet

In [5]:
weights_path    = "../weights/darknet53.conv.74"

In [20]:
dataloader = _create_data_loader(img_path, batch_size, img_size, n_cpu)
model = load_model(model_path, weights_path)


In [16]:
for name, module in model.module_list.named_children():
    print(module)

Sequential(
  (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_0): LeakyReLU(negative_slope=0.1)
)
Sequential(
  (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_1): LeakyReLU(negative_slope=0.1)
)
Sequential(
  (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_2): LeakyReLU(negative_slope=0.1)
)
Sequential(
  (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (batch_norm_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (leaky_3): LeakyReLU(negative_slope=0.1)
)
Sequential(
  (shortcut_4): Sequentia

In [7]:
summary(model, input_size=(3, model.hyperparams['height'], model.hyperparams['height']))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 416, 416]             864
       BatchNorm2d-2         [-1, 32, 416, 416]              64
         LeakyReLU-3         [-1, 32, 416, 416]               0
            Conv2d-4         [-1, 64, 208, 208]          18,432
       BatchNorm2d-5         [-1, 64, 208, 208]             128
         LeakyReLU-6         [-1, 64, 208, 208]               0
            Conv2d-7         [-1, 32, 208, 208]           2,048
       BatchNorm2d-8         [-1, 32, 208, 208]              64
         LeakyReLU-9         [-1, 32, 208, 208]               0
           Conv2d-10         [-1, 64, 208, 208]          18,432
      BatchNorm2d-11         [-1, 64, 208, 208]             128
        LeakyReLU-12         [-1, 64, 208, 208]               0
           Conv2d-13        [-1, 128, 104, 104]          73,728
      BatchNorm2d-14        [-1, 128, 1

(tensor(62112989), tensor(62112989))

# Detections

In [21]:
img_detections, imgs = detect(
    model,
    dataloader,
    output_path,
    conf_thres,
    nms_thres)
_draw_and_save_output_images(
    img_detections, imgs, img_size, output_path, classes)

print(f"---- Detections were saved to: '{output_path}' ----")

Detecting: 100%|██████████| 12/12 [00:15<00:00,  1.27s/it]


Image ../data/samples\COCo girl.jpg:
Image ../data/samples\dog.jpg:
Image ../data/samples\eagle.jpg:
Image ../data/samples\field.jpg:
Image ../data/samples\giraffe.jpg:
Image ../data/samples\herd_of_horses.jpg:
Image ../data/samples\messi.jpg:
Image ../data/samples\perritos.jpeg:
Image ../data/samples\perro.jpeg:
Image ../data/samples\person.jpg:
Image ../data/samples\room.jpg:
Image ../data/samples\street.jpg:
---- Detections were saved to: '../output_mask' ----


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

In [22]:
from torch.autograd import Variable

model.eval()  # Set model to evaluation mode

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

img_detections = []  # Stores detections for each image index
imgs = []  # Stores image paths

for (img_paths, input_imgs) in tqdm.tqdm(dataloader, desc="Detecting"):
    # Configure input
    input_imgs = Variable(input_imgs.type(Tensor))

    # Get detections
    with torch.no_grad():
        print('Hola')
        detections = model(input_imgs)
        #detections = non_max_suppression(detections, conf_thres, nms_thres)
        
    break

    ## Store image and detections
    #img_detections.extend(detections)
    #imgs.extend(img_paths)

Detecting:   0%|          | 0/12 [00:00<?, ?it/s]

Hola


Detecting:   0%|          | 0/12 [00:13<?, ?it/s]


In [23]:
detections.shape

torch.Size([1, 10647, 85])

In [24]:
detections[0,0,:]

tensor([ 16.0269,  16.0837, 116.9831,  88.5486,   0.4881,   0.5109,   0.5063,
          0.4923,   0.5082,   0.4996,   0.5133,   0.5156,   0.4979,   0.4978,
          0.4943,   0.5000,   0.4951,   0.4982,   0.4967,   0.5057,   0.4976,
          0.4966,   0.5014,   0.4925,   0.4998,   0.4885,   0.4996,   0.4943,
          0.4975,   0.4943,   0.4925,   0.4976,   0.4892,   0.5010,   0.4962,
          0.4996,   0.4877,   0.4837,   0.4993,   0.4972,   0.5014,   0.4934,
          0.4885,   0.4975,   0.5008,   0.5080,   0.5154,   0.4971,   0.4937,
          0.5001,   0.4998,   0.5111,   0.4996,   0.4898,   0.4990,   0.5093,
          0.5045,   0.5009,   0.5049,   0.5069,   0.5058,   0.5097,   0.4945,
          0.5009,   0.4995,   0.4993,   0.4991,   0.5083,   0.5008,   0.4997,
          0.4988,   0.5028,   0.5060,   0.4892,   0.4912,   0.4938,   0.4997,
          0.5084,   0.5040,   0.5056,   0.5024,   0.5046,   0.4980,   0.5032,
          0.5068], device='cuda:0')