# Etapa 2
- Calculo da Entropia e métricas.
- Criação do agente de reforço para seleção da próxima rotulação.

In [1]:
from tqdm import tqdm
import numpy as np
from tqdm import tqdm
from pathlib import Path
from ultralytics import YOLO
from ultralytics.engine.results import Results
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import cv2

In [2]:
model = YOLO("runs/detect/yolov11-initial/weights/best.pt") 

In [3]:

metrics = model.val(
    data="coco.yaml",
    split="val",
    imgsz=480,
    conf=0.5,  # Limiar de confiança
    iou=0.6, # Limiar de NMS
)


Ultralytics 8.3.135  Python-3.13.2 torch-2.7.0+cu128 CUDA:0 (NVIDIA GeForce RTX 3060, 8192MiB)
YOLO11x summary (fused): 190 layers, 56,919,424 parameters, 0 gradients, 194.9 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 1562.2977.0 MB/s, size: 160.4 KB)


[34m[1mval: [0mScanning E:\COCO-Dataset\val2017\val\labels.cache... 4000 images, 39 backgrounds, 0 corrupt: 100%|██████████| 4000/4000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 250/250 [02:50<00:00,  1.46it/s]


                   all       4000      29401      0.793      0.519      0.667       0.54
                person       2167       8916      0.935      0.569      0.756      0.629
               bicycle        118        250      0.816      0.516      0.681      0.453
                   car        432       1587      0.876      0.452      0.674      0.528
            motorcycle        132        311      0.837      0.627      0.759      0.577
              airplane         80        117      0.868      0.786      0.868      0.749
                   bus        153        239      0.775       0.72      0.791       0.71
                 train        134        160      0.968      0.756      0.872      0.753
                 truck        198        318      0.503      0.557       0.53      0.429
                  boat         97        341       0.77      0.372      0.575      0.379
         traffic light        146        475      0.782      0.356      0.573      0.387
          fire hydran

In [None]:
metrics

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
       78, 79])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x0000020B14B6F8C0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0

In [8]:
def calculate_entropy(probabilities):
    """Calcula a entropia de Shannon para um vetor de probabilidades."""
    epsilon = 1e-10  # Evitar log(0)
    return -np.sum(probabilities * np.log(probabilities + epsilon), axis=-1)

In [16]:
class CocoValDataset(Dataset):
    def __init__(self, base_path, img_size=480):
        self.img_dir = Path(base_path) / 'val' / 'images'
        self.img_files = list(self.img_dir.glob('*.jpg'))
        
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((img_size, img_size)),
            transforms.Normalize((0.5,),(0.5,))
        ])

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        img_path = self.img_files[idx]
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return self.transform(img), img_path.name



def validate_model(model, device, data_path, batch_size=8):
    dataset = CocoValDataset(data_path)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    model.eval()
    all_confs = []
    all_entropies = []
    
    with torch.no_grad():
        for batch, img_names in tqdm(loader, desc='Validating'):
            batch = batch.to(device)
            
            # Supondo que o modelo retorna detecções no formato YOLO
            results = model(batch, save_conf=True)
            
            # Processar cada imagem no batch
            
            for i, result in enumerate(results):
    
                # Cálculo de confiança e das probabilidades nas bounding boxes
                if hasattr(result, 'boxes') and result.boxes is not None:
                    confs = result.boxes.conf.cpu().numpy()
                    entropy_per_detection = [calculate_entropy(c) for c in confs]
                    all_entropies.extend(entropy_per_detection)
                    all_confs.extend(confs.tolist())
    
    # Calcular métricas
    avg_conf = np.mean(all_confs) if all_confs else 0
    avg_entropy = np.mean(all_entropies) if all_entropies else 0
    
    print(f'\nConfiança Média: {avg_conf:.4f}')
    print(f'Entropia Média: {avg_entropy:.4f}')
    return avg_conf, avg_entropy


model.to('cuda')
    
data_path = 'E:/COCO-Dataset/val2017/'
avg_conf, avg_entropy = validate_model(model, 'cuda', data_path)



Validating:   0%|          | 0/500 [00:00<?, ?it/s]


0: 480x480 (no detections), 49.8ms
1: 480x480 (no detections), 49.8ms
2: 480x480 (no detections), 49.8ms
3: 480x480 (no detections), 49.8ms
4: 480x480 (no detections), 49.8ms
5: 480x480 (no detections), 49.8ms
6: 480x480 (no detections), 49.8ms
7: 480x480 (no detections), 49.8ms
Speed: 0.0ms preprocess, 49.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   0%|          | 1/500 [00:00<04:44,  1.75it/s]


0: 480x480 1 person, 1 cell phone, 49.9ms
1: 480x480 2 laptops, 1 mouse, 49.9ms
2: 480x480 1 bowl, 49.9ms
3: 480x480 3 persons, 1 truck, 2 horses, 1 surfboard, 49.9ms
4: 480x480 3 cars, 2 trucks, 49.9ms
5: 480x480 2 persons, 1 bus, 49.9ms
6: 480x480 1 cat, 4 laptops, 49.9ms
7: 480x480 2 airplanes, 1 bird, 49.9ms
Speed: 0.0ms preprocess, 49.9ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   0%|          | 2/500 [00:01<04:28,  1.86it/s]


0: 480x480 1 bed, 1 dining table, 46.2ms
1: 480x480 2 persons, 1 bus, 46.2ms
2: 480x480 1 bowl, 1 apple, 1 orange, 46.2ms
3: 480x480 4 wine glasss, 3 bowls, 46.2ms
4: 480x480 1 person, 46.2ms
5: 480x480 1 bottle, 1 wine glass, 1 knife, 1 spoon, 2 bowls, 1 dining table, 46.2ms
6: 480x480 3 persons, 1 skis, 46.2ms
7: 480x480 1 banana, 46.2ms
Speed: 0.0ms preprocess, 46.2ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:   1%|          | 3/500 [00:01<04:11,  1.98it/s]


0: 480x480 (no detections), 43.9ms
1: 480x480 (no detections), 43.9ms
2: 480x480 (no detections), 43.9ms
3: 480x480 (no detections), 43.9ms
4: 480x480 (no detections), 43.9ms
5: 480x480 (no detections), 43.9ms
6: 480x480 (no detections), 43.9ms
7: 480x480 (no detections), 43.9ms
Speed: 0.0ms preprocess, 43.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   1%|          | 4/500 [00:01<03:54,  2.11it/s]


0: 480x480 1 bed, 1 laptop, 1 book, 41.4ms
1: 480x480 1 person, 1 surfboard, 41.4ms
2: 480x480 1 cat, 5 laptops, 1 keyboard, 41.4ms
3: 480x480 4 persons, 41.4ms
4: 480x480 2 persons, 1 car, 1 bus, 41.4ms
5: 480x480 1 person, 41.4ms
6: 480x480 4 persons, 1 umbrella, 41.4ms
7: 480x480 3 airplanes, 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   1%|          | 5/500 [00:02<03:47,  2.18it/s]


0: 480x480 (no detections), 44.8ms
1: 480x480 (no detections), 44.8ms
2: 480x480 (no detections), 44.8ms
3: 480x480 (no detections), 44.8ms
4: 480x480 (no detections), 44.8ms
5: 480x480 (no detections), 44.8ms
6: 480x480 (no detections), 44.8ms
7: 480x480 (no detections), 44.8ms
Speed: 0.0ms preprocess, 44.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   1%|          | 6/500 [00:02<03:42,  2.22it/s]


0: 480x480 (no detections), 45.1ms
1: 480x480 (no detections), 45.1ms
2: 480x480 (no detections), 45.1ms
3: 480x480 (no detections), 45.1ms
4: 480x480 (no detections), 45.1ms
5: 480x480 (no detections), 45.1ms
6: 480x480 (no detections), 45.1ms
7: 480x480 (no detections), 45.1ms
Speed: 0.0ms preprocess, 45.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   1%|▏         | 7/500 [00:03<03:38,  2.26it/s]


0: 480x480 4 elephants, 49.8ms
1: 480x480 1 person, 1 surfboard, 49.8ms
2: 480x480 8 persons, 1 umbrella, 1 frisbee, 49.8ms
3: 480x480 1 bench, 49.8ms
4: 480x480 13 bottles, 1 bowl, 1 refrigerator, 49.8ms
5: 480x480 1 kite, 49.8ms
6: 480x480 2 beds, 49.8ms
7: 480x480 1 vase, 49.8ms
Speed: 0.0ms preprocess, 49.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   2%|▏         | 8/500 [00:03<03:45,  2.19it/s]


0: 480x480 2 clocks, 47.2ms
1: 480x480 1 person, 2 ties, 47.2ms
2: 480x480 2 bowls, 47.2ms
3: 480x480 1 fork, 1 broccoli, 2 pizzas, 2 cakes, 1 dining table, 47.2ms
4: 480x480 2 persons, 1 dog, 1 sheep, 47.2ms
5: 480x480 1 person, 1 banana, 47.2ms
6: 480x480 1 fire hydrant, 1 bench, 47.2ms
7: 480x480 1 person, 1 frisbee, 47.2ms
Speed: 0.0ms preprocess, 47.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   2%|▏         | 9/500 [00:04<03:49,  2.14it/s]


0: 480x480 1 tv, 1 laptop, 42.0ms
1: 480x480 1 person, 1 umbrella, 42.0ms
2: 480x480 1 person, 1 tv, 1 laptop, 42.0ms
3: 480x480 5 persons, 3 bottles, 5 cups, 42.0ms
4: 480x480 1 fire hydrant, 42.0ms
5: 480x480 2 persons, 1 bed, 1 tv, 2 sinks, 42.0ms
6: 480x480 3 persons, 2 cars, 1 bus, 42.0ms
7: 480x480 1 bowl, 42.0ms
Speed: 0.0ms preprocess, 42.0ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 480)


Validating:   2%|▏         | 10/500 [00:04<03:45,  2.18it/s]


0: 480x480 (no detections), 46.3ms
1: 480x480 (no detections), 46.3ms
2: 480x480 (no detections), 46.3ms
3: 480x480 (no detections), 46.3ms
4: 480x480 (no detections), 46.3ms
5: 480x480 (no detections), 46.3ms
6: 480x480 (no detections), 46.3ms
7: 480x480 (no detections), 46.3ms
Speed: 0.0ms preprocess, 46.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   2%|▏         | 11/500 [00:05<03:45,  2.17it/s]


0: 480x480 2 persons, 1 bench, 44.7ms
1: 480x480 1 laptop, 1 cell phone, 2 clocks, 44.7ms
2: 480x480 4 persons, 44.7ms
3: 480x480 (no detections), 44.7ms
4: 480x480 5 sheeps, 44.7ms
5: 480x480 13 persons, 1 sports ball, 1 tennis racket, 44.7ms
6: 480x480 2 persons, 1 suitcase, 44.7ms
7: 480x480 2 cups, 2 forks, 1 knife, 3 pizzas, 1 dining table, 1 tv, 44.7ms
Speed: 0.0ms preprocess, 44.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   2%|▏         | 12/500 [00:05<03:42,  2.19it/s]


0: 480x480 (no detections), 41.4ms
1: 480x480 (no detections), 41.4ms
2: 480x480 (no detections), 41.4ms
3: 480x480 (no detections), 41.4ms
4: 480x480 (no detections), 41.4ms
5: 480x480 (no detections), 41.4ms
6: 480x480 (no detections), 41.4ms
7: 480x480 (no detections), 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   3%|▎         | 13/500 [00:06<03:35,  2.26it/s]


0: 480x480 2 persons, 40.7ms
1: 480x480 1 bowl, 40.7ms
2: 480x480 1 person, 1 cup, 1 chair, 2 laptops, 40.7ms
3: 480x480 1 person, 1 frisbee, 40.7ms
4: 480x480 7 chairs, 1 couch, 2 potted plants, 1 dining table, 4 clocks, 1 vase, 40.7ms
5: 480x480 (no detections), 40.7ms
6: 480x480 4 bottles, 1 bowl, 1 refrigerator, 40.7ms
7: 480x480 1 person, 40.7ms
Speed: 0.0ms preprocess, 40.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   3%|▎         | 14/500 [00:06<03:31,  2.30it/s]


0: 480x480 15 persons, 3 kites, 49.1ms
1: 480x480 8 persons, 1 train, 49.1ms
2: 480x480 1 cat, 49.1ms
3: 480x480 2 baseball bats, 1 toothbrush, 49.1ms
4: 480x480 1 donut, 49.1ms
5: 480x480 4 bowls, 1 carrot, 49.1ms
6: 480x480 1 bed, 49.1ms
7: 480x480 (no detections), 49.1ms
Speed: 0.0ms preprocess, 49.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   3%|▎         | 15/500 [00:06<03:40,  2.19it/s]


0: 480x480 2 bottles, 48.6ms
1: 480x480 2 cars, 1 stop sign, 48.6ms
2: 480x480 1 cat, 1 mouse, 48.6ms
3: 480x480 1 bus, 1 train, 2 boats, 48.6ms
4: 480x480 1 person, 1 bench, 3 kites, 48.6ms
5: 480x480 1 person, 1 horse, 1 clock, 48.6ms
6: 480x480 10 sheeps, 1 zebra, 1 surfboard, 48.6ms
7: 480x480 7 persons, 1 bench, 1 horse, 1 umbrella, 48.6ms
Speed: 0.0ms preprocess, 48.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   3%|▎         | 16/500 [00:07<03:46,  2.14it/s]


0: 480x480 (no detections), 47.4ms
1: 480x480 (no detections), 47.4ms
2: 480x480 (no detections), 47.4ms
3: 480x480 (no detections), 47.4ms
4: 480x480 (no detections), 47.4ms
5: 480x480 (no detections), 47.4ms
6: 480x480 (no detections), 47.4ms
7: 480x480 (no detections), 47.4ms
Speed: 0.0ms preprocess, 47.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:   3%|▎         | 17/500 [00:07<03:47,  2.12it/s]


0: 480x480 2 zebras, 40.8ms
1: 480x480 4 horses, 40.8ms
2: 480x480 1 car, 1 truck, 40.8ms
3: 480x480 1 person, 1 boat, 40.8ms
4: 480x480 1 person, 8 cars, 1 bus, 2 trucks, 1 traffic light, 40.8ms
5: 480x480 1 person, 3 cups, 1 bowl, 40.8ms
6: 480x480 1 person, 2 cups, 1 dining table, 40.8ms
7: 480x480 1 person, 1 orange, 40.8ms
Speed: 0.0ms preprocess, 40.8ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:   4%|▎         | 18/500 [00:08<03:40,  2.18it/s]


0: 480x480 12 kites, 62.8ms
1: 480x480 3 persons, 62.8ms
2: 480x480 1 person, 62.8ms
3: 480x480 9 persons, 4 bottles, 2 wine glasss, 8 cups, 1 fork, 1 knife, 9 bowls, 1 potted plant, 62.8ms
4: 480x480 8 persons, 62.8ms
5: 480x480 1 bottle, 1 cup, 1 bowl, 62.8ms
6: 480x480 1 person, 1 bench, 62.8ms
7: 480x480 1 bed, 62.8ms
Speed: 0.0ms preprocess, 62.8ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:   4%|▍         | 19/500 [00:08<04:06,  1.95it/s]


0: 480x480 (no detections), 44.7ms
1: 480x480 (no detections), 44.7ms
2: 480x480 (no detections), 44.7ms
3: 480x480 (no detections), 44.7ms
4: 480x480 (no detections), 44.7ms
5: 480x480 (no detections), 44.7ms
6: 480x480 (no detections), 44.7ms
7: 480x480 (no detections), 44.7ms
Speed: 0.0ms preprocess, 44.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   4%|▍         | 20/500 [00:09<04:04,  1.96it/s]


0: 480x480 (no detections), 34.9ms
1: 480x480 (no detections), 34.9ms
2: 480x480 (no detections), 34.9ms
3: 480x480 (no detections), 34.9ms
4: 480x480 (no detections), 34.9ms
5: 480x480 (no detections), 34.9ms
6: 480x480 (no detections), 34.9ms
7: 480x480 (no detections), 34.9ms
Speed: 0.0ms preprocess, 34.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   4%|▍         | 21/500 [00:09<03:44,  2.13it/s]


0: 480x480 4 persons, 4 surfboards, 31.7ms
1: 480x480 2 persons, 1 elephant, 31.7ms
2: 480x480 1 person, 3 beds, 1 book, 31.7ms
3: 480x480 1 person, 1 tie, 1 laptop, 31.7ms
4: 480x480 1 airplane, 1 bird, 31.7ms
5: 480x480 2 persons, 1 skateboard, 31.7ms
6: 480x480 (no detections), 31.7ms
7: 480x480 1 person, 1 bottle, 31.7ms
Speed: 0.0ms preprocess, 31.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   4%|▍         | 22/500 [00:10<03:27,  2.30it/s]


0: 480x480 (no detections), 36.1ms
1: 480x480 (no detections), 36.1ms
2: 480x480 (no detections), 36.1ms
3: 480x480 (no detections), 36.1ms
4: 480x480 (no detections), 36.1ms
5: 480x480 (no detections), 36.1ms
6: 480x480 (no detections), 36.1ms
7: 480x480 (no detections), 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   5%|▍         | 23/500 [00:10<03:17,  2.42it/s]


0: 480x480 1 person, 64.3ms
1: 480x480 2 toilets, 64.3ms
2: 480x480 1 person, 64.3ms
3: 480x480 3 carrots, 64.3ms
4: 480x480 (no detections), 64.3ms
5: 480x480 3 sheeps, 1 bear, 64.3ms
6: 480x480 7 persons, 1 tie, 64.3ms
7: 480x480 1 person, 1 kite, 64.3ms
Speed: 0.0ms preprocess, 64.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   5%|▍         | 24/500 [00:11<03:45,  2.11it/s]


0: 480x480 (no detections), 43.6ms
1: 480x480 (no detections), 43.6ms
2: 480x480 (no detections), 43.6ms
3: 480x480 (no detections), 43.6ms
4: 480x480 (no detections), 43.6ms
5: 480x480 (no detections), 43.6ms
6: 480x480 (no detections), 43.6ms
7: 480x480 (no detections), 43.6ms
Speed: 0.0ms preprocess, 43.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   5%|▌         | 25/500 [00:11<03:40,  2.16it/s]


0: 480x480 2 persons, 48.4ms
1: 480x480 3 persons, 2 ties, 48.4ms
2: 480x480 2 persons, 2 wine glasss, 1 cup, 48.4ms
3: 480x480 1 person, 1 sports ball, 1 tennis racket, 48.4ms
4: 480x480 1 person, 1 cat, 1 dog, 1 cup, 1 cell phone, 48.4ms
5: 480x480 (no detections), 48.4ms
6: 480x480 2 persons, 2 cups, 1 dining table, 48.4ms
7: 480x480 1 person, 8 cars, 1 bus, 2 trucks, 48.4ms
Speed: 0.0ms preprocess, 48.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   5%|▌         | 26/500 [00:12<03:41,  2.14it/s]


0: 480x480 (no detections), 46.0ms
1: 480x480 (no detections), 46.0ms
2: 480x480 (no detections), 46.0ms
3: 480x480 (no detections), 46.0ms
4: 480x480 (no detections), 46.0ms
5: 480x480 (no detections), 46.0ms
6: 480x480 (no detections), 46.0ms
7: 480x480 (no detections), 46.0ms
Speed: 0.0ms preprocess, 46.0ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:   5%|▌         | 27/500 [00:12<03:38,  2.16it/s]


0: 480x480 (no detections), 43.6ms
1: 480x480 (no detections), 43.6ms
2: 480x480 (no detections), 43.6ms
3: 480x480 (no detections), 43.6ms
4: 480x480 (no detections), 43.6ms
5: 480x480 (no detections), 43.6ms
6: 480x480 (no detections), 43.6ms
7: 480x480 (no detections), 43.6ms
Speed: 0.0ms preprocess, 43.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:   6%|▌         | 28/500 [00:12<03:32,  2.23it/s]


0: 480x480 1 bowl, 5 broccolis, 4 carrots, 52.4ms
1: 480x480 3 hot dogs, 52.4ms
2: 480x480 1 person, 52.4ms
3: 480x480 1 person, 1 skis, 52.4ms
4: 480x480 (no detections), 52.4ms
5: 480x480 1 parking meter, 1 bed, 52.4ms
6: 480x480 2 clocks, 52.4ms
7: 480x480 1 person, 1 tennis racket, 1 scissors, 52.4ms
Speed: 0.0ms preprocess, 52.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   6%|▌         | 29/500 [00:13<03:41,  2.13it/s]


0: 480x480 (no detections), 42.4ms
1: 480x480 (no detections), 42.4ms
2: 480x480 (no detections), 42.4ms
3: 480x480 (no detections), 42.4ms
4: 480x480 (no detections), 42.4ms
5: 480x480 (no detections), 42.4ms
6: 480x480 (no detections), 42.4ms
7: 480x480 (no detections), 42.4ms
Speed: 0.0ms preprocess, 42.4ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:   6%|▌         | 30/500 [00:13<03:34,  2.20it/s]


0: 480x480 4 persons, 3 wine glasss, 1 cup, 50.5ms
1: 480x480 1 person, 1 surfboard, 50.5ms
2: 480x480 3 laptops, 1 keyboard, 3 cell phones, 50.5ms
3: 480x480 3 persons, 1 surfboard, 50.5ms
4: 480x480 1 bird, 1 tv, 50.5ms
5: 480x480 4 bottles, 1 bowl, 1 toilet, 50.5ms
6: 480x480 2 umbrellas, 50.5ms
7: 480x480 1 person, 50.5ms
Speed: 0.0ms preprocess, 50.5ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   6%|▌         | 31/500 [00:14<03:39,  2.14it/s]


0: 480x480 1 person, 4 cars, 4 trucks, 2 traffic lights, 45.1ms
1: 480x480 1 person, 1 tennis racket, 45.1ms
2: 480x480 6 persons, 1 skis, 45.1ms
3: 480x480 2 cars, 1 bus, 1 truck, 45.1ms
4: 480x480 1 airplane, 45.1ms
5: 480x480 1 person, 1 tennis racket, 45.1ms
6: 480x480 1 person, 45.1ms
7: 480x480 1 person, 1 sports ball, 1 tennis racket, 45.1ms
Speed: 0.0ms preprocess, 45.1ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:   6%|▋         | 32/500 [00:14<03:38,  2.14it/s]


0: 480x480 (no detections), 54.8ms
1: 480x480 (no detections), 54.8ms
2: 480x480 (no detections), 54.8ms
3: 480x480 (no detections), 54.8ms
4: 480x480 (no detections), 54.8ms
5: 480x480 (no detections), 54.8ms
6: 480x480 (no detections), 54.8ms
7: 480x480 (no detections), 54.8ms
Speed: 0.0ms preprocess, 54.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   7%|▋         | 33/500 [00:15<03:43,  2.09it/s]


0: 480x480 2 tennis rackets, 1 bottle, 1 book, 45.2ms
1: 480x480 (no detections), 45.2ms
2: 480x480 1 cup, 1 laptop, 45.2ms
3: 480x480 1 bus, 45.2ms
4: 480x480 1 bench, 2 teddy bears, 45.2ms
5: 480x480 4 persons, 1 wine glass, 1 clock, 45.2ms
6: 480x480 2 persons, 1 cell phone, 45.2ms
7: 480x480 1 boat, 45.2ms
Speed: 0.0ms preprocess, 45.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   7%|▋         | 34/500 [00:15<03:40,  2.12it/s]


0: 480x480 (no detections), 45.2ms
1: 480x480 (no detections), 45.2ms
2: 480x480 (no detections), 45.2ms
3: 480x480 (no detections), 45.2ms
4: 480x480 (no detections), 45.2ms
5: 480x480 (no detections), 45.2ms
6: 480x480 (no detections), 45.2ms
7: 480x480 (no detections), 45.2ms
Speed: 0.0ms preprocess, 45.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   7%|▋         | 35/500 [00:16<03:33,  2.18it/s]


0: 480x480 1 toilet, 41.7ms
1: 480x480 1 person, 2 skiss, 41.7ms
2: 480x480 3 persons, 2 skiss, 41.7ms
3: 480x480 2 persons, 2 cups, 2 donuts, 41.7ms
4: 480x480 5 persons, 1 bicycle, 41.7ms
5: 480x480 2 persons, 41.7ms
6: 480x480 1 couch, 5 potted plants, 1 bed, 41.7ms
7: 480x480 2 persons, 1 tennis racket, 41.7ms
Speed: 0.0ms preprocess, 41.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:   7%|▋         | 36/500 [00:16<03:28,  2.23it/s]


0: 480x480 (no detections), 42.8ms
1: 480x480 (no detections), 42.8ms
2: 480x480 (no detections), 42.8ms
3: 480x480 (no detections), 42.8ms
4: 480x480 (no detections), 42.8ms
5: 480x480 (no detections), 42.8ms
6: 480x480 (no detections), 42.8ms
7: 480x480 (no detections), 42.8ms
Speed: 0.0ms preprocess, 42.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   7%|▋         | 37/500 [00:17<03:24,  2.27it/s]


0: 480x480 (no detections), 47.6ms
1: 480x480 (no detections), 47.6ms
2: 480x480 (no detections), 47.6ms
3: 480x480 (no detections), 47.6ms
4: 480x480 (no detections), 47.6ms
5: 480x480 (no detections), 47.6ms
6: 480x480 (no detections), 47.6ms
7: 480x480 (no detections), 47.6ms
Speed: 0.0ms preprocess, 47.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:   8%|▊         | 38/500 [00:17<03:25,  2.25it/s]


0: 480x480 (no detections), 42.1ms
1: 480x480 1 dog, 1 sports ball, 42.1ms
2: 480x480 2 persons, 1 cell phone, 42.1ms
3: 480x480 1 train, 42.1ms
4: 480x480 5 persons, 42.1ms
5: 480x480 (no detections), 42.1ms
6: 480x480 (no detections), 42.1ms
7: 480x480 2 persons, 42.1ms
Speed: 0.0ms preprocess, 42.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:   8%|▊         | 39/500 [00:17<03:22,  2.28it/s]


0: 480x480 (no detections), 44.5ms
1: 480x480 (no detections), 44.5ms
2: 480x480 (no detections), 44.5ms
3: 480x480 (no detections), 44.5ms
4: 480x480 (no detections), 44.5ms
5: 480x480 (no detections), 44.5ms
6: 480x480 (no detections), 44.5ms
7: 480x480 (no detections), 44.5ms
Speed: 0.0ms preprocess, 44.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   8%|▊         | 40/500 [00:18<03:19,  2.31it/s]


0: 480x480 (no detections), 44.8ms
1: 480x480 (no detections), 44.8ms
2: 480x480 (no detections), 44.8ms
3: 480x480 (no detections), 44.8ms
4: 480x480 (no detections), 44.8ms
5: 480x480 (no detections), 44.8ms
6: 480x480 (no detections), 44.8ms
7: 480x480 (no detections), 44.8ms
Speed: 0.0ms preprocess, 44.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   8%|▊         | 41/500 [00:18<03:18,  2.31it/s]


0: 480x480 (no detections), 42.1ms
1: 480x480 (no detections), 42.1ms
2: 480x480 (no detections), 42.1ms
3: 480x480 (no detections), 42.1ms
4: 480x480 (no detections), 42.1ms
5: 480x480 (no detections), 42.1ms
6: 480x480 (no detections), 42.1ms
7: 480x480 (no detections), 42.1ms
Speed: 0.0ms preprocess, 42.1ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:   8%|▊         | 42/500 [00:19<03:15,  2.35it/s]


0: 480x480 (no detections), 47.2ms
1: 480x480 (no detections), 47.2ms
2: 480x480 (no detections), 47.2ms
3: 480x480 (no detections), 47.2ms
4: 480x480 (no detections), 47.2ms
5: 480x480 (no detections), 47.2ms
6: 480x480 (no detections), 47.2ms
7: 480x480 (no detections), 47.2ms
Speed: 0.0ms preprocess, 47.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   9%|▊         | 43/500 [00:19<03:18,  2.30it/s]


0: 480x480 1 person, 43.4ms
1: 480x480 1 pizza, 43.4ms
2: 480x480 1 person, 43.4ms
3: 480x480 1 dog, 43.4ms
4: 480x480 9 persons, 1 sports ball, 43.4ms
5: 480x480 6 zebras, 43.4ms
6: 480x480 3 dogs, 43.4ms
7: 480x480 6 boats, 43.4ms
Speed: 0.0ms preprocess, 43.4ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:   9%|▉         | 44/500 [00:20<03:18,  2.30it/s]


0: 480x480 (no detections), 44.2ms
1: 480x480 (no detections), 44.2ms
2: 480x480 (no detections), 44.2ms
3: 480x480 (no detections), 44.2ms
4: 480x480 (no detections), 44.2ms
5: 480x480 (no detections), 44.2ms
6: 480x480 (no detections), 44.2ms
7: 480x480 (no detections), 44.2ms
Speed: 0.0ms preprocess, 44.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   9%|▉         | 45/500 [00:20<03:18,  2.29it/s]


0: 480x480 (no detections), 41.0ms
1: 480x480 (no detections), 41.0ms
2: 480x480 (no detections), 41.0ms
3: 480x480 (no detections), 41.0ms
4: 480x480 (no detections), 41.0ms
5: 480x480 (no detections), 41.0ms
6: 480x480 (no detections), 41.0ms
7: 480x480 (no detections), 41.0ms
Speed: 0.0ms preprocess, 41.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:   9%|▉         | 46/500 [00:20<03:13,  2.34it/s]


0: 480x480 1 microwave, 1 oven, 1 sink, 1 refrigerator, 46.4ms
1: 480x480 1 person, 1 bed, 1 laptop, 46.4ms
2: 480x480 1 person, 1 skis, 46.4ms
3: 480x480 14 persons, 46.4ms
4: 480x480 (no detections), 46.4ms
5: 480x480 1 person, 2 surfboards, 46.4ms
6: 480x480 1 airplane, 46.4ms
7: 480x480 1 car, 1 airplane, 46.4ms
Speed: 0.0ms preprocess, 46.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:   9%|▉         | 47/500 [00:21<03:16,  2.30it/s]


0: 480x480 (no detections), 47.2ms
1: 480x480 (no detections), 47.2ms
2: 480x480 (no detections), 47.2ms
3: 480x480 (no detections), 47.2ms
4: 480x480 (no detections), 47.2ms
5: 480x480 (no detections), 47.2ms
6: 480x480 (no detections), 47.2ms
7: 480x480 (no detections), 47.2ms
Speed: 0.0ms preprocess, 47.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  10%|▉         | 48/500 [00:21<03:17,  2.28it/s]


0: 480x480 (no detections), 42.4ms
1: 480x480 (no detections), 42.4ms
2: 480x480 (no detections), 42.4ms
3: 480x480 (no detections), 42.4ms
4: 480x480 (no detections), 42.4ms
5: 480x480 (no detections), 42.4ms
6: 480x480 (no detections), 42.4ms
7: 480x480 (no detections), 42.4ms
Speed: 0.0ms preprocess, 42.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  10%|▉         | 49/500 [00:22<03:15,  2.31it/s]


0: 480x480 (no detections), 39.0ms
1: 480x480 (no detections), 39.0ms
2: 480x480 (no detections), 39.0ms
3: 480x480 (no detections), 39.0ms
4: 480x480 (no detections), 39.0ms
5: 480x480 (no detections), 39.0ms
6: 480x480 (no detections), 39.0ms
7: 480x480 (no detections), 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  10%|█         | 50/500 [00:22<03:07,  2.39it/s]


0: 480x480 (no detections), 37.2ms
1: 480x480 (no detections), 37.2ms
2: 480x480 (no detections), 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 (no detections), 37.2ms
5: 480x480 (no detections), 37.2ms
6: 480x480 (no detections), 37.2ms
7: 480x480 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  10%|█         | 51/500 [00:23<03:01,  2.48it/s]


0: 480x480 (no detections), 37.2ms
1: 480x480 (no detections), 37.2ms
2: 480x480 (no detections), 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 (no detections), 37.2ms
5: 480x480 (no detections), 37.2ms
6: 480x480 (no detections), 37.2ms
7: 480x480 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  10%|█         | 52/500 [00:23<02:56,  2.54it/s]


0: 480x480 (no detections), 39.4ms
1: 480x480 2 persons, 1 tie, 39.4ms
2: 480x480 2 persons, 2 remotes, 39.4ms
3: 480x480 2 zebras, 7 giraffes, 39.4ms
4: 480x480 1 bottle, 1 banana, 1 laptop, 1 cell phone, 39.4ms
5: 480x480 1 person, 39.4ms
6: 480x480 2 persons, 1 bench, 39.4ms
7: 480x480 1 clock, 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  11%|█         | 53/500 [00:23<02:57,  2.52it/s]


0: 480x480 13 persons, 1 sports ball, 1 baseball bat, 1 baseball glove, 35.9ms
1: 480x480 2 zebras, 35.9ms
2: 480x480 (no detections), 35.9ms
3: 480x480 1 bowl, 9 oranges, 35.9ms
4: 480x480 7 persons, 35.9ms
5: 480x480 (no detections), 35.9ms
6: 480x480 1 person, 1 tie, 35.9ms
7: 480x480 2 bicycles, 35.9ms
Speed: 0.0ms preprocess, 35.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  11%|█         | 54/500 [00:24<02:52,  2.58it/s]


0: 480x480 (no detections), 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 (no detections), 38.4ms
4: 480x480 (no detections), 38.4ms
5: 480x480 (no detections), 38.4ms
6: 480x480 (no detections), 38.4ms
7: 480x480 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  11%|█         | 55/500 [00:24<02:50,  2.61it/s]


0: 480x480 1 person, 1 bed, 37.5ms
1: 480x480 1 bottle, 1 cup, 1 toilet, 1 sink, 37.5ms
2: 480x480 2 persons, 1 sheep, 37.5ms
3: 480x480 5 persons, 37.5ms
4: 480x480 3 bowls, 1 broccoli, 37.5ms
5: 480x480 1 person, 37.5ms
6: 480x480 3 persons, 1 bowl, 1 pizza, 37.5ms
7: 480x480 4 persons, 1 tie, 37.5ms
Speed: 0.0ms preprocess, 37.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  11%|█         | 56/500 [00:24<02:51,  2.59it/s]


0: 480x480 2 persons, 1 surfboard, 39.7ms
1: 480x480 1 bowl, 1 pizza, 39.7ms
2: 480x480 1 person, 2 cups, 1 tv, 1 laptop, 2 keyboards, 1 cell phone, 39.7ms
3: 480x480 (no detections), 39.7ms
4: 480x480 2 bottles, 39.7ms
5: 480x480 4 zebras, 39.7ms
6: 480x480 1 person, 1 skis, 39.7ms
7: 480x480 (no detections), 39.7ms
Speed: 0.0ms preprocess, 39.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  11%|█▏        | 57/500 [00:25<02:52,  2.56it/s]


0: 480x480 (no detections), 41.1ms
1: 480x480 (no detections), 41.1ms
2: 480x480 (no detections), 41.1ms
3: 480x480 (no detections), 41.1ms
4: 480x480 (no detections), 41.1ms
5: 480x480 (no detections), 41.1ms
6: 480x480 (no detections), 41.1ms
7: 480x480 (no detections), 41.1ms
Speed: 0.0ms preprocess, 41.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  12%|█▏        | 58/500 [00:25<02:55,  2.51it/s]


0: 480x480 2 persons, 1 skateboard, 39.4ms
1: 480x480 3 persons, 39.4ms
2: 480x480 1 person, 1 sports ball, 1 tennis racket, 39.4ms
3: 480x480 2 persons, 1 bird, 6 umbrellas, 1 cup, 2 chairs, 39.4ms
4: 480x480 1 train, 39.4ms
5: 480x480 2 persons, 9 bottles, 1 cup, 3 bowls, 1 apple, 39.4ms
6: 480x480 1 bed, 39.4ms
7: 480x480 1 sheep, 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  12%|█▏        | 59/500 [00:26<02:55,  2.51it/s]


0: 480x480 (no detections), 36.4ms
1: 480x480 (no detections), 36.4ms
2: 480x480 (no detections), 36.4ms
3: 480x480 (no detections), 36.4ms
4: 480x480 (no detections), 36.4ms
5: 480x480 (no detections), 36.4ms
6: 480x480 (no detections), 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  12%|█▏        | 60/500 [00:26<02:50,  2.58it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  12%|█▏        | 61/500 [00:26<02:49,  2.59it/s]


0: 480x480 1 bed, 38.0ms
1: 480x480 4 persons, 38.0ms
2: 480x480 5 persons, 38.0ms
3: 480x480 1 person, 1 skateboard, 38.0ms
4: 480x480 (no detections), 38.0ms
5: 480x480 1 zebra, 3 umbrellas, 38.0ms
6: 480x480 3 zebras, 38.0ms
7: 480x480 (no detections), 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  12%|█▏        | 62/500 [00:27<02:47,  2.61it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  13%|█▎        | 63/500 [00:27<02:43,  2.67it/s]


0: 480x480 1 dog, 1 bed, 40.1ms
1: 480x480 2 beds, 40.1ms
2: 480x480 4 persons, 1 umbrella, 40.1ms
3: 480x480 (no detections), 40.1ms
4: 480x480 2 persons, 1 skateboard, 40.1ms
5: 480x480 1 person, 1 bed, 40.1ms
6: 480x480 1 skateboard, 40.1ms
7: 480x480 9 persons, 1 keyboard, 40.1ms
Speed: 0.0ms preprocess, 40.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  13%|█▎        | 64/500 [00:28<02:47,  2.60it/s]


0: 480x480 1 person, 2 beds, 39.8ms
1: 480x480 2 persons, 1 clock, 39.8ms
2: 480x480 1 bed, 3 teddy bears, 39.8ms
3: 480x480 1 bird, 39.8ms
4: 480x480 25 bottles, 1 refrigerator, 39.8ms
5: 480x480 2 persons, 1 bed, 39.8ms
6: 480x480 2 persons, 1 umbrella, 39.8ms
7: 480x480 2 persons, 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  13%|█▎        | 65/500 [00:28<02:51,  2.54it/s]


0: 480x480 1 bicycle, 2 cars, 3 umbrellas, 35.4ms
1: 480x480 2 surfboards, 35.4ms
2: 480x480 6 bottles, 1 bowl, 1 oven, 1 sink, 1 refrigerator, 1 clock, 35.4ms
3: 480x480 4 persons, 35.4ms
4: 480x480 1 person, 1 surfboard, 35.4ms
5: 480x480 10 cups, 1 dining table, 35.4ms
6: 480x480 1 person, 1 snowboard, 35.4ms
7: 480x480 4 forks, 1 scissors, 35.4ms
Speed: 0.0ms preprocess, 35.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  13%|█▎        | 66/500 [00:28<02:47,  2.58it/s]


0: 480x480 1 bowl, 1 pizza, 37.4ms
1: 480x480 6 persons, 37.4ms
2: 480x480 2 clocks, 37.4ms
3: 480x480 3 birds, 37.4ms
4: 480x480 1 person, 1 bus, 37.4ms
5: 480x480 1 car, 1 traffic light, 1 dog, 37.4ms
6: 480x480 11 persons, 1 tennis racket, 2 chairs, 37.4ms
7: 480x480 2 persons, 1 bus, 1 laptop, 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  13%|█▎        | 67/500 [00:29<02:47,  2.59it/s]


0: 480x480 (no detections), 35.8ms
1: 480x480 (no detections), 35.8ms
2: 480x480 (no detections), 35.8ms
3: 480x480 (no detections), 35.8ms
4: 480x480 (no detections), 35.8ms
5: 480x480 (no detections), 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 (no detections), 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  14%|█▎        | 68/500 [00:29<02:43,  2.65it/s]


0: 480x480 (no detections), 36.2ms
1: 480x480 (no detections), 36.2ms
2: 480x480 (no detections), 36.2ms
3: 480x480 (no detections), 36.2ms
4: 480x480 (no detections), 36.2ms
5: 480x480 (no detections), 36.2ms
6: 480x480 (no detections), 36.2ms
7: 480x480 (no detections), 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  14%|█▍        | 69/500 [00:29<02:40,  2.68it/s]


0: 480x480 (no detections), 38.0ms
1: 480x480 (no detections), 38.0ms
2: 480x480 (no detections), 38.0ms
3: 480x480 (no detections), 38.0ms
4: 480x480 (no detections), 38.0ms
5: 480x480 (no detections), 38.0ms
6: 480x480 (no detections), 38.0ms
7: 480x480 (no detections), 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  14%|█▍        | 70/500 [00:30<02:41,  2.67it/s]


0: 480x480 1 person, 1 sports ball, 1 tennis racket, 38.0ms
1: 480x480 3 cups, 1 teddy bear, 38.0ms
2: 480x480 1 person, 1 dog, 2 skiss, 38.0ms
3: 480x480 1 person, 1 snowboard, 38.0ms
4: 480x480 2 elephants, 38.0ms
5: 480x480 4 persons, 1 tie, 38.0ms
6: 480x480 1 teddy bear, 38.0ms
7: 480x480 5 persons, 1 horse, 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  14%|█▍        | 71/500 [00:30<02:45,  2.60it/s]


0: 480x480 1 truck, 1 bench, 1 cat, 36.0ms
1: 480x480 1 person, 2 skiss, 36.0ms
2: 480x480 1 person, 1 bottle, 1 fork, 1 pizza, 36.0ms
3: 480x480 1 cat, 2 laptops, 36.0ms
4: 480x480 1 bus, 36.0ms
5: 480x480 1 person, 1 umbrella, 36.0ms
6: 480x480 1 person, 1 bed, 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  14%|█▍        | 72/500 [00:31<02:44,  2.61it/s]


0: 480x480 5 persons, 40.8ms
1: 480x480 3 persons, 40.8ms
2: 480x480 1 bench, 2 teddy bears, 40.8ms
3: 480x480 3 persons, 1 remote, 40.8ms
4: 480x480 1 person, 1 bird, 1 chair, 40.8ms
5: 480x480 1 person, 1 surfboard, 40.8ms
6: 480x480 1 laptop, 1 remote, 40.8ms
7: 480x480 1 dog, 1 cake, 1 dining table, 40.8ms
Speed: 0.0ms preprocess, 40.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  15%|█▍        | 73/500 [00:31<02:48,  2.54it/s]


0: 480x480 1 person, 8 boats, 1 umbrella, 38.7ms
1: 480x480 1 bus, 1 stop sign, 38.7ms
2: 480x480 3 persons, 2 umbrellas, 1 frisbee, 38.7ms
3: 480x480 (no detections), 38.7ms
4: 480x480 3 persons, 1 sports ball, 1 tennis racket, 38.7ms
5: 480x480 3 hot dogs, 38.7ms
6: 480x480 1 person, 1 skis, 38.7ms
7: 480x480 6 persons, 1 baseball glove, 38.7ms
Speed: 0.0ms preprocess, 38.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  15%|█▍        | 74/500 [00:31<02:47,  2.54it/s]


0: 480x480 2 cars, 1 truck, 42.0ms
1: 480x480 8 persons, 1 tv, 42.0ms
2: 480x480 1 car, 42.0ms
3: 480x480 1 pizza, 1 tv, 42.0ms
4: 480x480 1 person, 1 umbrella, 1 chair, 42.0ms
5: 480x480 1 person, 1 skateboard, 42.0ms
6: 480x480 3 persons, 42.0ms
7: 480x480 6 cars, 1 airplane, 42.0ms
Speed: 0.0ms preprocess, 42.0ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 480)


Validating:  15%|█▌        | 75/500 [00:32<02:51,  2.47it/s]


0: 480x480 (no detections), 33.8ms
1: 480x480 (no detections), 33.8ms
2: 480x480 (no detections), 33.8ms
3: 480x480 (no detections), 33.8ms
4: 480x480 (no detections), 33.8ms
5: 480x480 (no detections), 33.8ms
6: 480x480 (no detections), 33.8ms
7: 480x480 (no detections), 33.8ms
Speed: 0.0ms preprocess, 33.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  15%|█▌        | 76/500 [00:32<02:42,  2.61it/s]


0: 480x480 (no detections), 37.5ms
1: 480x480 (no detections), 37.5ms
2: 480x480 (no detections), 37.5ms
3: 480x480 (no detections), 37.5ms
4: 480x480 (no detections), 37.5ms
5: 480x480 (no detections), 37.5ms
6: 480x480 (no detections), 37.5ms
7: 480x480 (no detections), 37.5ms
Speed: 0.0ms preprocess, 37.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  15%|█▌        | 77/500 [00:33<02:39,  2.64it/s]


0: 480x480 3 persons, 1 car, 1 bench, 36.3ms
1: 480x480 2 persons, 1 umbrella, 36.3ms
2: 480x480 5 persons, 36.3ms
3: 480x480 1 fire hydrant, 1 potted plant, 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 2 benchs, 36.3ms
6: 480x480 1 bowl, 36.3ms
7: 480x480 1 person, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  16%|█▌        | 78/500 [00:33<02:38,  2.66it/s]


0: 480x480 (no detections), 37.1ms
1: 480x480 (no detections), 37.1ms
2: 480x480 (no detections), 37.1ms
3: 480x480 (no detections), 37.1ms
4: 480x480 (no detections), 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 (no detections), 37.1ms
7: 480x480 (no detections), 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  16%|█▌        | 79/500 [00:33<02:37,  2.68it/s]


0: 480x480 1 cat, 40.8ms
1: 480x480 1 person, 1 tennis racket, 40.8ms
2: 480x480 5 cars, 1 truck, 40.8ms
3: 480x480 2 persons, 1 tv, 40.8ms
4: 480x480 1 sandwich, 40.8ms
5: 480x480 2 persons, 3 benchs, 40.8ms
6: 480x480 (no detections), 40.8ms
7: 480x480 1 bird, 40.8ms
Speed: 0.0ms preprocess, 40.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  16%|█▌        | 80/500 [00:34<02:41,  2.60it/s]


0: 480x480 1 bench, 39.1ms
1: 480x480 2 bottles, 1 toilet, 1 sink, 39.1ms
2: 480x480 1 bus, 1 train, 39.1ms
3: 480x480 3 buss, 39.1ms
4: 480x480 1 airplane, 39.1ms
5: 480x480 3 persons, 3 bananas, 39.1ms
6: 480x480 1 person, 39.1ms
7: 480x480 3 persons, 5 pizzas, 39.1ms
Speed: 0.0ms preprocess, 39.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  16%|█▌        | 81/500 [00:34<02:42,  2.57it/s]


0: 480x480 3 persons, 39.5ms
1: 480x480 2 persons, 1 bench, 3 bottles, 1 cup, 1 remote, 39.5ms
2: 480x480 1 bowl, 1 potted plant, 1 microwave, 2 ovens, 1 sink, 1 vase, 39.5ms
3: 480x480 1 bowl, 39.5ms
4: 480x480 2 cups, 1 knife, 2 sandwichs, 1 hot dog, 39.5ms
5: 480x480 2 zebras, 39.5ms
6: 480x480 3 bowls, 39.5ms
7: 480x480 1 person, 1 tv, 1 teddy bear, 39.5ms
Speed: 0.0ms preprocess, 39.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  16%|█▋        | 82/500 [00:35<02:44,  2.54it/s]


0: 480x480 (no detections), 40.0ms
1: 480x480 (no detections), 40.0ms
2: 480x480 (no detections), 40.0ms
3: 480x480 (no detections), 40.0ms
4: 480x480 (no detections), 40.0ms
5: 480x480 (no detections), 40.0ms
6: 480x480 (no detections), 40.0ms
7: 480x480 (no detections), 40.0ms
Speed: 0.0ms preprocess, 40.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  17%|█▋        | 83/500 [00:35<02:43,  2.54it/s]


0: 480x480 (no detections), 36.9ms
1: 480x480 (no detections), 36.9ms
2: 480x480 (no detections), 36.9ms
3: 480x480 (no detections), 36.9ms
4: 480x480 (no detections), 36.9ms
5: 480x480 (no detections), 36.9ms
6: 480x480 (no detections), 36.9ms
7: 480x480 (no detections), 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  17%|█▋        | 84/500 [00:35<02:42,  2.57it/s]


0: 480x480 (no detections), 36.7ms
1: 480x480 (no detections), 36.7ms
2: 480x480 (no detections), 36.7ms
3: 480x480 (no detections), 36.7ms
4: 480x480 (no detections), 36.7ms
5: 480x480 (no detections), 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 (no detections), 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  17%|█▋        | 85/500 [00:36<02:37,  2.63it/s]


0: 480x480 (no detections), 38.9ms
1: 480x480 (no detections), 38.9ms
2: 480x480 (no detections), 38.9ms
3: 480x480 (no detections), 38.9ms
4: 480x480 (no detections), 38.9ms
5: 480x480 (no detections), 38.9ms
6: 480x480 (no detections), 38.9ms
7: 480x480 (no detections), 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  17%|█▋        | 86/500 [00:36<02:41,  2.56it/s]


0: 480x480 (no detections), 40.0ms
1: 480x480 (no detections), 40.0ms
2: 480x480 (no detections), 40.0ms
3: 480x480 (no detections), 40.0ms
4: 480x480 (no detections), 40.0ms
5: 480x480 (no detections), 40.0ms
6: 480x480 (no detections), 40.0ms
7: 480x480 (no detections), 40.0ms
Speed: 0.0ms preprocess, 40.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  17%|█▋        | 87/500 [00:36<02:40,  2.57it/s]


0: 480x480 (no detections), 39.9ms
1: 480x480 (no detections), 39.9ms
2: 480x480 (no detections), 39.9ms
3: 480x480 (no detections), 39.9ms
4: 480x480 (no detections), 39.9ms
5: 480x480 (no detections), 39.9ms
6: 480x480 (no detections), 39.9ms
7: 480x480 (no detections), 39.9ms
Speed: 0.0ms preprocess, 39.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  18%|█▊        | 88/500 [00:37<02:40,  2.57it/s]


0: 480x480 (no detections), 34.0ms
1: 480x480 (no detections), 34.0ms
2: 480x480 (no detections), 34.0ms
3: 480x480 (no detections), 34.0ms
4: 480x480 (no detections), 34.0ms
5: 480x480 (no detections), 34.0ms
6: 480x480 (no detections), 34.0ms
7: 480x480 (no detections), 34.0ms
Speed: 0.0ms preprocess, 34.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  18%|█▊        | 89/500 [00:37<02:33,  2.68it/s]


0: 480x480 1 cat, 1 couch, 35.6ms
1: 480x480 3 giraffes, 35.6ms
2: 480x480 (no detections), 35.6ms
3: 480x480 1 truck, 1 clock, 35.6ms
4: 480x480 2 persons, 1 motorcycle, 35.6ms
5: 480x480 (no detections), 35.6ms
6: 480x480 1 toilet, 35.6ms
7: 480x480 1 bottle, 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  18%|█▊        | 90/500 [00:38<02:34,  2.65it/s]


0: 480x480 (no detections), 38.6ms
1: 480x480 (no detections), 38.6ms
2: 480x480 (no detections), 38.6ms
3: 480x480 (no detections), 38.6ms
4: 480x480 (no detections), 38.6ms
5: 480x480 (no detections), 38.6ms
6: 480x480 (no detections), 38.6ms
7: 480x480 (no detections), 38.6ms
Speed: 0.0ms preprocess, 38.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  18%|█▊        | 91/500 [00:38<02:34,  2.65it/s]


0: 480x480 1 train, 4 sinks, 39.8ms
1: 480x480 1 broccoli, 1 pizza, 39.8ms
2: 480x480 1 fork, 1 bowl, 1 broccoli, 1 carrot, 39.8ms
3: 480x480 (no detections), 39.8ms
4: 480x480 1 bowl, 1 donut, 1 dining table, 39.8ms
5: 480x480 2 toilets, 1 sink, 39.8ms
6: 480x480 7 persons, 1 elephant, 39.8ms
7: 480x480 1 bowl, 1 carrot, 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  18%|█▊        | 92/500 [00:38<02:37,  2.59it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  19%|█▊        | 93/500 [00:39<02:35,  2.62it/s]


0: 480x480 1 bus, 36.6ms
1: 480x480 1 person, 1 bench, 2 skiss, 36.6ms
2: 480x480 1 person, 36.6ms
3: 480x480 2 persons, 36.6ms
4: 480x480 1 knife, 36.6ms
5: 480x480 3 persons, 2 elephants, 36.6ms
6: 480x480 1 laptop, 36.6ms
7: 480x480 2 bottles, 3 cups, 4 sandwichs, 1 dining table, 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  19%|█▉        | 94/500 [00:39<02:34,  2.63it/s]


0: 480x480 (no detections), 40.0ms
1: 480x480 1 person, 1 skateboard, 40.0ms
2: 480x480 2 persons, 2 surfboards, 40.0ms
3: 480x480 2 elephants, 40.0ms
4: 480x480 1 cat, 40.0ms
5: 480x480 6 persons, 40.0ms
6: 480x480 1 person, 1 remote, 15 books, 40.0ms
7: 480x480 (no detections), 40.0ms
Speed: 0.0ms preprocess, 40.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  19%|█▉        | 95/500 [00:39<02:36,  2.59it/s]


0: 480x480 3 donuts, 36.8ms
1: 480x480 1 pizza, 1 cake, 1 oven, 36.8ms
2: 480x480 4 zebras, 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 2 umbrellas, 36.8ms
5: 480x480 3 elephants, 36.8ms
6: 480x480 1 airplane, 3 trucks, 36.8ms
7: 480x480 1 person, 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  19%|█▉        | 96/500 [00:40<02:36,  2.58it/s]


0: 480x480 (no detections), 35.2ms
1: 480x480 (no detections), 35.2ms
2: 480x480 (no detections), 35.2ms
3: 480x480 (no detections), 35.2ms
4: 480x480 (no detections), 35.2ms
5: 480x480 (no detections), 35.2ms
6: 480x480 (no detections), 35.2ms
7: 480x480 (no detections), 35.2ms
Speed: 0.0ms preprocess, 35.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  19%|█▉        | 97/500 [00:40<02:35,  2.60it/s]


0: 480x480 1 bench, 1 banana, 1 chair, 37.9ms
1: 480x480 4 umbrellas, 37.9ms
2: 480x480 1 person, 1 cell phone, 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 3 persons, 1 dog, 37.9ms
5: 480x480 4 zebras, 37.9ms
6: 480x480 1 bed, 2 teddy bears, 37.9ms
7: 480x480 3 persons, 4 bottles, 3 cups, 1 fork, 1 knife, 1 pizza, 1 dining table, 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  20%|█▉        | 98/500 [00:41<02:35,  2.58it/s]


0: 480x480 (no detections), 39.4ms
1: 480x480 (no detections), 39.4ms
2: 480x480 (no detections), 39.4ms
3: 480x480 (no detections), 39.4ms
4: 480x480 (no detections), 39.4ms
5: 480x480 (no detections), 39.4ms
6: 480x480 (no detections), 39.4ms
7: 480x480 (no detections), 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  20%|█▉        | 99/500 [00:41<02:35,  2.58it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  20%|██        | 100/500 [00:41<02:31,  2.63it/s]


0: 480x480 (no detections), 37.4ms
1: 480x480 (no detections), 37.4ms
2: 480x480 (no detections), 37.4ms
3: 480x480 (no detections), 37.4ms
4: 480x480 (no detections), 37.4ms
5: 480x480 (no detections), 37.4ms
6: 480x480 (no detections), 37.4ms
7: 480x480 (no detections), 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  20%|██        | 101/500 [00:42<02:29,  2.66it/s]


0: 480x480 1 person, 1 tennis racket, 39.4ms
1: 480x480 2 horses, 39.4ms
2: 480x480 3 persons, 39.4ms
3: 480x480 1 cell phone, 1 clock, 39.4ms
4: 480x480 2 benchs, 39.4ms
5: 480x480 3 cows, 39.4ms
6: 480x480 1 person, 1 umbrella, 1 skis, 39.4ms
7: 480x480 4 cows, 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  20%|██        | 102/500 [00:42<02:34,  2.58it/s]


0: 480x480 (no detections), 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 (no detections), 36.0ms
3: 480x480 (no detections), 36.0ms
4: 480x480 (no detections), 36.0ms
5: 480x480 (no detections), 36.0ms
6: 480x480 (no detections), 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  21%|██        | 103/500 [00:43<02:29,  2.66it/s]


0: 480x480 (no detections), 37.2ms
1: 480x480 (no detections), 37.2ms
2: 480x480 (no detections), 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 (no detections), 37.2ms
5: 480x480 (no detections), 37.2ms
6: 480x480 (no detections), 37.2ms
7: 480x480 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  21%|██        | 104/500 [00:43<02:27,  2.68it/s]


0: 480x480 (no detections), 35.0ms
1: 480x480 (no detections), 35.0ms
2: 480x480 (no detections), 35.0ms
3: 480x480 (no detections), 35.0ms
4: 480x480 (no detections), 35.0ms
5: 480x480 (no detections), 35.0ms
6: 480x480 (no detections), 35.0ms
7: 480x480 (no detections), 35.0ms
Speed: 0.0ms preprocess, 35.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  21%|██        | 105/500 [00:43<02:24,  2.73it/s]


0: 480x480 6 persons, 1 bicycle, 8 cars, 1 truck, 34.0ms
1: 480x480 1 bus, 34.0ms
2: 480x480 1 person, 34.0ms
3: 480x480 1 stop sign, 34.0ms
4: 480x480 2 birds, 34.0ms
5: 480x480 9 persons, 1 bottle, 1 pizza, 34.0ms
6: 480x480 1 zebra, 34.0ms
7: 480x480 3 persons, 1 baseball bat, 1 baseball glove, 34.0ms
Speed: 0.0ms preprocess, 34.0ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  21%|██        | 106/500 [00:44<02:22,  2.76it/s]


0: 480x480 (no detections), 37.2ms
1: 480x480 (no detections), 37.2ms
2: 480x480 (no detections), 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 (no detections), 37.2ms
5: 480x480 (no detections), 37.2ms
6: 480x480 (no detections), 37.2ms
7: 480x480 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  21%|██▏       | 107/500 [00:44<02:24,  2.73it/s]


0: 480x480 (no detections), 43.6ms
1: 480x480 (no detections), 43.6ms
2: 480x480 (no detections), 43.6ms
3: 480x480 (no detections), 43.6ms
4: 480x480 (no detections), 43.6ms
5: 480x480 (no detections), 43.6ms
6: 480x480 (no detections), 43.6ms
7: 480x480 (no detections), 43.6ms
Speed: 0.0ms preprocess, 43.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  22%|██▏       | 108/500 [00:44<02:31,  2.59it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  22%|██▏       | 109/500 [00:45<02:28,  2.63it/s]


0: 480x480 (no detections), 45.9ms
1: 480x480 (no detections), 45.9ms
2: 480x480 (no detections), 45.9ms
3: 480x480 (no detections), 45.9ms
4: 480x480 (no detections), 45.9ms
5: 480x480 (no detections), 45.9ms
6: 480x480 (no detections), 45.9ms
7: 480x480 (no detections), 45.9ms
Speed: 0.0ms preprocess, 45.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  22%|██▏       | 110/500 [00:45<02:35,  2.51it/s]


0: 480x480 1 bird, 38.8ms
1: 480x480 1 laptop, 1 book, 38.8ms
2: 480x480 2 cars, 2 buss, 1 umbrella, 38.8ms
3: 480x480 1 chair, 2 couchs, 1 potted plant, 3 books, 38.8ms
4: 480x480 7 persons, 1 bus, 3 handbags, 38.8ms
5: 480x480 (no detections), 38.8ms
6: 480x480 1 person, 1 frisbee, 38.8ms
7: 480x480 7 bananas, 1 apple, 1 dining table, 1 cell phone, 38.8ms
Speed: 0.0ms preprocess, 38.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  22%|██▏       | 111/500 [00:46<02:34,  2.52it/s]


0: 480x480 (no detections), 40.6ms
1: 480x480 (no detections), 40.6ms
2: 480x480 (no detections), 40.6ms
3: 480x480 (no detections), 40.6ms
4: 480x480 (no detections), 40.6ms
5: 480x480 (no detections), 40.6ms
6: 480x480 (no detections), 40.6ms
7: 480x480 (no detections), 40.6ms
Speed: 0.0ms preprocess, 40.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  22%|██▏       | 112/500 [00:46<02:33,  2.53it/s]


0: 480x480 2 persons, 1 cell phone, 39.6ms
1: 480x480 (no detections), 39.6ms
2: 480x480 3 persons, 1 baseball bat, 1 baseball glove, 39.6ms
3: 480x480 3 clocks, 39.6ms
4: 480x480 1 train, 39.6ms
5: 480x480 1 person, 1 baseball glove, 39.6ms
6: 480x480 1 person, 1 kite, 39.6ms
7: 480x480 3 persons, 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  23%|██▎       | 113/500 [00:46<02:33,  2.53it/s]


0: 480x480 1 carrot, 3 hot dogs, 35.3ms
1: 480x480 5 persons, 2 cars, 35.3ms
2: 480x480 (no detections), 35.3ms
3: 480x480 2 cups, 1 laptop, 2 keyboards, 1 cell phone, 35.3ms
4: 480x480 1 cat, 35.3ms
5: 480x480 1 cake, 35.3ms
6: 480x480 1 clock, 35.3ms
7: 480x480 1 person, 2 skiss, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  23%|██▎       | 114/500 [00:47<02:29,  2.58it/s]


0: 480x480 2 bowls, 38.6ms
1: 480x480 4 bottles, 38.6ms
2: 480x480 2 persons, 1 skateboard, 38.6ms
3: 480x480 2 persons, 38.6ms
4: 480x480 (no detections), 38.6ms
5: 480x480 1 bear, 38.6ms
6: 480x480 1 person, 1 boat, 1 skateboard, 38.6ms
7: 480x480 3 persons, 1 handbag, 38.6ms
Speed: 0.0ms preprocess, 38.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  23%|██▎       | 115/500 [00:47<02:37,  2.44it/s]


0: 480x480 2 persons, 35.6ms
1: 480x480 1 person, 1 train, 2 boats, 35.6ms
2: 480x480 1 traffic light, 35.6ms
3: 480x480 18 persons, 1 tennis racket, 2 chairs, 35.6ms
4: 480x480 1 tie, 35.6ms
5: 480x480 1 person, 1 train, 35.6ms
6: 480x480 (no detections), 35.6ms
7: 480x480 3 persons, 3 buss, 1 traffic light, 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  23%|██▎       | 116/500 [00:48<02:37,  2.43it/s]


0: 480x480 11 persons, 1 baseball bat, 40.2ms
1: 480x480 3 persons, 1 boat, 1 umbrella, 1 kite, 40.2ms
2: 480x480 1 bird, 40.2ms
3: 480x480 1 bus, 1 train, 40.2ms
4: 480x480 3 persons, 1 bird, 3 horses, 40.2ms
5: 480x480 1 person, 1 car, 1 airplane, 40.2ms
6: 480x480 1 car, 40.2ms
7: 480x480 1 bottle, 1 refrigerator, 1 clock, 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  23%|██▎       | 117/500 [00:48<02:41,  2.38it/s]


0: 480x480 (no detections), 42.5ms
1: 480x480 (no detections), 42.5ms
2: 480x480 (no detections), 42.5ms
3: 480x480 (no detections), 42.5ms
4: 480x480 (no detections), 42.5ms
5: 480x480 (no detections), 42.5ms
6: 480x480 (no detections), 42.5ms
7: 480x480 (no detections), 42.5ms
Speed: 0.0ms preprocess, 42.5ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  24%|██▎       | 118/500 [00:49<02:44,  2.32it/s]


0: 480x480 (no detections), 38.0ms
1: 480x480 (no detections), 38.0ms
2: 480x480 (no detections), 38.0ms
3: 480x480 (no detections), 38.0ms
4: 480x480 (no detections), 38.0ms
5: 480x480 (no detections), 38.0ms
6: 480x480 (no detections), 38.0ms
7: 480x480 (no detections), 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  24%|██▍       | 119/500 [00:49<02:41,  2.35it/s]


0: 480x480 (no detections), 39.1ms
1: 480x480 (no detections), 39.1ms
2: 480x480 (no detections), 39.1ms
3: 480x480 (no detections), 39.1ms
4: 480x480 (no detections), 39.1ms
5: 480x480 (no detections), 39.1ms
6: 480x480 (no detections), 39.1ms
7: 480x480 (no detections), 39.1ms
Speed: 0.0ms preprocess, 39.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  24%|██▍       | 120/500 [00:49<02:42,  2.33it/s]


0: 480x480 (no detections), 35.9ms
1: 480x480 (no detections), 35.9ms
2: 480x480 (no detections), 35.9ms
3: 480x480 (no detections), 35.9ms
4: 480x480 (no detections), 35.9ms
5: 480x480 (no detections), 35.9ms
6: 480x480 (no detections), 35.9ms
7: 480x480 (no detections), 35.9ms
Speed: 0.0ms preprocess, 35.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  24%|██▍       | 121/500 [00:50<02:41,  2.35it/s]


0: 480x480 1 bus, 37.0ms
1: 480x480 1 person, 1 horse, 37.0ms
2: 480x480 1 person, 37.0ms
3: 480x480 4 potted plants, 1 dining table, 1 vase, 37.0ms
4: 480x480 4 persons, 1 car, 2 fire hydrants, 37.0ms
5: 480x480 1 person, 37.0ms
6: 480x480 4 persons, 1 tie, 4 cakes, 37.0ms
7: 480x480 1 clock, 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  24%|██▍       | 122/500 [00:50<02:42,  2.32it/s]


0: 480x480 2 umbrellas, 7 kites, 38.9ms
1: 480x480 1 clock, 38.9ms
2: 480x480 2 persons, 2 cups, 1 bowl, 1 pizza, 1 dining table, 38.9ms
3: 480x480 4 bananas, 1 orange, 2 carrots, 38.9ms
4: 480x480 2 cars, 38.9ms
5: 480x480 3 persons, 4 bicycles, 2 motorcycles, 38.9ms
6: 480x480 1 person, 2 bottles, 1 cup, 1 pizza, 1 dining table, 38.9ms
7: 480x480 1 pizza, 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  25%|██▍       | 123/500 [00:51<02:44,  2.30it/s]


0: 480x480 14 persons, 37.8ms
1: 480x480 2 persons, 5 cars, 37.8ms
2: 480x480 1 person, 1 snowboard, 37.8ms
3: 480x480 2 persons, 1 boat, 2 surfboards, 37.8ms
4: 480x480 5 persons, 4 kites, 37.8ms
5: 480x480 2 persons, 1 boat, 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 11 persons, 4 umbrellas, 1 handbag, 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  25%|██▍       | 124/500 [00:51<02:43,  2.31it/s]


0: 480x480 (no detections), 39.2ms
1: 480x480 (no detections), 39.2ms
2: 480x480 (no detections), 39.2ms
3: 480x480 (no detections), 39.2ms
4: 480x480 (no detections), 39.2ms
5: 480x480 (no detections), 39.2ms
6: 480x480 (no detections), 39.2ms
7: 480x480 (no detections), 39.2ms
Speed: 0.0ms preprocess, 39.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  25%|██▌       | 125/500 [00:52<02:41,  2.32it/s]


0: 480x480 (no detections), 38.6ms
1: 480x480 (no detections), 38.6ms
2: 480x480 (no detections), 38.6ms
3: 480x480 (no detections), 38.6ms
4: 480x480 (no detections), 38.6ms
5: 480x480 (no detections), 38.6ms
6: 480x480 (no detections), 38.6ms
7: 480x480 (no detections), 38.6ms
Speed: 0.0ms preprocess, 38.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  25%|██▌       | 126/500 [00:52<02:40,  2.33it/s]


0: 480x480 1 bottle, 1 wine glass, 1 pizza, 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 6 persons, 1 umbrella, 37.7ms
3: 480x480 2 persons, 1 train, 37.7ms
4: 480x480 2 persons, 1 skateboard, 37.7ms
5: 480x480 1 bowl, 1 banana, 37.7ms
6: 480x480 4 cars, 1 bus, 1 truck, 37.7ms
7: 480x480 2 cars, 1 fire hydrant, 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  25%|██▌       | 127/500 [00:52<02:40,  2.32it/s]


0: 480x480 1 tv, 1 laptop, 39.0ms
1: 480x480 66 birds, 39.0ms
2: 480x480 1 toilet, 1 sink, 39.0ms
3: 480x480 3 cars, 1 truck, 39.0ms
4: 480x480 (no detections), 39.0ms
5: 480x480 5 persons, 2 bananas, 39.0ms
6: 480x480 1 bus, 1 truck, 39.0ms
7: 480x480 7 persons, 1 car, 1 bus, 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  26%|██▌       | 128/500 [00:53<02:45,  2.25it/s]


0: 480x480 (no detections), 37.6ms
1: 480x480 (no detections), 37.6ms
2: 480x480 (no detections), 37.6ms
3: 480x480 (no detections), 37.6ms
4: 480x480 (no detections), 37.6ms
5: 480x480 (no detections), 37.6ms
6: 480x480 (no detections), 37.6ms
7: 480x480 (no detections), 37.6ms
Speed: 0.0ms preprocess, 37.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  26%|██▌       | 129/500 [00:53<02:45,  2.25it/s]


0: 480x480 1 person, 1 skateboard, 38.2ms
1: 480x480 1 motorcycle, 38.2ms
2: 480x480 2 persons, 2 dogs, 1 frisbee, 1 surfboard, 38.2ms
3: 480x480 (no detections), 38.2ms
4: 480x480 1 person, 1 bird, 38.2ms
5: 480x480 4 persons, 2 umbrellas, 38.2ms
6: 480x480 2 persons, 1 fire hydrant, 38.2ms
7: 480x480 2 persons, 3 cups, 1 cake, 38.2ms
Speed: 0.0ms preprocess, 38.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  26%|██▌       | 130/500 [00:54<02:45,  2.24it/s]


0: 480x480 1 person, 5 bottles, 3 cups, 1 oven, 36.3ms
1: 480x480 5 persons, 1 skateboard, 36.3ms
2: 480x480 1 boat, 1 zebra, 36.3ms
3: 480x480 4 persons, 36.3ms
4: 480x480 1 person, 1 snowboard, 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 3 persons, 36.3ms
7: 480x480 4 persons, 1 tie, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  26%|██▌       | 131/500 [00:54<02:42,  2.27it/s]


0: 480x480 1 giraffe, 38.7ms
1: 480x480 6 persons, 1 bottle, 5 cups, 1 dining table, 38.7ms
2: 480x480 2 trains, 38.7ms
3: 480x480 1 bus, 1 dog, 38.7ms
4: 480x480 1 person, 1 motorcycle, 38.7ms
5: 480x480 1 person, 1 tie, 38.7ms
6: 480x480 2 persons, 1 sports ball, 1 tennis racket, 38.7ms
7: 480x480 5 sheeps, 3 cows, 38.7ms
Speed: 0.0ms preprocess, 38.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  26%|██▋       | 132/500 [00:55<02:44,  2.24it/s]


0: 480x480 (no detections), 36.9ms
1: 480x480 (no detections), 36.9ms
2: 480x480 (no detections), 36.9ms
3: 480x480 (no detections), 36.9ms
4: 480x480 (no detections), 36.9ms
5: 480x480 (no detections), 36.9ms
6: 480x480 (no detections), 36.9ms
7: 480x480 (no detections), 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  27%|██▋       | 133/500 [00:55<02:39,  2.29it/s]


0: 480x480 (no detections), 37.1ms
1: 480x480 (no detections), 37.1ms
2: 480x480 (no detections), 37.1ms
3: 480x480 (no detections), 37.1ms
4: 480x480 (no detections), 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 (no detections), 37.1ms
7: 480x480 (no detections), 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  27%|██▋       | 134/500 [00:55<02:36,  2.33it/s]


0: 480x480 (no detections), 36.4ms
1: 480x480 (no detections), 36.4ms
2: 480x480 (no detections), 36.4ms
3: 480x480 (no detections), 36.4ms
4: 480x480 (no detections), 36.4ms
5: 480x480 (no detections), 36.4ms
6: 480x480 (no detections), 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  27%|██▋       | 135/500 [00:56<02:34,  2.36it/s]


0: 480x480 1 cake, 36.1ms
1: 480x480 1 cat, 36.1ms
2: 480x480 2 trains, 36.1ms
3: 480x480 2 traffic lights, 36.1ms
4: 480x480 12 persons, 36.1ms
5: 480x480 3 persons, 6 bottles, 1 cup, 9 pizzas, 36.1ms
6: 480x480 2 bottles, 1 banana, 1 chair, 1 dining table, 2 clocks, 1 vase, 36.1ms
7: 480x480 2 persons, 1 handbag, 4 clocks, 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  27%|██▋       | 136/500 [00:56<02:37,  2.31it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  27%|██▋       | 137/500 [00:57<02:35,  2.34it/s]


0: 480x480 1 person, 1 car, 1 clock, 37.3ms
1: 480x480 10 persons, 1 handbag, 3 cups, 1 bowl, 37.3ms
2: 480x480 1 cat, 2 toilets, 37.3ms
3: 480x480 2 persons, 37.3ms
4: 480x480 2 persons, 2 benchs, 37.3ms
5: 480x480 2 persons, 1 car, 2 trucks, 1 sheep, 37.3ms
6: 480x480 3 bowls, 2 bananas, 1 apple, 37.3ms
7: 480x480 (no detections), 37.3ms
Speed: 0.0ms preprocess, 37.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  28%|██▊       | 138/500 [00:57<02:35,  2.33it/s]


0: 480x480 1 person, 34.4ms
1: 480x480 4 persons, 34.4ms
2: 480x480 2 bananas, 34.4ms
3: 480x480 1 zebra, 34.4ms
4: 480x480 6 persons, 2 kites, 34.4ms
5: 480x480 1 chair, 2 beds, 2 teddy bears, 34.4ms
6: 480x480 1 microwave, 1 refrigerator, 34.4ms
7: 480x480 1 giraffe, 34.4ms
Speed: 0.0ms preprocess, 34.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  28%|██▊       | 139/500 [00:58<02:34,  2.33it/s]


0: 480x480 1 person, 1 car, 1 truck, 2 pizzas, 35.0ms
1: 480x480 1 knife, 35.0ms
2: 480x480 10 persons, 9 boats, 1 sports ball, 1 surfboard, 35.0ms
3: 480x480 2 boats, 35.0ms
4: 480x480 6 persons, 3 baseball bats, 1 baseball glove, 35.0ms
5: 480x480 5 tennis rackets, 35.0ms
6: 480x480 2 persons, 35.0ms
7: 480x480 2 cars, 1 clock, 35.0ms
Speed: 0.0ms preprocess, 35.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  28%|██▊       | 140/500 [00:58<02:32,  2.36it/s]


0: 480x480 (no detections), 34.6ms
1: 480x480 (no detections), 34.6ms
2: 480x480 (no detections), 34.6ms
3: 480x480 (no detections), 34.6ms
4: 480x480 (no detections), 34.6ms
5: 480x480 (no detections), 34.6ms
6: 480x480 (no detections), 34.6ms
7: 480x480 (no detections), 34.6ms
Speed: 0.0ms preprocess, 34.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  28%|██▊       | 141/500 [00:58<02:28,  2.41it/s]


0: 480x480 (no detections), 40.6ms
1: 480x480 (no detections), 40.6ms
2: 480x480 1 clock, 40.6ms
3: 480x480 (no detections), 40.6ms
4: 480x480 3 persons, 40.6ms
5: 480x480 1 person, 40.6ms
6: 480x480 5 persons, 40.6ms
7: 480x480 1 person, 11 cars, 1 traffic light, 40.6ms
Speed: 0.0ms preprocess, 40.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  28%|██▊       | 142/500 [00:59<02:32,  2.35it/s]


0: 480x480 13 persons, 5 kites, 37.1ms
1: 480x480 1 cat, 37.1ms
2: 480x480 1 toilet, 1 sink, 37.1ms
3: 480x480 2 zebras, 37.1ms
4: 480x480 6 persons, 37.1ms
5: 480x480 1 person, 1 frisbee, 37.1ms
6: 480x480 2 pizzas, 37.1ms
7: 480x480 2 persons, 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 480)


Validating:  29%|██▊       | 143/500 [00:59<02:31,  2.35it/s]


0: 480x480 7 persons, 1 kite, 35.0ms
1: 480x480 1 teddy bear, 35.0ms
2: 480x480 1 mouse, 35.0ms
3: 480x480 1 clock, 35.0ms
4: 480x480 2 persons, 35.0ms
5: 480x480 1 microwave, 1 oven, 35.0ms
6: 480x480 1 person, 1 remote, 35.0ms
7: 480x480 10 persons, 1 snowboard, 35.0ms
Speed: 0.0ms preprocess, 35.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  29%|██▉       | 144/500 [01:00<02:30,  2.36it/s]


0: 480x480 (no detections), 36.8ms
1: 480x480 (no detections), 36.8ms
2: 480x480 (no detections), 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 (no detections), 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 (no detections), 36.8ms
7: 480x480 (no detections), 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  29%|██▉       | 145/500 [01:00<02:30,  2.36it/s]


0: 480x480 1 person, 1 tv, 38.3ms
1: 480x480 3 birds, 2 umbrellas, 38.3ms
2: 480x480 1 cat, 1 wine glass, 38.3ms
3: 480x480 13 birds, 38.3ms
4: 480x480 5 cars, 15 traffic lights, 38.3ms
5: 480x480 2 traffic lights, 1 bench, 38.3ms
6: 480x480 (no detections), 38.3ms
7: 480x480 1 clock, 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  29%|██▉       | 146/500 [01:01<02:32,  2.32it/s]


0: 480x480 (no detections), 38.0ms
1: 480x480 (no detections), 38.0ms
2: 480x480 (no detections), 38.0ms
3: 480x480 (no detections), 38.0ms
4: 480x480 (no detections), 38.0ms
5: 480x480 (no detections), 38.0ms
6: 480x480 (no detections), 38.0ms
7: 480x480 (no detections), 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  29%|██▉       | 147/500 [01:01<02:32,  2.32it/s]


0: 480x480 (no detections), 40.1ms
1: 480x480 1 clock, 40.1ms
2: 480x480 1 umbrella, 40.1ms
3: 480x480 1 airplane, 40.1ms
4: 480x480 1 apple, 1 orange, 40.1ms
5: 480x480 1 bottle, 1 toilet, 2 sinks, 40.1ms
6: 480x480 (no detections), 40.1ms
7: 480x480 2 clocks, 40.1ms
Speed: 0.0ms preprocess, 40.1ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  30%|██▉       | 148/500 [01:02<02:35,  2.27it/s]


0: 480x480 1 person, 1 fire hydrant, 38.8ms
1: 480x480 1 potted plant, 2 vases, 38.8ms
2: 480x480 1 refrigerator, 38.8ms
3: 480x480 (no detections), 38.8ms
4: 480x480 3 persons, 38.8ms
5: 480x480 1 person, 38.8ms
6: 480x480 10 persons, 1 car, 4 potted plants, 38.8ms
7: 480x480 3 persons, 1 bench, 2 skateboards, 38.8ms
Speed: 0.0ms preprocess, 38.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  30%|██▉       | 149/500 [01:02<02:37,  2.23it/s]


0: 480x480 2 persons, 2 bicycles, 2 cars, 1 truck, 2 traffic lights, 35.0ms
1: 480x480 1 person, 1 tie, 35.0ms
2: 480x480 1 tie, 35.0ms
3: 480x480 2 persons, 1 bed, 35.0ms
4: 480x480 1 bed, 35.0ms
5: 480x480 1 person, 1 sports ball, 1 tennis racket, 35.0ms
6: 480x480 1 dining table, 35.0ms
7: 480x480 2 persons, 2 surfboards, 35.0ms
Speed: 0.0ms preprocess, 35.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  30%|███       | 150/500 [01:02<02:34,  2.27it/s]


0: 480x480 (no detections), 36.7ms
1: 480x480 (no detections), 36.7ms
2: 480x480 (no detections), 36.7ms
3: 480x480 (no detections), 36.7ms
4: 480x480 (no detections), 36.7ms
5: 480x480 (no detections), 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 (no detections), 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  30%|███       | 151/500 [01:03<02:32,  2.29it/s]


0: 480x480 2 persons, 38.4ms
1: 480x480 1 person, 1 sports ball, 1 tennis racket, 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 1 car, 1 horse, 38.4ms
4: 480x480 1 pizza, 38.4ms
5: 480x480 4 sheeps, 38.4ms
6: 480x480 2 wine glasss, 1 fork, 1 knife, 1 pizza, 38.4ms
7: 480x480 2 bottles, 2 tvs, 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  30%|███       | 152/500 [01:03<02:34,  2.26it/s]


0: 480x480 (no detections), 35.8ms
1: 480x480 (no detections), 35.8ms
2: 480x480 (no detections), 35.8ms
3: 480x480 (no detections), 35.8ms
4: 480x480 (no detections), 35.8ms
5: 480x480 (no detections), 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 (no detections), 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  31%|███       | 153/500 [01:04<02:30,  2.30it/s]


0: 480x480 11 persons, 3 clocks, 38.3ms
1: 480x480 1 bottle, 1 dining table, 38.3ms
2: 480x480 (no detections), 38.3ms
3: 480x480 2 persons, 1 bench, 1 dining table, 38.3ms
4: 480x480 2 cups, 1 cake, 38.3ms
5: 480x480 2 persons, 18 elephants, 38.3ms
6: 480x480 1 truck, 38.3ms
7: 480x480 1 person, 1 bed, 2 teddy bears, 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  31%|███       | 154/500 [01:04<02:33,  2.26it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  31%|███       | 155/500 [01:05<02:31,  2.28it/s]


0: 480x480 (no detections), 36.8ms
1: 480x480 (no detections), 36.8ms
2: 480x480 (no detections), 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 (no detections), 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 (no detections), 36.8ms
7: 480x480 (no detections), 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  31%|███       | 156/500 [01:05<02:28,  2.32it/s]


0: 480x480 1 person, 2 beds, 36.2ms
1: 480x480 3 persons, 1 sports ball, 4 tennis rackets, 36.2ms
2: 480x480 1 person, 1 surfboard, 36.2ms
3: 480x480 (no detections), 36.2ms
4: 480x480 2 persons, 1 bottle, 36.2ms
5: 480x480 1 person, 1 surfboard, 36.2ms
6: 480x480 1 bowl, 7 carrots, 36.2ms
7: 480x480 2 cars, 1 bus, 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  31%|███▏      | 157/500 [01:05<02:31,  2.26it/s]


0: 480x480 (no detections), 37.6ms
1: 480x480 (no detections), 37.6ms
2: 480x480 (no detections), 37.6ms
3: 480x480 (no detections), 37.6ms
4: 480x480 (no detections), 37.6ms
5: 480x480 (no detections), 37.6ms
6: 480x480 (no detections), 37.6ms
7: 480x480 (no detections), 37.6ms
Speed: 0.0ms preprocess, 37.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  32%|███▏      | 158/500 [01:06<02:29,  2.28it/s]


0: 480x480 1 sports ball, 1 tennis racket, 1 teddy bear, 39.5ms
1: 480x480 1 cat, 2 laptops, 39.5ms
2: 480x480 5 persons, 1 sports ball, 1 tennis racket, 39.5ms
3: 480x480 7 persons, 1 tennis racket, 2 bottles, 1 chair, 39.5ms
4: 480x480 1 clock, 39.5ms
5: 480x480 1 motorcycle, 39.5ms
6: 480x480 5 persons, 2 hot dogs, 1 chair, 1 dining table, 39.5ms
7: 480x480 (no detections), 39.5ms
Speed: 0.0ms preprocess, 39.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  32%|███▏      | 159/500 [01:06<02:31,  2.26it/s]


0: 480x480 (no detections), 37.6ms
1: 480x480 (no detections), 37.6ms
2: 480x480 (no detections), 37.6ms
3: 480x480 (no detections), 37.6ms
4: 480x480 (no detections), 37.6ms
5: 480x480 (no detections), 37.6ms
6: 480x480 (no detections), 37.6ms
7: 480x480 (no detections), 37.6ms
Speed: 0.0ms preprocess, 37.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  32%|███▏      | 160/500 [01:07<02:28,  2.28it/s]


0: 480x480 1 person, 1 cell phone, 42.4ms
1: 480x480 1 bus, 42.4ms
2: 480x480 1 person, 1 umbrella, 1 kite, 42.4ms
3: 480x480 8 persons, 1 car, 1 truck, 42.4ms
4: 480x480 4 persons, 6 sheeps, 42.4ms
5: 480x480 1 person, 1 car, 2 benchs, 4 potted plants, 42.4ms
6: 480x480 1 cat, 1 bottle, 42.4ms
7: 480x480 1 person, 42.4ms
Speed: 0.0ms preprocess, 42.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  32%|███▏      | 161/500 [01:07<02:32,  2.22it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  32%|███▏      | 162/500 [01:08<02:30,  2.24it/s]


0: 480x480 (no detections), 34.6ms
1: 480x480 (no detections), 34.6ms
2: 480x480 (no detections), 34.6ms
3: 480x480 (no detections), 34.6ms
4: 480x480 (no detections), 34.6ms
5: 480x480 (no detections), 34.6ms
6: 480x480 (no detections), 34.6ms
7: 480x480 (no detections), 34.6ms
Speed: 0.0ms preprocess, 34.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  33%|███▎      | 163/500 [01:08<02:26,  2.29it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  33%|███▎      | 164/500 [01:09<02:25,  2.31it/s]


0: 480x480 1 person, 1 dog, 2 frisbees, 38.2ms
1: 480x480 1 person, 38.2ms
2: 480x480 3 persons, 1 bicycle, 1 bus, 1 truck, 38.2ms
3: 480x480 1 umbrella, 38.2ms
4: 480x480 6 oranges, 38.2ms
5: 480x480 1 umbrella, 1 banana, 38.2ms
6: 480x480 2 persons, 3 remotes, 38.2ms
7: 480x480 1 skis, 1 snowboard, 38.2ms
Speed: 0.0ms preprocess, 38.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  33%|███▎      | 165/500 [01:09<02:26,  2.28it/s]


0: 480x480 (no detections), 41.0ms
1: 480x480 (no detections), 41.0ms
2: 480x480 (no detections), 41.0ms
3: 480x480 (no detections), 41.0ms
4: 480x480 (no detections), 41.0ms
5: 480x480 (no detections), 41.0ms
6: 480x480 (no detections), 41.0ms
7: 480x480 (no detections), 41.0ms
Speed: 0.0ms preprocess, 41.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  33%|███▎      | 166/500 [01:09<02:28,  2.25it/s]


0: 480x480 (no detections), 35.7ms
1: 480x480 (no detections), 35.7ms
2: 480x480 (no detections), 35.7ms
3: 480x480 (no detections), 35.7ms
4: 480x480 (no detections), 35.7ms
5: 480x480 (no detections), 35.7ms
6: 480x480 (no detections), 35.7ms
7: 480x480 (no detections), 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  33%|███▎      | 167/500 [01:10<02:24,  2.30it/s]


0: 480x480 (no detections), 41.2ms
1: 480x480 (no detections), 41.2ms
2: 480x480 (no detections), 41.2ms
3: 480x480 (no detections), 41.2ms
4: 480x480 (no detections), 41.2ms
5: 480x480 (no detections), 41.2ms
6: 480x480 (no detections), 41.2ms
7: 480x480 (no detections), 41.2ms
Speed: 0.0ms preprocess, 41.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  34%|███▎      | 168/500 [01:10<02:25,  2.28it/s]


0: 480x480 (no detections), 34.1ms
1: 480x480 (no detections), 34.1ms
2: 480x480 (no detections), 34.1ms
3: 480x480 (no detections), 34.1ms
4: 480x480 (no detections), 34.1ms
5: 480x480 (no detections), 34.1ms
6: 480x480 (no detections), 34.1ms
7: 480x480 (no detections), 34.1ms
Speed: 0.0ms preprocess, 34.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  34%|███▍      | 169/500 [01:11<02:20,  2.36it/s]


0: 480x480 (no detections), 35.3ms
1: 480x480 (no detections), 35.3ms
2: 480x480 (no detections), 35.3ms
3: 480x480 (no detections), 35.3ms
4: 480x480 (no detections), 35.3ms
5: 480x480 (no detections), 35.3ms
6: 480x480 (no detections), 35.3ms
7: 480x480 (no detections), 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  34%|███▍      | 170/500 [01:11<02:18,  2.39it/s]


0: 480x480 2 cars, 2 trucks, 1 clock, 36.8ms
1: 480x480 1 person, 36.8ms
2: 480x480 1 stop sign, 36.8ms
3: 480x480 1 cup, 1 fork, 1 knife, 1 bowl, 2 broccolis, 36.8ms
4: 480x480 3 persons, 2 horses, 36.8ms
5: 480x480 1 person, 1 surfboard, 36.8ms
6: 480x480 (no detections), 36.8ms
7: 480x480 2 persons, 19 umbrellas, 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  34%|███▍      | 171/500 [01:12<02:19,  2.36it/s]


0: 480x480 (no detections), 40.2ms
1: 480x480 (no detections), 40.2ms
2: 480x480 (no detections), 40.2ms
3: 480x480 (no detections), 40.2ms
4: 480x480 (no detections), 40.2ms
5: 480x480 (no detections), 40.2ms
6: 480x480 (no detections), 40.2ms
7: 480x480 (no detections), 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  34%|███▍      | 172/500 [01:12<02:20,  2.34it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  35%|███▍      | 173/500 [01:12<02:17,  2.38it/s]


0: 480x480 1 bowl, 2 bananas, 37.0ms
1: 480x480 1 bird, 37.0ms
2: 480x480 10 bottles, 1 cup, 2 pizzas, 1 book, 37.0ms
3: 480x480 2 persons, 13 bottles, 37.0ms
4: 480x480 1 bench, 37.0ms
5: 480x480 4 bottles, 1 sink, 37.0ms
6: 480x480 1 tv, 1 clock, 37.0ms
7: 480x480 1 person, 1 bowl, 2 beds, 1 clock, 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  35%|███▍      | 174/500 [01:13<02:18,  2.36it/s]


0: 480x480 1 tv, 1 laptop, 1 mouse, 39.5ms
1: 480x480 4 persons, 1 baseball bat, 1 baseball glove, 39.5ms
2: 480x480 1 airplane, 39.5ms
3: 480x480 1 car, 1 truck, 39.5ms
4: 480x480 1 bicycle, 1 bus, 39.5ms
5: 480x480 1 sandwich, 1 broccoli, 39.5ms
6: 480x480 2 pizzas, 39.5ms
7: 480x480 5 persons, 5 skiss, 39.5ms
Speed: 0.0ms preprocess, 39.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  35%|███▌      | 175/500 [01:13<02:23,  2.27it/s]


0: 480x480 (no detections), 42.8ms
1: 480x480 (no detections), 42.8ms
2: 480x480 (no detections), 42.8ms
3: 480x480 (no detections), 42.8ms
4: 480x480 (no detections), 42.8ms
5: 480x480 (no detections), 42.8ms
6: 480x480 (no detections), 42.8ms
7: 480x480 (no detections), 42.8ms
Speed: 0.0ms preprocess, 42.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  35%|███▌      | 176/500 [01:14<02:26,  2.21it/s]


0: 480x480 1 person, 1 skateboard, 37.1ms
1: 480x480 6 persons, 1 horse, 37.1ms
2: 480x480 1 person, 1 tennis racket, 37.1ms
3: 480x480 1 person, 1 surfboard, 37.1ms
4: 480x480 1 bed, 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 6 persons, 1 bottle, 1 cup, 1 knife, 1 bowl, 1 clock, 37.1ms
7: 480x480 1 airplane, 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  35%|███▌      | 177/500 [01:14<02:24,  2.23it/s]


0: 480x480 (no detections), 38.6ms
1: 480x480 (no detections), 38.6ms
2: 480x480 (no detections), 38.6ms
3: 480x480 (no detections), 38.6ms
4: 480x480 (no detections), 38.6ms
5: 480x480 (no detections), 38.6ms
6: 480x480 (no detections), 38.6ms
7: 480x480 (no detections), 38.6ms
Speed: 0.0ms preprocess, 38.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  36%|███▌      | 178/500 [01:15<02:21,  2.27it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  36%|███▌      | 179/500 [01:15<02:19,  2.30it/s]


0: 480x480 1 cell phone, 36.6ms
1: 480x480 2 umbrellas, 36.6ms
2: 480x480 3 persons, 3 wine glasss, 36.6ms
3: 480x480 1 person, 36.6ms
4: 480x480 2 persons, 1 sports ball, 1 tennis racket, 36.6ms
5: 480x480 5 persons, 1 baseball bat, 36.6ms
6: 480x480 15 persons, 1 bench, 36.6ms
7: 480x480 5 persons, 1 surfboard, 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  36%|███▌      | 180/500 [01:15<02:21,  2.27it/s]


0: 480x480 3 cars, 1 fire hydrant, 37.7ms
1: 480x480 1 person, 37.7ms
2: 480x480 2 giraffes, 37.7ms
3: 480x480 1 person, 1 bench, 37.7ms
4: 480x480 1 person, 1 frisbee, 1 keyboard, 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 1 airplane, 37.7ms
7: 480x480 2 bottles, 8 chairs, 2 couchs, 3 potted plants, 1 tv, 10 books, 2 vases, 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  36%|███▌      | 181/500 [01:16<02:21,  2.26it/s]


0: 480x480 (no detections), 42.9ms
1: 480x480 (no detections), 42.9ms
2: 480x480 (no detections), 42.9ms
3: 480x480 (no detections), 42.9ms
4: 480x480 (no detections), 42.9ms
5: 480x480 (no detections), 42.9ms
6: 480x480 (no detections), 42.9ms
7: 480x480 (no detections), 42.9ms
Speed: 0.0ms preprocess, 42.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  36%|███▋      | 182/500 [01:16<02:22,  2.22it/s]


0: 480x480 (no detections), 36.6ms
1: 480x480 (no detections), 36.6ms
2: 480x480 (no detections), 36.6ms
3: 480x480 (no detections), 36.6ms
4: 480x480 (no detections), 36.6ms
5: 480x480 (no detections), 36.6ms
6: 480x480 (no detections), 36.6ms
7: 480x480 (no detections), 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  37%|███▋      | 183/500 [01:17<02:18,  2.29it/s]


0: 480x480 12 persons, 37.0ms
1: 480x480 7 cars, 6 kites, 37.0ms
2: 480x480 (no detections), 37.0ms
3: 480x480 1 bowl, 37.0ms
4: 480x480 2 persons, 1 bench, 37.0ms
5: 480x480 2 persons, 37.0ms
6: 480x480 10 sheeps, 37.0ms
7: 480x480 2 dogs, 1 toilet, 1 sink, 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  37%|███▋      | 184/500 [01:17<02:17,  2.30it/s]


0: 480x480 1 person, 39.3ms
1: 480x480 2 persons, 39.3ms
2: 480x480 3 persons, 1 surfboard, 39.3ms
3: 480x480 2 boats, 39.3ms
4: 480x480 1 fork, 1 cake, 1 dining table, 39.3ms
5: 480x480 (no detections), 39.3ms
6: 480x480 6 persons, 1 hot dog, 39.3ms
7: 480x480 4 persons, 2 bicycles, 16 cars, 10 buss, 7 trucks, 39.3ms
Speed: 0.0ms preprocess, 39.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  37%|███▋      | 185/500 [01:18<02:17,  2.29it/s]


0: 480x480 1 person, 37.3ms
1: 480x480 2 persons, 37.3ms
2: 480x480 8 persons, 1 bicycle, 1 bus, 3 trucks, 37.3ms
3: 480x480 1 wine glass, 1 cup, 1 fork, 1 spoon, 2 bowls, 37.3ms
4: 480x480 1 bird, 37.3ms
5: 480x480 1 toilet, 2 sinks, 37.3ms
6: 480x480 1 person, 1 surfboard, 37.3ms
7: 480x480 1 toilet, 37.3ms
Speed: 0.0ms preprocess, 37.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  37%|███▋      | 186/500 [01:18<02:16,  2.30it/s]


0: 480x480 (no detections), 38.9ms
1: 480x480 (no detections), 38.9ms
2: 480x480 (no detections), 38.9ms
3: 480x480 (no detections), 38.9ms
4: 480x480 (no detections), 38.9ms
5: 480x480 (no detections), 38.9ms
6: 480x480 (no detections), 38.9ms
7: 480x480 (no detections), 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  37%|███▋      | 187/500 [01:19<02:18,  2.26it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  38%|███▊      | 188/500 [01:19<02:17,  2.28it/s]


0: 480x480 1 bed, 38.8ms
1: 480x480 3 persons, 5 bottles, 38.8ms
2: 480x480 4 cars, 38.8ms
3: 480x480 1 car, 1 bus, 38.8ms
4: 480x480 1 train, 38.8ms
5: 480x480 5 apples, 4 oranges, 38.8ms
6: 480x480 2 persons, 38.8ms
7: 480x480 1 stop sign, 38.8ms
Speed: 0.0ms preprocess, 38.8ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  38%|███▊      | 189/500 [01:19<02:17,  2.27it/s]


0: 480x480 (no detections), 43.9ms
1: 480x480 (no detections), 43.9ms
2: 480x480 (no detections), 43.9ms
3: 480x480 (no detections), 43.9ms
4: 480x480 (no detections), 43.9ms
5: 480x480 (no detections), 43.9ms
6: 480x480 (no detections), 43.9ms
7: 480x480 (no detections), 43.9ms
Speed: 0.0ms preprocess, 43.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  38%|███▊      | 190/500 [01:20<02:20,  2.21it/s]


0: 480x480 (no detections), 39.4ms
1: 480x480 (no detections), 39.4ms
2: 480x480 (no detections), 39.4ms
3: 480x480 (no detections), 39.4ms
4: 480x480 (no detections), 39.4ms
5: 480x480 (no detections), 39.4ms
6: 480x480 (no detections), 39.4ms
7: 480x480 (no detections), 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  38%|███▊      | 191/500 [01:20<02:18,  2.24it/s]


0: 480x480 1 person, 1 skis, 34.1ms
1: 480x480 (no detections), 34.1ms
2: 480x480 1 giraffe, 34.1ms
3: 480x480 1 train, 34.1ms
4: 480x480 1 clock, 34.1ms
5: 480x480 1 apple, 4 oranges, 34.1ms
6: 480x480 (no detections), 34.1ms
7: 480x480 1 clock, 34.1ms
Speed: 0.0ms preprocess, 34.1ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  38%|███▊      | 192/500 [01:21<02:15,  2.27it/s]


0: 480x480 1 bed, 35.2ms
1: 480x480 4 clocks, 35.2ms
2: 480x480 1 person, 3 beds, 1 cell phone, 1 microwave, 35.2ms
3: 480x480 6 persons, 35.2ms
4: 480x480 1 person, 1 truck, 1 bench, 35.2ms
5: 480x480 (no detections), 35.2ms
6: 480x480 1 cat, 1 tv, 35.2ms
7: 480x480 1 dog, 1 chair, 1 couch, 35.2ms
Speed: 0.0ms preprocess, 35.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  39%|███▊      | 193/500 [01:21<02:13,  2.31it/s]


0: 480x480 (no detections), 39.5ms
1: 480x480 (no detections), 39.5ms
2: 480x480 (no detections), 39.5ms
3: 480x480 (no detections), 39.5ms
4: 480x480 (no detections), 39.5ms
5: 480x480 (no detections), 39.5ms
6: 480x480 (no detections), 39.5ms
7: 480x480 (no detections), 39.5ms
Speed: 0.0ms preprocess, 39.5ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  39%|███▉      | 194/500 [01:22<02:13,  2.30it/s]


0: 480x480 (no detections), 42.9ms
1: 480x480 (no detections), 42.9ms
2: 480x480 (no detections), 42.9ms
3: 480x480 (no detections), 42.9ms
4: 480x480 (no detections), 42.9ms
5: 480x480 (no detections), 42.9ms
6: 480x480 (no detections), 42.9ms
7: 480x480 (no detections), 42.9ms
Speed: 0.0ms preprocess, 42.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  39%|███▉      | 195/500 [01:22<02:15,  2.25it/s]


0: 480x480 5 persons, 3 benchs, 39.6ms
1: 480x480 9 persons, 8 boats, 39.6ms
2: 480x480 1 laptop, 1 keyboard, 39.6ms
3: 480x480 1 bottle, 1 microwave, 3 ovens, 1 refrigerator, 39.6ms
4: 480x480 1 bench, 39.6ms
5: 480x480 1 tie, 39.6ms
6: 480x480 1 tie, 3 carrots, 39.6ms
7: 480x480 1 giraffe, 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  39%|███▉      | 196/500 [01:23<02:16,  2.23it/s]


0: 480x480 (no detections), 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 (no detections), 36.0ms
3: 480x480 (no detections), 36.0ms
4: 480x480 (no detections), 36.0ms
5: 480x480 (no detections), 36.0ms
6: 480x480 (no detections), 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  39%|███▉      | 197/500 [01:23<02:13,  2.28it/s]


0: 480x480 1 fire hydrant, 39.8ms
1: 480x480 3 persons, 1 surfboard, 39.8ms
2: 480x480 1 person, 39.8ms
3: 480x480 3 persons, 4 kites, 39.8ms
4: 480x480 2 persons, 1 umbrella, 39.8ms
5: 480x480 8 persons, 3 bottles, 7 cups, 1 knife, 3 bowls, 1 dining table, 39.8ms
6: 480x480 4 sheeps, 39.8ms
7: 480x480 1 car, 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  40%|███▉      | 198/500 [01:23<02:14,  2.24it/s]


0: 480x480 (no detections), 36.4ms
1: 480x480 (no detections), 36.4ms
2: 480x480 (no detections), 36.4ms
3: 480x480 (no detections), 36.4ms
4: 480x480 (no detections), 36.4ms
5: 480x480 (no detections), 36.4ms
6: 480x480 (no detections), 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  40%|███▉      | 199/500 [01:24<02:11,  2.29it/s]


0: 480x480 1 bottle, 1 cup, 1 laptop, 2 keyboards, 42.2ms
1: 480x480 1 person, 1 bed, 42.2ms
2: 480x480 8 persons, 4 bicycles, 1 car, 1 motorcycle, 2 traffic lights, 1 fire hydrant, 1 handbag, 42.2ms
3: 480x480 (no detections), 42.2ms
4: 480x480 1 keyboard, 42.2ms
5: 480x480 4 persons, 3 cars, 42.2ms
6: 480x480 (no detections), 42.2ms
7: 480x480 1 person, 2 bicycles, 1 bottle, 42.2ms
Speed: 0.0ms preprocess, 42.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  40%|████      | 200/500 [01:24<02:16,  2.20it/s]


0: 480x480 (no detections), 35.8ms
1: 480x480 (no detections), 35.8ms
2: 480x480 (no detections), 35.8ms
3: 480x480 (no detections), 35.8ms
4: 480x480 (no detections), 35.8ms
5: 480x480 (no detections), 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 (no detections), 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  40%|████      | 201/500 [01:25<02:12,  2.25it/s]


0: 480x480 (no detections), 36.7ms
1: 480x480 (no detections), 36.7ms
2: 480x480 (no detections), 36.7ms
3: 480x480 (no detections), 36.7ms
4: 480x480 (no detections), 36.7ms
5: 480x480 (no detections), 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 (no detections), 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  40%|████      | 202/500 [01:25<02:09,  2.31it/s]


0: 480x480 (no detections), 37.2ms
1: 480x480 (no detections), 37.2ms
2: 480x480 (no detections), 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 (no detections), 37.2ms
5: 480x480 (no detections), 37.2ms
6: 480x480 (no detections), 37.2ms
7: 480x480 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  41%|████      | 203/500 [01:26<02:08,  2.32it/s]


0: 480x480 13 persons, 39.9ms
1: 480x480 1 giraffe, 39.9ms
2: 480x480 1 person, 2 cars, 1 bus, 39.9ms
3: 480x480 5 birds, 39.9ms
4: 480x480 (no detections), 39.9ms
5: 480x480 18 sheeps, 39.9ms
6: 480x480 (no detections), 39.9ms
7: 480x480 1 person, 1 boat, 1 surfboard, 39.9ms
Speed: 0.0ms preprocess, 39.9ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  41%|████      | 204/500 [01:26<02:10,  2.27it/s]


0: 480x480 (no detections), 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 (no detections), 38.4ms
4: 480x480 (no detections), 38.4ms
5: 480x480 (no detections), 38.4ms
6: 480x480 (no detections), 38.4ms
7: 480x480 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  41%|████      | 205/500 [01:27<02:09,  2.28it/s]


0: 480x480 (no detections), 38.9ms
1: 480x480 (no detections), 38.9ms
2: 480x480 (no detections), 38.9ms
3: 480x480 (no detections), 38.9ms
4: 480x480 (no detections), 38.9ms
5: 480x480 (no detections), 38.9ms
6: 480x480 (no detections), 38.9ms
7: 480x480 (no detections), 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  41%|████      | 206/500 [01:27<02:08,  2.30it/s]


0: 480x480 (no detections), 38.2ms
1: 480x480 (no detections), 38.2ms
2: 480x480 (no detections), 38.2ms
3: 480x480 (no detections), 38.2ms
4: 480x480 (no detections), 38.2ms
5: 480x480 (no detections), 38.2ms
6: 480x480 (no detections), 38.2ms
7: 480x480 (no detections), 38.2ms
Speed: 0.0ms preprocess, 38.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  41%|████▏     | 207/500 [01:27<02:06,  2.32it/s]


0: 480x480 3 persons, 1 car, 1 umbrella, 36.0ms
1: 480x480 1 potted plant, 36.0ms
2: 480x480 2 clocks, 36.0ms
3: 480x480 1 toilet, 36.0ms
4: 480x480 2 persons, 36.0ms
5: 480x480 1 train, 36.0ms
6: 480x480 2 persons, 1 bench, 1 umbrella, 36.0ms
7: 480x480 2 sandwichs, 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  42%|████▏     | 208/500 [01:28<02:07,  2.30it/s]


0: 480x480 (no detections), 42.5ms
1: 480x480 (no detections), 42.5ms
2: 480x480 (no detections), 42.5ms
3: 480x480 (no detections), 42.5ms
4: 480x480 (no detections), 42.5ms
5: 480x480 (no detections), 42.5ms
6: 480x480 (no detections), 42.5ms
7: 480x480 (no detections), 42.5ms
Speed: 0.0ms preprocess, 42.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  42%|████▏     | 209/500 [01:28<02:08,  2.26it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  42%|████▏     | 210/500 [01:29<02:08,  2.26it/s]


0: 480x480 (no detections), 38.9ms
1: 480x480 (no detections), 38.9ms
2: 480x480 (no detections), 38.9ms
3: 480x480 (no detections), 38.9ms
4: 480x480 (no detections), 38.9ms
5: 480x480 (no detections), 38.9ms
6: 480x480 (no detections), 38.9ms
7: 480x480 (no detections), 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  42%|████▏     | 211/500 [01:29<02:06,  2.29it/s]


0: 480x480 (no detections), 40.2ms
1: 480x480 (no detections), 40.2ms
2: 480x480 (no detections), 40.2ms
3: 480x480 (no detections), 40.2ms
4: 480x480 (no detections), 40.2ms
5: 480x480 (no detections), 40.2ms
6: 480x480 (no detections), 40.2ms
7: 480x480 (no detections), 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  42%|████▏     | 212/500 [01:30<02:06,  2.28it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  43%|████▎     | 213/500 [01:30<02:05,  2.28it/s]


0: 480x480 (no detections), 44.0ms
1: 480x480 (no detections), 44.0ms
2: 480x480 (no detections), 44.0ms
3: 480x480 (no detections), 44.0ms
4: 480x480 (no detections), 44.0ms
5: 480x480 (no detections), 44.0ms
6: 480x480 (no detections), 44.0ms
7: 480x480 (no detections), 44.0ms
Speed: 0.0ms preprocess, 44.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  43%|████▎     | 214/500 [01:30<02:08,  2.23it/s]


0: 480x480 (no detections), 36.1ms
1: 480x480 (no detections), 36.1ms
2: 480x480 (no detections), 36.1ms
3: 480x480 (no detections), 36.1ms
4: 480x480 (no detections), 36.1ms
5: 480x480 (no detections), 36.1ms
6: 480x480 (no detections), 36.1ms
7: 480x480 (no detections), 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  43%|████▎     | 215/500 [01:31<02:04,  2.29it/s]


0: 480x480 (no detections), 38.3ms
1: 480x480 (no detections), 38.3ms
2: 480x480 (no detections), 38.3ms
3: 480x480 (no detections), 38.3ms
4: 480x480 (no detections), 38.3ms
5: 480x480 (no detections), 38.3ms
6: 480x480 (no detections), 38.3ms
7: 480x480 (no detections), 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  43%|████▎     | 216/500 [01:31<02:02,  2.32it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  43%|████▎     | 217/500 [01:32<02:00,  2.36it/s]


0: 480x480 3 persons, 1 sports ball, 1 tennis racket, 36.7ms
1: 480x480 1 laptop, 1 keyboard, 36.7ms
2: 480x480 1 bowl, 1 pizza, 36.7ms
3: 480x480 1 person, 36.7ms
4: 480x480 4 teddy bears, 36.7ms
5: 480x480 2 persons, 1 cell phone, 36.7ms
6: 480x480 1 bus, 1 umbrella, 36.7ms
7: 480x480 1 bed, 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  44%|████▎     | 218/500 [01:32<02:00,  2.33it/s]


0: 480x480 (no detections), 45.0ms
1: 480x480 (no detections), 45.0ms
2: 480x480 (no detections), 45.0ms
3: 480x480 (no detections), 45.0ms
4: 480x480 (no detections), 45.0ms
5: 480x480 (no detections), 45.0ms
6: 480x480 (no detections), 45.0ms
7: 480x480 (no detections), 45.0ms
Speed: 0.0ms preprocess, 45.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  44%|████▍     | 219/500 [01:33<02:06,  2.22it/s]


0: 480x480 (no detections), 35.8ms
1: 480x480 (no detections), 35.8ms
2: 480x480 (no detections), 35.8ms
3: 480x480 (no detections), 35.8ms
4: 480x480 (no detections), 35.8ms
5: 480x480 (no detections), 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 (no detections), 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  44%|████▍     | 220/500 [01:33<02:02,  2.28it/s]


0: 480x480 1 person, 1 frisbee, 39.0ms
1: 480x480 4 persons, 39.0ms
2: 480x480 1 bus, 1 clock, 39.0ms
3: 480x480 (no detections), 39.0ms
4: 480x480 1 bed, 39.0ms
5: 480x480 1 cup, 1 fork, 2 bowls, 2 carrots, 3 donuts, 1 dining table, 39.0ms
6: 480x480 1 person, 39.0ms
7: 480x480 1 person, 1 skateboard, 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  44%|████▍     | 221/500 [01:34<02:03,  2.27it/s]


0: 480x480 1 person, 1 train, 1 traffic light, 1 backpack, 1 clock, 44.7ms
1: 480x480 19 donuts, 1 oven, 44.7ms
2: 480x480 1 bed, 44.7ms
3: 480x480 2 persons, 1 boat, 1 baseball bat, 1 tennis racket, 44.7ms
4: 480x480 13 persons, 1 car, 10 motorcycles, 3 buss, 3 trucks, 1 traffic light, 44.7ms
5: 480x480 1 cup, 2 tvs, 2 keyboards, 1 cell phone, 44.7ms
6: 480x480 2 persons, 44.7ms
7: 480x480 (no detections), 44.7ms
Speed: 0.0ms preprocess, 44.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  44%|████▍     | 222/500 [01:34<02:06,  2.19it/s]


0: 480x480 1 banana, 1 sandwich, 1 dining table, 44.4ms
1: 480x480 (no detections), 44.4ms
2: 480x480 1 cup, 1 banana, 44.4ms
3: 480x480 14 cars, 44.4ms
4: 480x480 (no detections), 44.4ms
5: 480x480 1 train, 44.4ms
6: 480x480 6 cups, 3 bowls, 44.4ms
7: 480x480 1 cup, 2 bowls, 1 sandwich, 44.4ms
Speed: 0.0ms preprocess, 44.4ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  45%|████▍     | 223/500 [01:35<02:10,  2.12it/s]


0: 480x480 (no detections), 39.9ms
1: 480x480 (no detections), 39.9ms
2: 480x480 (no detections), 39.9ms
3: 480x480 (no detections), 39.9ms
4: 480x480 (no detections), 39.9ms
5: 480x480 (no detections), 39.9ms
6: 480x480 (no detections), 39.9ms
7: 480x480 (no detections), 39.9ms
Speed: 0.0ms preprocess, 39.9ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  45%|████▍     | 224/500 [01:35<02:07,  2.16it/s]


0: 480x480 1 person, 1 sports ball, 1 surfboard, 37.2ms
1: 480x480 1 person, 37.2ms
2: 480x480 1 person, 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 1 person, 1 tie, 37.2ms
5: 480x480 1 person, 2 sandwichs, 2 pizzas, 37.2ms
6: 480x480 2 persons, 1 umbrella, 37.2ms
7: 480x480 1 person, 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  45%|████▌     | 225/500 [01:35<02:04,  2.21it/s]


0: 480x480 1 giraffe, 34.7ms
1: 480x480 (no detections), 34.7ms
2: 480x480 1 elephant, 34.7ms
3: 480x480 1 person, 3 bicycles, 34.7ms
4: 480x480 (no detections), 34.7ms
5: 480x480 1 fork, 1 pizza, 1 dining table, 34.7ms
6: 480x480 2 persons, 1 car, 1 bench, 34.7ms
7: 480x480 1 person, 1 bench, 1 laptop, 34.7ms
Speed: 0.0ms preprocess, 34.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  45%|████▌     | 226/500 [01:36<02:00,  2.27it/s]


0: 480x480 (no detections), 34.4ms
1: 480x480 (no detections), 34.4ms
2: 480x480 (no detections), 34.4ms
3: 480x480 (no detections), 34.4ms
4: 480x480 (no detections), 34.4ms
5: 480x480 (no detections), 34.4ms
6: 480x480 (no detections), 34.4ms
7: 480x480 (no detections), 34.4ms
Speed: 0.0ms preprocess, 34.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  45%|████▌     | 227/500 [01:36<01:55,  2.36it/s]


0: 480x480 1 bench, 3 dogs, 35.8ms
1: 480x480 1 person, 1 bench, 35.8ms
2: 480x480 2 persons, 35.8ms
3: 480x480 1 person, 1 sports ball, 1 tennis racket, 35.8ms
4: 480x480 (no detections), 35.8ms
5: 480x480 2 persons, 1 bicycle, 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 2 laptops, 1 cell phone, 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  46%|████▌     | 228/500 [01:37<01:55,  2.36it/s]


0: 480x480 (no detections), 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 (no detections), 36.0ms
3: 480x480 (no detections), 36.0ms
4: 480x480 (no detections), 36.0ms
5: 480x480 (no detections), 36.0ms
6: 480x480 (no detections), 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  46%|████▌     | 229/500 [01:37<01:52,  2.40it/s]


0: 480x480 1 person, 1 surfboard, 40.5ms
1: 480x480 2 persons, 40.5ms
2: 480x480 1 bus, 40.5ms
3: 480x480 1 toilet, 40.5ms
4: 480x480 2 beds, 40.5ms
5: 480x480 6 persons, 1 kite, 1 skateboard, 40.5ms
6: 480x480 1 frisbee, 40.5ms
7: 480x480 2 elephants, 40.5ms
Speed: 0.0ms preprocess, 40.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  46%|████▌     | 230/500 [01:37<01:55,  2.33it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  46%|████▌     | 231/500 [01:38<01:54,  2.34it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  46%|████▋     | 232/500 [01:38<01:54,  2.33it/s]


0: 480x480 2 persons, 39.4ms
1: 480x480 2 persons, 1 elephant, 39.4ms
2: 480x480 1 person, 1 potted plant, 1 vase, 39.4ms
3: 480x480 3 clocks, 39.4ms
4: 480x480 8 cows, 39.4ms
5: 480x480 1 person, 39.4ms
6: 480x480 1 bottle, 1 oven, 1 sink, 39.4ms
7: 480x480 1 umbrella, 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  47%|████▋     | 233/500 [01:39<01:57,  2.27it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  47%|████▋     | 234/500 [01:39<01:54,  2.31it/s]


0: 480x480 1 person, 1 tennis racket, 35.8ms
1: 480x480 11 sheeps, 35.8ms
2: 480x480 1 bird, 35.8ms
3: 480x480 4 persons, 1 frisbee, 35.8ms
4: 480x480 1 car, 35.8ms
5: 480x480 1 oven, 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 1 motorcycle, 1 bus, 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  47%|████▋     | 235/500 [01:40<01:53,  2.33it/s]


0: 480x480 (no detections), 40.6ms
1: 480x480 (no detections), 40.6ms
2: 480x480 (no detections), 40.6ms
3: 480x480 (no detections), 40.6ms
4: 480x480 (no detections), 40.6ms
5: 480x480 (no detections), 40.6ms
6: 480x480 (no detections), 40.6ms
7: 480x480 (no detections), 40.6ms
Speed: 0.0ms preprocess, 40.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  47%|████▋     | 236/500 [01:40<01:54,  2.30it/s]


0: 480x480 (no detections), 34.5ms
1: 480x480 (no detections), 34.5ms
2: 480x480 (no detections), 34.5ms
3: 480x480 (no detections), 34.5ms
4: 480x480 (no detections), 34.5ms
5: 480x480 (no detections), 34.5ms
6: 480x480 (no detections), 34.5ms
7: 480x480 (no detections), 34.5ms
Speed: 0.0ms preprocess, 34.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  47%|████▋     | 237/500 [01:40<01:51,  2.36it/s]


0: 480x480 (no detections), 39.0ms
1: 480x480 (no detections), 39.0ms
2: 480x480 (no detections), 39.0ms
3: 480x480 (no detections), 39.0ms
4: 480x480 (no detections), 39.0ms
5: 480x480 (no detections), 39.0ms
6: 480x480 (no detections), 39.0ms
7: 480x480 (no detections), 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  48%|████▊     | 238/500 [01:41<01:50,  2.36it/s]


0: 480x480 (no detections), 37.4ms
1: 480x480 (no detections), 37.4ms
2: 480x480 (no detections), 37.4ms
3: 480x480 (no detections), 37.4ms
4: 480x480 (no detections), 37.4ms
5: 480x480 (no detections), 37.4ms
6: 480x480 (no detections), 37.4ms
7: 480x480 (no detections), 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  48%|████▊     | 239/500 [01:41<01:49,  2.37it/s]


0: 480x480 1 person, 38.0ms
1: 480x480 5 persons, 3 umbrellas, 6 bottles, 6 cups, 2 forks, 1 knife, 2 bowls, 1 dining table, 38.0ms
2: 480x480 1 person, 38.0ms
3: 480x480 2 trains, 38.0ms
4: 480x480 3 persons, 3 bottles, 1 cup, 38.0ms
5: 480x480 1 train, 38.0ms
6: 480x480 2 persons, 2 horses, 38.0ms
7: 480x480 1 person, 1 cup, 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  48%|████▊     | 240/500 [01:42<01:51,  2.34it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  48%|████▊     | 241/500 [01:42<01:50,  2.35it/s]


0: 480x480 2 chairs, 3 couchs, 1 dining table, 1 vase, 41.4ms
1: 480x480 1 cat, 41.4ms
2: 480x480 (no detections), 41.4ms
3: 480x480 3 sheeps, 41.4ms
4: 480x480 1 laptop, 1 cell phone, 1 book, 41.4ms
5: 480x480 2 persons, 2 cars, 2 skateboards, 41.4ms
6: 480x480 2 cars, 2 umbrellas, 41.4ms
7: 480x480 5 persons, 1 dog, 1 handbag, 1 skateboard, 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  48%|████▊     | 242/500 [01:43<01:53,  2.28it/s]


0: 480x480 9 pizzas, 35.3ms
1: 480x480 1 person, 35.3ms
2: 480x480 1 boat, 35.3ms
3: 480x480 1 bench, 2 umbrellas, 35.3ms
4: 480x480 2 clocks, 35.3ms
5: 480x480 7 persons, 2 buss, 2 handbags, 35.3ms
6: 480x480 3 persons, 16 oranges, 35.3ms
7: 480x480 1 person, 6 bottles, 1 oven, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  49%|████▊     | 243/500 [01:43<01:53,  2.26it/s]


0: 480x480 1 person, 1 bottle, 2 ovens, 37.7ms
1: 480x480 1 person, 1 cell phone, 37.7ms
2: 480x480 1 person, 1 horse, 37.7ms
3: 480x480 3 persons, 37.7ms
4: 480x480 3 airplanes, 37.7ms
5: 480x480 2 persons, 1 umbrella, 1 handbag, 37.7ms
6: 480x480 6 persons, 1 car, 5 umbrellas, 37.7ms
7: 480x480 2 persons, 1 bicycle, 1 tie, 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  49%|████▉     | 244/500 [01:44<01:52,  2.27it/s]


0: 480x480 (no detections), 37.4ms
1: 480x480 (no detections), 37.4ms
2: 480x480 (no detections), 37.4ms
3: 480x480 (no detections), 37.4ms
4: 480x480 (no detections), 37.4ms
5: 480x480 (no detections), 37.4ms
6: 480x480 (no detections), 37.4ms
7: 480x480 (no detections), 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  49%|████▉     | 245/500 [01:44<01:50,  2.31it/s]


0: 480x480 2 cars, 1 fire hydrant, 36.2ms
1: 480x480 1 cat, 1 cup, 1 book, 36.2ms
2: 480x480 2 persons, 1 handbag, 36.2ms
3: 480x480 1 cup, 2 bowls, 1 carrot, 36.2ms
4: 480x480 1 bird, 36.2ms
5: 480x480 2 laptops, 3 cell phones, 36.2ms
6: 480x480 3 giraffes, 36.2ms
7: 480x480 3 bottles, 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  49%|████▉     | 246/500 [01:44<01:50,  2.31it/s]


0: 480x480 (no detections), 39.3ms
1: 480x480 (no detections), 39.3ms
2: 480x480 (no detections), 39.3ms
3: 480x480 (no detections), 39.3ms
4: 480x480 (no detections), 39.3ms
5: 480x480 (no detections), 39.3ms
6: 480x480 (no detections), 39.3ms
7: 480x480 (no detections), 39.3ms
Speed: 0.0ms preprocess, 39.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  49%|████▉     | 247/500 [01:45<01:49,  2.31it/s]


0: 480x480 1 person, 1 train, 35.7ms
1: 480x480 2 persons, 2 bottles, 2 cups, 3 forks, 2 pizzas, 1 dining table, 35.7ms
2: 480x480 1 person, 1 teddy bear, 35.7ms
3: 480x480 1 cat, 35.7ms
4: 480x480 (no detections), 35.7ms
5: 480x480 1 bottle, 2 toilets, 35.7ms
6: 480x480 19 bowls, 1 oven, 35.7ms
7: 480x480 10 birds, 8 sheeps, 1 elephant, 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  50%|████▉     | 248/500 [01:45<01:52,  2.24it/s]


0: 480x480 (no detections), 37.6ms
1: 480x480 (no detections), 37.6ms
2: 480x480 (no detections), 37.6ms
3: 480x480 (no detections), 37.6ms
4: 480x480 (no detections), 37.6ms
5: 480x480 (no detections), 37.6ms
6: 480x480 (no detections), 37.6ms
7: 480x480 (no detections), 37.6ms
Speed: 0.0ms preprocess, 37.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  50%|████▉     | 249/500 [01:46<01:51,  2.26it/s]


0: 480x480 (no detections), 35.6ms
1: 480x480 (no detections), 35.6ms
2: 480x480 (no detections), 35.6ms
3: 480x480 (no detections), 35.6ms
4: 480x480 (no detections), 35.6ms
5: 480x480 (no detections), 35.6ms
6: 480x480 (no detections), 35.6ms
7: 480x480 (no detections), 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  50%|█████     | 250/500 [01:46<01:47,  2.32it/s]


0: 480x480 1 giraffe, 34.6ms
1: 480x480 1 person, 34.6ms
2: 480x480 1 zebra, 34.6ms
3: 480x480 9 persons, 2 boats, 5 umbrellas, 34.6ms
4: 480x480 1 person, 1 kite, 34.6ms
5: 480x480 1 toilet, 34.6ms
6: 480x480 1 bowl, 4 oranges, 34.6ms
7: 480x480 3 persons, 1 pizza, 34.6ms
Speed: 0.0ms preprocess, 34.6ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  50%|█████     | 251/500 [01:47<01:46,  2.34it/s]


0: 480x480 1 car, 3 boats, 38.9ms
1: 480x480 14 persons, 2 handbags, 38.9ms
2: 480x480 1 bus, 38.9ms
3: 480x480 1 bottle, 1 cup, 3 knifes, 38.9ms
4: 480x480 2 beds, 38.9ms
5: 480x480 2 zebras, 38.9ms
6: 480x480 1 cat, 1 bed, 38.9ms
7: 480x480 1 cat, 1 bed, 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  50%|█████     | 252/500 [01:47<01:48,  2.29it/s]


0: 480x480 2 persons, 35.7ms
1: 480x480 8 persons, 1 bench, 35.7ms
2: 480x480 3 bottles, 35.7ms
3: 480x480 1 person, 2 pizzas, 35.7ms
4: 480x480 1 bottle, 2 ovens, 1 refrigerator, 35.7ms
5: 480x480 2 persons, 35.7ms
6: 480x480 1 umbrella, 35.7ms
7: 480x480 2 persons, 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  51%|█████     | 253/500 [01:47<01:48,  2.29it/s]


0: 480x480 (no detections), 36.7ms
1: 480x480 (no detections), 36.7ms
2: 480x480 (no detections), 36.7ms
3: 480x480 (no detections), 36.7ms
4: 480x480 (no detections), 36.7ms
5: 480x480 (no detections), 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 (no detections), 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  51%|█████     | 254/500 [01:48<01:45,  2.34it/s]


0: 480x480 4 cars, 1 fire hydrant, 36.1ms
1: 480x480 14 persons, 1 handbag, 2 kites, 36.1ms
2: 480x480 1 motorcycle, 36.1ms
3: 480x480 3 airplanes, 4 trucks, 36.1ms
4: 480x480 5 bottles, 1 sink, 1 toothbrush, 36.1ms
5: 480x480 1 fire hydrant, 36.1ms
6: 480x480 2 persons, 1 remote, 36.1ms
7: 480x480 2 persons, 1 umbrella, 1 banana, 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  51%|█████     | 255/500 [01:48<01:44,  2.35it/s]


0: 480x480 (no detections), 37.0ms
1: 480x480 (no detections), 37.0ms
2: 480x480 (no detections), 37.0ms
3: 480x480 (no detections), 37.0ms
4: 480x480 (no detections), 37.0ms
5: 480x480 (no detections), 37.0ms
6: 480x480 (no detections), 37.0ms
7: 480x480 (no detections), 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  51%|█████     | 256/500 [01:49<01:42,  2.37it/s]


0: 480x480 (no detections), 39.4ms
1: 480x480 (no detections), 39.4ms
2: 480x480 (no detections), 39.4ms
3: 480x480 (no detections), 39.4ms
4: 480x480 (no detections), 39.4ms
5: 480x480 (no detections), 39.4ms
6: 480x480 (no detections), 39.4ms
7: 480x480 (no detections), 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  51%|█████▏    | 257/500 [01:49<01:42,  2.37it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  52%|█████▏    | 258/500 [01:50<01:42,  2.36it/s]


0: 480x480 (no detections), 36.8ms
1: 480x480 1 person, 1 sandwich, 1 donut, 36.8ms
2: 480x480 2 persons, 1 tie, 36.8ms
3: 480x480 1 person, 1 surfboard, 36.8ms
4: 480x480 2 clocks, 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 1 dog, 1 bottle, 36.8ms
7: 480x480 7 birds, 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  52%|█████▏    | 259/500 [01:50<01:42,  2.35it/s]


0: 480x480 (no detections), 35.4ms
1: 480x480 (no detections), 35.4ms
2: 480x480 (no detections), 35.4ms
3: 480x480 (no detections), 35.4ms
4: 480x480 (no detections), 35.4ms
5: 480x480 (no detections), 35.4ms
6: 480x480 (no detections), 35.4ms
7: 480x480 (no detections), 35.4ms
Speed: 0.0ms preprocess, 35.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  52%|█████▏    | 260/500 [01:50<01:39,  2.41it/s]


0: 480x480 (no detections), 37.4ms
1: 480x480 (no detections), 37.4ms
2: 480x480 (no detections), 37.4ms
3: 480x480 (no detections), 37.4ms
4: 480x480 (no detections), 37.4ms
5: 480x480 (no detections), 37.4ms
6: 480x480 (no detections), 37.4ms
7: 480x480 (no detections), 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  52%|█████▏    | 261/500 [01:51<01:39,  2.41it/s]


0: 480x480 (no detections), 33.9ms
1: 480x480 (no detections), 33.9ms
2: 480x480 (no detections), 33.9ms
3: 480x480 (no detections), 33.9ms
4: 480x480 (no detections), 33.9ms
5: 480x480 (no detections), 33.9ms
6: 480x480 (no detections), 33.9ms
7: 480x480 (no detections), 33.9ms
Speed: 0.0ms preprocess, 33.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  52%|█████▏    | 262/500 [01:51<01:36,  2.45it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  53%|█████▎    | 263/500 [01:52<01:38,  2.40it/s]


0: 480x480 (no detections), 37.2ms
1: 480x480 (no detections), 37.2ms
2: 480x480 (no detections), 37.2ms
3: 480x480 (no detections), 37.2ms
4: 480x480 (no detections), 37.2ms
5: 480x480 (no detections), 37.2ms
6: 480x480 (no detections), 37.2ms
7: 480x480 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  53%|█████▎    | 264/500 [01:52<01:38,  2.39it/s]


0: 480x480 (no detections), 39.0ms
1: 480x480 1 wine glass, 1 fork, 2 bowls, 1 sandwich, 1 dining table, 39.0ms
2: 480x480 7 persons, 1 kite, 39.0ms
3: 480x480 2 wine glasss, 39.0ms
4: 480x480 1 bottle, 1 fork, 1 knife, 1 spoon, 3 bowls, 2 sandwichs, 1 dining table, 39.0ms
5: 480x480 (no detections), 39.0ms
6: 480x480 1 cell phone, 39.0ms
7: 480x480 1 person, 1 skateboard, 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:  53%|█████▎    | 265/500 [01:52<01:40,  2.33it/s]


0: 480x480 (no detections), 38.8ms
1: 480x480 (no detections), 38.8ms
2: 480x480 (no detections), 38.8ms
3: 480x480 (no detections), 38.8ms
4: 480x480 (no detections), 38.8ms
5: 480x480 (no detections), 38.8ms
6: 480x480 (no detections), 38.8ms
7: 480x480 (no detections), 38.8ms
Speed: 0.0ms preprocess, 38.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  53%|█████▎    | 266/500 [01:53<01:40,  2.32it/s]


0: 480x480 4 persons, 2 buss, 1 truck, 1 handbag, 1 suitcase, 38.5ms
1: 480x480 (no detections), 38.5ms
2: 480x480 1 toilet, 1 sink, 38.5ms
3: 480x480 1 bear, 1 sports ball, 38.5ms
4: 480x480 1 boat, 1 clock, 38.5ms
5: 480x480 2 persons, 2 horses, 38.5ms
6: 480x480 3 persons, 1 car, 3 kites, 1 surfboard, 38.5ms
7: 480x480 7 persons, 1 motorcycle, 38.5ms
Speed: 0.0ms preprocess, 38.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  53%|█████▎    | 267/500 [01:53<01:41,  2.30it/s]


0: 480x480 1 oven, 37.8ms
1: 480x480 1 person, 1 bench, 37.8ms
2: 480x480 1 stop sign, 37.8ms
3: 480x480 1 car, 1 train, 37.8ms
4: 480x480 1 bird, 37.8ms
5: 480x480 2 persons, 1 skis, 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 9 persons, 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  54%|█████▎    | 268/500 [01:54<01:42,  2.27it/s]


0: 480x480 (no detections), 36.2ms
1: 480x480 1 dog, 5 books, 36.2ms
2: 480x480 2 persons, 1 truck, 1 umbrella, 3 cups, 36.2ms
3: 480x480 4 cars, 1 bench, 36.2ms
4: 480x480 1 bowl, 3 cakes, 36.2ms
5: 480x480 1 person, 1 tie, 36.2ms
6: 480x480 3 persons, 1 baseball bat, 36.2ms
7: 480x480 2 toilets, 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  54%|█████▍    | 269/500 [01:54<01:41,  2.28it/s]


0: 480x480 (no detections), 42.6ms
1: 480x480 (no detections), 42.6ms
2: 480x480 (no detections), 42.6ms
3: 480x480 (no detections), 42.6ms
4: 480x480 (no detections), 42.6ms
5: 480x480 (no detections), 42.6ms
6: 480x480 (no detections), 42.6ms
7: 480x480 (no detections), 42.6ms
Speed: 0.0ms preprocess, 42.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  54%|█████▍    | 270/500 [01:55<01:41,  2.26it/s]


0: 480x480 1 bus, 36.4ms
1: 480x480 1 person, 1 sports ball, 2 baseball bats, 36.4ms
2: 480x480 1 pizza, 36.4ms
3: 480x480 1 laptop, 1 keyboard, 36.4ms
4: 480x480 3 persons, 1 umbrella, 1 surfboard, 36.4ms
5: 480x480 2 cats, 36.4ms
6: 480x480 2 cups, 1 banana, 36.4ms
7: 480x480 1 person, 2 umbrellas, 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  54%|█████▍    | 271/500 [01:55<01:41,  2.26it/s]


0: 480x480 2 persons, 1 surfboard, 40.2ms
1: 480x480 (no detections), 40.2ms
2: 480x480 1 person, 40.2ms
3: 480x480 1 bear, 1 umbrella, 40.2ms
4: 480x480 1 person, 2 skiss, 40.2ms
5: 480x480 1 microwave, 40.2ms
6: 480x480 1 person, 1 car, 1 bus, 40.2ms
7: 480x480 1 person, 1 tie, 2 bottles, 1 wine glass, 1 book, 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  54%|█████▍    | 272/500 [01:56<01:42,  2.23it/s]


0: 480x480 2 persons, 34.2ms
1: 480x480 1 fork, 1 banana, 1 carrot, 34.2ms
2: 480x480 12 birds, 9 sheeps, 6 cows, 34.2ms
3: 480x480 (no detections), 34.2ms
4: 480x480 10 persons, 4 motorcycles, 1 bench, 34.2ms
5: 480x480 1 elephant, 34.2ms
6: 480x480 1 bench, 34.2ms
7: 480x480 1 person, 1 skis, 34.2ms
Speed: 0.0ms preprocess, 34.2ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 480)


Validating:  55%|█████▍    | 273/500 [01:56<01:39,  2.28it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  55%|█████▍    | 274/500 [01:56<01:37,  2.31it/s]


0: 480x480 (no detections), 33.7ms
1: 480x480 (no detections), 33.7ms
2: 480x480 (no detections), 33.7ms
3: 480x480 (no detections), 33.7ms
4: 480x480 (no detections), 33.7ms
5: 480x480 (no detections), 33.7ms
6: 480x480 (no detections), 33.7ms
7: 480x480 (no detections), 33.7ms
Speed: 0.0ms preprocess, 33.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  55%|█████▌    | 275/500 [01:57<01:34,  2.39it/s]


0: 480x480 (no detections), 36.7ms
1: 480x480 (no detections), 36.7ms
2: 480x480 (no detections), 36.7ms
3: 480x480 (no detections), 36.7ms
4: 480x480 (no detections), 36.7ms
5: 480x480 (no detections), 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 (no detections), 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  55%|█████▌    | 276/500 [01:57<01:33,  2.40it/s]


0: 480x480 1 knife, 41.4ms
1: 480x480 1 person, 1 surfboard, 41.4ms
2: 480x480 (no detections), 41.4ms
3: 480x480 6 persons, 1 horse, 4 elephants, 41.4ms
4: 480x480 2 laptops, 41.4ms
5: 480x480 1 person, 41.4ms
6: 480x480 2 persons, 2 teddy bears, 41.4ms
7: 480x480 1 motorcycle, 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:  55%|█████▌    | 277/500 [01:58<01:36,  2.30it/s]


0: 480x480 (no detections), 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 (no detections), 38.4ms
4: 480x480 (no detections), 38.4ms
5: 480x480 (no detections), 38.4ms
6: 480x480 (no detections), 38.4ms
7: 480x480 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  56%|█████▌    | 278/500 [01:58<01:37,  2.28it/s]


0: 480x480 1 teddy bear, 37.7ms
1: 480x480 1 chair, 3 couchs, 37.7ms
2: 480x480 1 bed, 1 teddy bear, 37.7ms
3: 480x480 2 forks, 1 knife, 3 bowls, 1 banana, 2 sandwichs, 37.7ms
4: 480x480 1 clock, 37.7ms
5: 480x480 1 cup, 1 bowl, 1 pizza, 1 chair, 1 dining table, 37.7ms
6: 480x480 1 toilet, 1 laptop, 37.7ms
7: 480x480 1 bird, 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:  56%|█████▌    | 279/500 [01:59<01:37,  2.27it/s]


0: 480x480 1 airplane, 35.3ms
1: 480x480 1 person, 1 train, 35.3ms
2: 480x480 10 persons, 3 frisbees, 35.3ms
3: 480x480 1 train, 35.3ms
4: 480x480 1 person, 1 tennis racket, 35.3ms
5: 480x480 1 person, 1 car, 2 skateboards, 35.3ms
6: 480x480 4 persons, 1 bottle, 1 couch, 2 remotes, 35.3ms
7: 480x480 1 person, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  56%|█████▌    | 280/500 [01:59<01:35,  2.30it/s]


0: 480x480 (no detections), 34.8ms
1: 480x480 (no detections), 34.8ms
2: 480x480 (no detections), 34.8ms
3: 480x480 (no detections), 34.8ms
4: 480x480 (no detections), 34.8ms
5: 480x480 (no detections), 34.8ms
6: 480x480 (no detections), 34.8ms
7: 480x480 (no detections), 34.8ms
Speed: 0.0ms preprocess, 34.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  56%|█████▌    | 281/500 [01:59<01:32,  2.37it/s]


0: 480x480 (no detections), 39.6ms
1: 480x480 (no detections), 39.6ms
2: 480x480 (no detections), 39.6ms
3: 480x480 (no detections), 39.6ms
4: 480x480 (no detections), 39.6ms
5: 480x480 (no detections), 39.6ms
6: 480x480 (no detections), 39.6ms
7: 480x480 (no detections), 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  56%|█████▋    | 282/500 [02:00<01:32,  2.36it/s]


0: 480x480 (no detections), 37.0ms
1: 480x480 (no detections), 37.0ms
2: 480x480 (no detections), 37.0ms
3: 480x480 (no detections), 37.0ms
4: 480x480 (no detections), 37.0ms
5: 480x480 (no detections), 37.0ms
6: 480x480 (no detections), 37.0ms
7: 480x480 (no detections), 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  57%|█████▋    | 283/500 [02:00<01:32,  2.35it/s]


0: 480x480 5 cars, 1 bench, 1 laptop, 40.1ms
1: 480x480 1 bed, 2 teddy bears, 40.1ms
2: 480x480 10 donuts, 1 oven, 40.1ms
3: 480x480 4 persons, 1 frisbee, 40.1ms
4: 480x480 3 boats, 40.1ms
5: 480x480 1 person, 40.1ms
6: 480x480 3 persons, 1 airplane, 40.1ms
7: 480x480 1 person, 1 sports ball, 2 tennis rackets, 40.1ms
Speed: 0.0ms preprocess, 40.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  57%|█████▋    | 284/500 [02:01<01:34,  2.28it/s]


0: 480x480 1 bench, 36.3ms
1: 480x480 1 toilet, 36.3ms
2: 480x480 3 beds, 36.3ms
3: 480x480 1 person, 1 dog, 36.3ms
4: 480x480 1 bird, 36.3ms
5: 480x480 4 persons, 2 beds, 36.3ms
6: 480x480 1 person, 1 bottle, 4 cups, 1 pizza, 2 dining tables, 36.3ms
7: 480x480 7 persons, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  57%|█████▋    | 285/500 [02:01<01:33,  2.29it/s]


0: 480x480 (no detections), 38.3ms
1: 480x480 (no detections), 38.3ms
2: 480x480 (no detections), 38.3ms
3: 480x480 (no detections), 38.3ms
4: 480x480 (no detections), 38.3ms
5: 480x480 (no detections), 38.3ms
6: 480x480 (no detections), 38.3ms
7: 480x480 (no detections), 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  57%|█████▋    | 286/500 [02:02<01:32,  2.31it/s]


0: 480x480 (no detections), 36.4ms
1: 480x480 (no detections), 36.4ms
2: 480x480 (no detections), 36.4ms
3: 480x480 (no detections), 36.4ms
4: 480x480 (no detections), 36.4ms
5: 480x480 (no detections), 36.4ms
6: 480x480 (no detections), 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  57%|█████▋    | 287/500 [02:02<01:31,  2.33it/s]


0: 480x480 (no detections), 37.6ms
1: 480x480 (no detections), 37.6ms
2: 480x480 (no detections), 37.6ms
3: 480x480 (no detections), 37.6ms
4: 480x480 (no detections), 37.6ms
5: 480x480 (no detections), 37.6ms
6: 480x480 (no detections), 37.6ms
7: 480x480 (no detections), 37.6ms
Speed: 0.0ms preprocess, 37.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  58%|█████▊    | 288/500 [02:02<01:30,  2.33it/s]


0: 480x480 (no detections), 38.2ms
1: 480x480 (no detections), 38.2ms
2: 480x480 (no detections), 38.2ms
3: 480x480 (no detections), 38.2ms
4: 480x480 (no detections), 38.2ms
5: 480x480 (no detections), 38.2ms
6: 480x480 (no detections), 38.2ms
7: 480x480 (no detections), 38.2ms
Speed: 0.0ms preprocess, 38.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  58%|█████▊    | 289/500 [02:03<01:30,  2.34it/s]


0: 480x480 1 horse, 1 cow, 34.6ms
1: 480x480 10 boats, 1 bench, 34.6ms
2: 480x480 1 mouse, 1 keyboard, 34.6ms
3: 480x480 1 umbrella, 34.6ms
4: 480x480 2 bowls, 34.6ms
5: 480x480 5 persons, 3 bottles, 3 wine glasss, 1 cup, 1 fork, 1 spoon, 1 bowl, 6 chairs, 2 dining tables, 34.6ms
6: 480x480 2 persons, 1 bicycle, 22 cars, 1 motorcycle, 1 truck, 34.6ms
7: 480x480 1 airplane, 34.6ms
Speed: 0.0ms preprocess, 34.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  58%|█████▊    | 290/500 [02:03<01:29,  2.36it/s]


0: 480x480 3 persons, 2 bottles, 2 wine glasss, 1 fork, 1 knife, 2 pizzas, 2 dining tables, 35.2ms
1: 480x480 1 bed, 35.2ms
2: 480x480 1 person, 35.2ms
3: 480x480 1 pizza, 35.2ms
4: 480x480 2 birds, 35.2ms
5: 480x480 9 fire hydrants, 35.2ms
6: 480x480 6 persons, 2 bicycles, 35.2ms
7: 480x480 3 persons, 1 bed, 35.2ms
Speed: 0.0ms preprocess, 35.2ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  58%|█████▊    | 291/500 [02:04<01:28,  2.35it/s]


0: 480x480 (no detections), 38.9ms
1: 480x480 (no detections), 38.9ms
2: 480x480 (no detections), 38.9ms
3: 480x480 (no detections), 38.9ms
4: 480x480 (no detections), 38.9ms
5: 480x480 (no detections), 38.9ms
6: 480x480 (no detections), 38.9ms
7: 480x480 (no detections), 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  58%|█████▊    | 292/500 [02:04<01:29,  2.32it/s]


0: 480x480 (no detections), 36.6ms
1: 480x480 (no detections), 36.6ms
2: 480x480 (no detections), 36.6ms
3: 480x480 (no detections), 36.6ms
4: 480x480 (no detections), 36.6ms
5: 480x480 (no detections), 36.6ms
6: 480x480 (no detections), 36.6ms
7: 480x480 (no detections), 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  59%|█████▊    | 293/500 [02:05<01:28,  2.33it/s]


0: 480x480 (no detections), 36.8ms
1: 480x480 (no detections), 36.8ms
2: 480x480 (no detections), 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 (no detections), 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 (no detections), 36.8ms
7: 480x480 (no detections), 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  59%|█████▉    | 294/500 [02:05<01:27,  2.36it/s]


0: 480x480 1 person, 1 bench, 1 book, 35.5ms
1: 480x480 1 boat, 2 clocks, 35.5ms
2: 480x480 7 persons, 1 tennis racket, 1 chair, 35.5ms
3: 480x480 1 toilet, 35.5ms
4: 480x480 2 persons, 1 bench, 1 tie, 35.5ms
5: 480x480 7 persons, 3 skiss, 35.5ms
6: 480x480 1 person, 2 frisbees, 35.5ms
7: 480x480 1 cat, 1 laptop, 1 mouse, 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  59%|█████▉    | 295/500 [02:05<01:26,  2.37it/s]


0: 480x480 (no detections), 42.2ms
1: 480x480 (no detections), 42.2ms
2: 480x480 (no detections), 42.2ms
3: 480x480 (no detections), 42.2ms
4: 480x480 (no detections), 42.2ms
5: 480x480 (no detections), 42.2ms
6: 480x480 (no detections), 42.2ms
7: 480x480 (no detections), 42.2ms
Speed: 0.0ms preprocess, 42.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  59%|█████▉    | 296/500 [02:06<01:28,  2.31it/s]


0: 480x480 (no detections), 37.3ms
1: 480x480 (no detections), 37.3ms
2: 480x480 (no detections), 37.3ms
3: 480x480 (no detections), 37.3ms
4: 480x480 (no detections), 37.3ms
5: 480x480 (no detections), 37.3ms
6: 480x480 (no detections), 37.3ms
7: 480x480 (no detections), 37.3ms
Speed: 0.0ms preprocess, 37.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  59%|█████▉    | 297/500 [02:06<01:27,  2.32it/s]


0: 480x480 (no detections), 42.8ms
1: 480x480 (no detections), 42.8ms
2: 480x480 (no detections), 42.8ms
3: 480x480 (no detections), 42.8ms
4: 480x480 (no detections), 42.8ms
5: 480x480 (no detections), 42.8ms
6: 480x480 (no detections), 42.8ms
7: 480x480 (no detections), 42.8ms
Speed: 0.0ms preprocess, 42.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  60%|█████▉    | 298/500 [02:07<01:28,  2.27it/s]


0: 480x480 (no detections), 35.7ms
1: 480x480 (no detections), 35.7ms
2: 480x480 (no detections), 35.7ms
3: 480x480 (no detections), 35.7ms
4: 480x480 (no detections), 35.7ms
5: 480x480 (no detections), 35.7ms
6: 480x480 (no detections), 35.7ms
7: 480x480 (no detections), 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  60%|█████▉    | 299/500 [02:07<01:25,  2.34it/s]


0: 480x480 (no detections), 38.7ms
1: 480x480 (no detections), 38.7ms
2: 480x480 (no detections), 38.7ms
3: 480x480 (no detections), 38.7ms
4: 480x480 (no detections), 38.7ms
5: 480x480 (no detections), 38.7ms
6: 480x480 (no detections), 38.7ms
7: 480x480 (no detections), 38.7ms
Speed: 0.0ms preprocess, 38.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  60%|██████    | 300/500 [02:08<01:26,  2.32it/s]


0: 480x480 (no detections), 40.2ms
1: 480x480 (no detections), 40.2ms
2: 480x480 (no detections), 40.2ms
3: 480x480 (no detections), 40.2ms
4: 480x480 (no detections), 40.2ms
5: 480x480 (no detections), 40.2ms
6: 480x480 (no detections), 40.2ms
7: 480x480 (no detections), 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  60%|██████    | 301/500 [02:08<01:26,  2.31it/s]


0: 480x480 (no detections), 40.9ms
1: 480x480 3 cell phones, 40.9ms
2: 480x480 (no detections), 40.9ms
3: 480x480 2 persons, 2 airplanes, 2 trucks, 40.9ms
4: 480x480 1 bowl, 7 donuts, 1 dining table, 40.9ms
5: 480x480 3 persons, 40.9ms
6: 480x480 1 giraffe, 1 umbrella, 40.9ms
7: 480x480 2 persons, 3 bottles, 1 cup, 2 bowls, 2 bananas, 2 apples, 1 orange, 2 teddy bears, 40.9ms
Speed: 0.0ms preprocess, 40.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  60%|██████    | 302/500 [02:08<01:27,  2.27it/s]


0: 480x480 (no detections), 39.8ms
1: 480x480 (no detections), 39.8ms
2: 480x480 (no detections), 39.8ms
3: 480x480 (no detections), 39.8ms
4: 480x480 (no detections), 39.8ms
5: 480x480 (no detections), 39.8ms
6: 480x480 (no detections), 39.8ms
7: 480x480 (no detections), 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  61%|██████    | 303/500 [02:09<01:26,  2.28it/s]


0: 480x480 (no detections), 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 (no detections), 36.0ms
3: 480x480 (no detections), 36.0ms
4: 480x480 (no detections), 36.0ms
5: 480x480 (no detections), 36.0ms
6: 480x480 (no detections), 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  61%|██████    | 304/500 [02:09<01:23,  2.34it/s]


0: 480x480 (no detections), 39.0ms
1: 480x480 (no detections), 39.0ms
2: 480x480 (no detections), 39.0ms
3: 480x480 (no detections), 39.0ms
4: 480x480 (no detections), 39.0ms
5: 480x480 (no detections), 39.0ms
6: 480x480 (no detections), 39.0ms
7: 480x480 (no detections), 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  61%|██████    | 305/500 [02:10<01:22,  2.35it/s]


0: 480x480 7 persons, 1 tie, 36.6ms
1: 480x480 1 person, 2 surfboards, 36.6ms
2: 480x480 1 fork, 1 bowl, 36.6ms
3: 480x480 1 bird, 36.6ms
4: 480x480 1 person, 1 umbrella, 36.6ms
5: 480x480 1 tv, 1 mouse, 1 keyboard, 36.6ms
6: 480x480 8 persons, 1 bus, 36.6ms
7: 480x480 4 boats, 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  61%|██████    | 306/500 [02:10<01:22,  2.36it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  61%|██████▏   | 307/500 [02:11<01:21,  2.37it/s]


0: 480x480 (no detections), 37.1ms
1: 480x480 (no detections), 37.1ms
2: 480x480 (no detections), 37.1ms
3: 480x480 (no detections), 37.1ms
4: 480x480 (no detections), 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 (no detections), 37.1ms
7: 480x480 (no detections), 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  62%|██████▏   | 308/500 [02:11<01:21,  2.37it/s]


0: 480x480 3 beds, 37.0ms
1: 480x480 1 pizza, 37.0ms
2: 480x480 1 person, 1 bottle, 1 sandwich, 37.0ms
3: 480x480 8 persons, 2 bicycles, 2 cars, 1 bus, 3 trucks, 37.0ms
4: 480x480 1 bird, 37.0ms
5: 480x480 4 cows, 37.0ms
6: 480x480 1 person, 1 kite, 37.0ms
7: 480x480 1 tennis racket, 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  62%|██████▏   | 309/500 [02:11<01:21,  2.34it/s]


0: 480x480 3 persons, 2 bicycles, 3 cars, 1 truck, 37.8ms
1: 480x480 2 persons, 1 backpack, 37.8ms
2: 480x480 1 person, 1 umbrella, 6 bottles, 1 bowl, 1 vase, 37.8ms
3: 480x480 1 donut, 37.8ms
4: 480x480 1 person, 1 skateboard, 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 1 person, 1 laptop, 37.8ms
7: 480x480 2 persons, 1 umbrella, 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  62%|██████▏   | 310/500 [02:12<01:21,  2.32it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  62%|██████▏   | 311/500 [02:12<01:20,  2.36it/s]


0: 480x480 4 persons, 3 horses, 35.3ms
1: 480x480 1 cat, 35.3ms
2: 480x480 1 tv, 1 laptop, 2 mouses, 2 keyboards, 1 cell phone, 35.3ms
3: 480x480 2 persons, 1 umbrella, 35.3ms
4: 480x480 10 persons, 2 handbags, 35.3ms
5: 480x480 6 cars, 1 boat, 1 clock, 35.3ms
6: 480x480 5 persons, 2 tennis rackets, 35.3ms
7: 480x480 2 persons, 1 cup, 1 pizza, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  62%|██████▏   | 312/500 [02:13<01:19,  2.38it/s]


0: 480x480 1 elephant, 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 6 persons, 1 car, 6 trucks, 1 bench, 4 backpacks, 1 suitcase, 38.4ms
3: 480x480 1 bird, 38.4ms
4: 480x480 1 person, 1 bus, 38.4ms
5: 480x480 1 cat, 38.4ms
6: 480x480 9 persons, 3 benchs, 22 kites, 38.4ms
7: 480x480 22 persons, 2 skiss, 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  63%|██████▎   | 313/500 [02:13<01:19,  2.36it/s]


0: 480x480 (no detections), 36.4ms
1: 480x480 (no detections), 36.4ms
2: 480x480 (no detections), 36.4ms
3: 480x480 (no detections), 36.4ms
4: 480x480 (no detections), 36.4ms
5: 480x480 (no detections), 36.4ms
6: 480x480 (no detections), 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  63%|██████▎   | 314/500 [02:14<01:18,  2.37it/s]


0: 480x480 4 persons, 1 bottle, 4 cups, 1 knife, 3 bowls, 1 chair, 1 dining table, 39.8ms
1: 480x480 1 person, 1 frisbee, 39.8ms
2: 480x480 1 person, 1 surfboard, 39.8ms
3: 480x480 1 zebra, 1 tennis racket, 39.8ms
4: 480x480 (no detections), 39.8ms
5: 480x480 1 bird, 39.8ms
6: 480x480 3 persons, 39.8ms
7: 480x480 1 person, 1 sports ball, 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  63%|██████▎   | 315/500 [02:14<01:19,  2.33it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  63%|██████▎   | 316/500 [02:14<01:18,  2.35it/s]


0: 480x480 (no detections), 38.0ms
1: 480x480 (no detections), 38.0ms
2: 480x480 (no detections), 38.0ms
3: 480x480 (no detections), 38.0ms
4: 480x480 (no detections), 38.0ms
5: 480x480 (no detections), 38.0ms
6: 480x480 (no detections), 38.0ms
7: 480x480 (no detections), 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  63%|██████▎   | 317/500 [02:15<01:17,  2.37it/s]


0: 480x480 1 bed, 1 laptop, 35.6ms
1: 480x480 1 person, 1 horse, 35.6ms
2: 480x480 (no detections), 35.6ms
3: 480x480 1 person, 1 toilet, 35.6ms
4: 480x480 1 car, 1 bus, 35.6ms
5: 480x480 1 train, 35.6ms
6: 480x480 2 persons, 1 cup, 1 fork, 1 knife, 2 bowls, 1 dining table, 35.6ms
7: 480x480 (no detections), 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  64%|██████▎   | 318/500 [02:15<01:16,  2.37it/s]


0: 480x480 (no detections), 38.8ms
1: 480x480 (no detections), 38.8ms
2: 480x480 (no detections), 38.8ms
3: 480x480 (no detections), 38.8ms
4: 480x480 (no detections), 38.8ms
5: 480x480 (no detections), 38.8ms
6: 480x480 (no detections), 38.8ms
7: 480x480 (no detections), 38.8ms
Speed: 0.0ms preprocess, 38.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  64%|██████▍   | 319/500 [02:16<01:16,  2.37it/s]


0: 480x480 (no detections), 37.1ms
1: 480x480 (no detections), 37.1ms
2: 480x480 (no detections), 37.1ms
3: 480x480 (no detections), 37.1ms
4: 480x480 (no detections), 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 (no detections), 37.1ms
7: 480x480 (no detections), 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  64%|██████▍   | 320/500 [02:16<01:15,  2.39it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  64%|██████▍   | 321/500 [02:17<01:15,  2.37it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  64%|██████▍   | 322/500 [02:17<01:14,  2.38it/s]


0: 480x480 1 bus, 1 truck, 36.3ms
1: 480x480 2 persons, 1 bottle, 36.3ms
2: 480x480 1 laptop, 1 mouse, 1 keyboard, 36.3ms
3: 480x480 2 persons, 1 tennis racket, 36.3ms
4: 480x480 1 potted plant, 1 vase, 36.3ms
5: 480x480 1 person, 36.3ms
6: 480x480 5 persons, 11 cars, 1 bus, 36.3ms
7: 480x480 9 persons, 1 tennis racket, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  65%|██████▍   | 323/500 [02:17<01:14,  2.38it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  65%|██████▍   | 324/500 [02:18<01:13,  2.38it/s]


0: 480x480 (no detections), 36.5ms
1: 480x480 (no detections), 36.5ms
2: 480x480 (no detections), 36.5ms
3: 480x480 (no detections), 36.5ms
4: 480x480 (no detections), 36.5ms
5: 480x480 (no detections), 36.5ms
6: 480x480 (no detections), 36.5ms
7: 480x480 (no detections), 36.5ms
Speed: 0.0ms preprocess, 36.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  65%|██████▌   | 325/500 [02:18<01:13,  2.39it/s]


0: 480x480 (no detections), 41.4ms
1: 480x480 (no detections), 41.4ms
2: 480x480 (no detections), 41.4ms
3: 480x480 (no detections), 41.4ms
4: 480x480 (no detections), 41.4ms
5: 480x480 (no detections), 41.4ms
6: 480x480 (no detections), 41.4ms
7: 480x480 (no detections), 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  65%|██████▌   | 326/500 [02:19<01:14,  2.34it/s]


0: 480x480 3 persons, 1 horse, 35.9ms
1: 480x480 1 fire hydrant, 35.9ms
2: 480x480 6 persons, 2 baseball bats, 2 baseball gloves, 35.9ms
3: 480x480 1 person, 35.9ms
4: 480x480 1 boat, 35.9ms
5: 480x480 5 persons, 1 bus, 35.9ms
6: 480x480 6 donuts, 35.9ms
7: 480x480 1 boat, 35.9ms
Speed: 0.0ms preprocess, 35.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  65%|██████▌   | 327/500 [02:19<01:13,  2.35it/s]


0: 480x480 4 persons, 1 car, 3 buss, 1 truck, 1 traffic light, 36.9ms
1: 480x480 1 mouse, 1 keyboard, 36.9ms
2: 480x480 1 person, 1 surfboard, 36.9ms
3: 480x480 1 person, 36.9ms
4: 480x480 1 person, 1 pizza, 36.9ms
5: 480x480 1 person, 1 frisbee, 36.9ms
6: 480x480 1 person, 1 keyboard, 36.9ms
7: 480x480 1 cup, 1 donut, 1 dining table, 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  66%|██████▌   | 328/500 [02:19<01:13,  2.34it/s]


0: 480x480 6 persons, 1 car, 2 skateboards, 36.8ms
1: 480x480 4 persons, 1 banana, 3 donuts, 36.8ms
2: 480x480 3 persons, 36.8ms
3: 480x480 1 bed, 1 remote, 36.8ms
4: 480x480 1 airplane, 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 1 cow, 1 zebra, 36.8ms
7: 480x480 2 cars, 1 bus, 1 stop sign, 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  66%|██████▌   | 329/500 [02:20<01:13,  2.33it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  66%|██████▌   | 330/500 [02:20<01:12,  2.35it/s]


0: 480x480 2 persons, 4 cars, 1 truck, 2 horses, 42.6ms
1: 480x480 1 clock, 42.6ms
2: 480x480 9 persons, 4 cups, 2 pizzas, 42.6ms
3: 480x480 2 ovens, 42.6ms
4: 480x480 1 toilet, 42.6ms
5: 480x480 1 person, 3 kites, 42.6ms
6: 480x480 1 person, 2 cars, 42.6ms
7: 480x480 1 person, 42.6ms
Speed: 0.0ms preprocess, 42.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  66%|██████▌   | 331/500 [02:21<01:14,  2.27it/s]


0: 480x480 (no detections), 35.6ms
1: 480x480 (no detections), 35.6ms
2: 480x480 (no detections), 35.6ms
3: 480x480 (no detections), 35.6ms
4: 480x480 (no detections), 35.6ms
5: 480x480 (no detections), 35.6ms
6: 480x480 (no detections), 35.6ms
7: 480x480 (no detections), 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  66%|██████▋   | 332/500 [02:21<01:13,  2.30it/s]


0: 480x480 (no detections), 35.2ms
1: 480x480 (no detections), 35.2ms
2: 480x480 (no detections), 35.2ms
3: 480x480 (no detections), 35.2ms
4: 480x480 (no detections), 35.2ms
5: 480x480 (no detections), 35.2ms
6: 480x480 (no detections), 35.2ms
7: 480x480 (no detections), 35.2ms
Speed: 0.0ms preprocess, 35.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  67%|██████▋   | 333/500 [02:22<01:10,  2.37it/s]


0: 480x480 2 persons, 1 tennis racket, 36.4ms
1: 480x480 1 person, 2 bottles, 3 sinks, 36.4ms
2: 480x480 1 toilet, 1 sink, 36.4ms
3: 480x480 1 boat, 2 birds, 1 clock, 36.4ms
4: 480x480 2 bottles, 2 wine glasss, 2 pizzas, 36.4ms
5: 480x480 2 tvs, 1 laptop, 2 mouses, 3 keyboards, 2 cell phones, 36.4ms
6: 480x480 1 bed, 36.4ms
7: 480x480 8 persons, 1 tennis racket, 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  67%|██████▋   | 334/500 [02:22<01:10,  2.36it/s]


0: 480x480 1 cat, 33.5ms
1: 480x480 1 person, 1 tennis racket, 33.5ms
2: 480x480 1 person, 1 laptop, 1 keyboard, 1 book, 33.5ms
3: 480x480 3 cups, 2 tvs, 2 laptops, 33.5ms
4: 480x480 16 sheeps, 33.5ms
5: 480x480 1 cat, 1 bed, 33.5ms
6: 480x480 1 bus, 33.5ms
7: 480x480 6 persons, 33.5ms
Speed: 0.0ms preprocess, 33.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  67%|██████▋   | 335/500 [02:22<01:09,  2.37it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  67%|██████▋   | 336/500 [02:23<01:09,  2.34it/s]


0: 480x480 3 bowls, 1 orange, 39.1ms
1: 480x480 6 persons, 1 boat, 1 bird, 4 cows, 39.1ms
2: 480x480 2 persons, 1 bird, 39.1ms
3: 480x480 1 car, 35 sheeps, 39.1ms
4: 480x480 1 person, 1 bus, 39.1ms
5: 480x480 (no detections), 39.1ms
6: 480x480 1 person, 39.1ms
7: 480x480 1 person, 1 surfboard, 39.1ms
Speed: 0.0ms preprocess, 39.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  67%|██████▋   | 337/500 [02:23<01:10,  2.33it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  68%|██████▊   | 338/500 [02:24<01:09,  2.32it/s]


0: 480x480 1 giraffe, 36.4ms
1: 480x480 2 sandwichs, 36.4ms
2: 480x480 1 person, 36.4ms
3: 480x480 1 person, 1 train, 36.4ms
4: 480x480 1 person, 1 skateboard, 36.4ms
5: 480x480 1 person, 4 cars, 1 bus, 1 truck, 36.4ms
6: 480x480 2 giraffes, 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  68%|██████▊   | 339/500 [02:24<01:09,  2.31it/s]


0: 480x480 (no detections), 37.6ms
1: 480x480 (no detections), 37.6ms
2: 480x480 (no detections), 37.6ms
3: 480x480 (no detections), 37.6ms
4: 480x480 (no detections), 37.6ms
5: 480x480 (no detections), 37.6ms
6: 480x480 (no detections), 37.6ms
7: 480x480 (no detections), 37.6ms
Speed: 0.0ms preprocess, 37.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  68%|██████▊   | 340/500 [02:25<01:08,  2.32it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  68%|██████▊   | 341/500 [02:25<01:08,  2.31it/s]


0: 480x480 (no detections), 37.3ms
1: 480x480 (no detections), 37.3ms
2: 480x480 (no detections), 37.3ms
3: 480x480 (no detections), 37.3ms
4: 480x480 (no detections), 37.3ms
5: 480x480 (no detections), 37.3ms
6: 480x480 (no detections), 37.3ms
7: 480x480 (no detections), 37.3ms
Speed: 0.0ms preprocess, 37.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  68%|██████▊   | 342/500 [02:26<01:08,  2.32it/s]


0: 480x480 1 person, 1 tennis racket, 37.5ms
1: 480x480 9 persons, 10 chairs, 37.5ms
2: 480x480 1 toilet, 1 sink, 37.5ms
3: 480x480 1 person, 1 truck, 37.5ms
4: 480x480 5 persons, 11 bottles, 6 wine glasss, 1 cup, 1 bowl, 1 chair, 1 refrigerator, 37.5ms
5: 480x480 2 sheeps, 37.5ms
6: 480x480 1 fork, 1 knife, 6 bowls, 2 oranges, 25 carrots, 1 scissors, 37.5ms
7: 480x480 2 persons, 1 baseball bat, 1 baseball glove, 37.5ms
Speed: 0.0ms preprocess, 37.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  69%|██████▊   | 343/500 [02:26<01:08,  2.28it/s]


0: 480x480 (no detections), 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 (no detections), 36.0ms
3: 480x480 (no detections), 36.0ms
4: 480x480 (no detections), 36.0ms
5: 480x480 (no detections), 36.0ms
6: 480x480 (no detections), 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  69%|██████▉   | 344/500 [02:26<01:06,  2.35it/s]


0: 480x480 (no detections), 34.8ms
1: 480x480 (no detections), 34.8ms
2: 480x480 (no detections), 34.8ms
3: 480x480 (no detections), 34.8ms
4: 480x480 (no detections), 34.8ms
5: 480x480 (no detections), 34.8ms
6: 480x480 (no detections), 34.8ms
7: 480x480 (no detections), 34.8ms
Speed: 0.0ms preprocess, 34.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  69%|██████▉   | 345/500 [02:27<01:04,  2.40it/s]


0: 480x480 1 person, 2 clocks, 33.2ms
1: 480x480 1 person, 1 frisbee, 33.2ms
2: 480x480 1 person, 1 tennis racket, 33.2ms
3: 480x480 1 person, 1 umbrella, 1 handbag, 33.2ms
4: 480x480 1 dog, 1 bed, 33.2ms
5: 480x480 1 clock, 33.2ms
6: 480x480 1 umbrella, 33.2ms
7: 480x480 1 pizza, 33.2ms
Speed: 0.0ms preprocess, 33.2ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  69%|██████▉   | 346/500 [02:27<01:05,  2.36it/s]


0: 480x480 1 bed, 37.0ms
1: 480x480 1 bird, 37.0ms
2: 480x480 1 person, 1 sports ball, 1 tennis racket, 37.0ms
3: 480x480 3 persons, 2 bottles, 4 wine glasss, 1 knife, 1 pizza, 37.0ms
4: 480x480 1 remote, 37.0ms
5: 480x480 1 person, 1 umbrella, 37.0ms
6: 480x480 1 bowl, 37.0ms
7: 480x480 12 cows, 37.0ms
Speed: 0.0ms preprocess, 37.0ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  69%|██████▉   | 347/500 [02:28<01:05,  2.33it/s]


0: 480x480 (no detections), 36.2ms
1: 480x480 (no detections), 36.2ms
2: 480x480 (no detections), 36.2ms
3: 480x480 (no detections), 36.2ms
4: 480x480 (no detections), 36.2ms
5: 480x480 (no detections), 36.2ms
6: 480x480 (no detections), 36.2ms
7: 480x480 (no detections), 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  70%|██████▉   | 348/500 [02:28<01:04,  2.37it/s]


0: 480x480 1 airplane, 34.2ms
1: 480x480 2 benchs, 34.2ms
2: 480x480 1 boat, 1 baseball bat, 3 clocks, 34.2ms
3: 480x480 6 persons, 6 bottles, 2 cups, 1 banana, 1 clock, 34.2ms
4: 480x480 2 persons, 34.2ms
5: 480x480 3 persons, 1 skateboard, 1 tennis racket, 34.2ms
6: 480x480 2 sheeps, 34.2ms
7: 480x480 1 bus, 34.2ms
Speed: 0.0ms preprocess, 34.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  70%|██████▉   | 349/500 [02:28<01:02,  2.40it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  70%|███████   | 350/500 [02:29<01:02,  2.39it/s]


0: 480x480 4 persons, 1 sheep, 39.1ms
1: 480x480 2 persons, 1 skis, 39.1ms
2: 480x480 1 person, 1 sports ball, 2 baseball gloves, 39.1ms
3: 480x480 1 banana, 1 vase, 39.1ms
4: 480x480 1 bird, 39.1ms
5: 480x480 1 bowl, 1 broccoli, 39.1ms
6: 480x480 5 bananas, 39.1ms
7: 480x480 8 bottles, 1 bowl, 1 microwave, 1 oven, 1 refrigerator, 39.1ms
Speed: 0.0ms preprocess, 39.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  70%|███████   | 351/500 [02:29<01:03,  2.33it/s]


0: 480x480 (no detections), 35.2ms
1: 480x480 (no detections), 35.2ms
2: 480x480 (no detections), 35.2ms
3: 480x480 (no detections), 35.2ms
4: 480x480 (no detections), 35.2ms
5: 480x480 (no detections), 35.2ms
6: 480x480 (no detections), 35.2ms
7: 480x480 (no detections), 35.2ms
Speed: 0.0ms preprocess, 35.2ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  70%|███████   | 352/500 [02:30<01:02,  2.35it/s]


0: 480x480 1 person, 1 frisbee, 36.1ms
1: 480x480 1 teddy bear, 36.1ms
2: 480x480 2 apples, 3 oranges, 36.1ms
3: 480x480 4 persons, 1 bench, 36.1ms
4: 480x480 1 laptop, 1 mouse, 2 keyboards, 36.1ms
5: 480x480 1 person, 2 cats, 36.1ms
6: 480x480 18 sheeps, 36.1ms
7: 480x480 7 persons, 5 surfboards, 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  71%|███████   | 353/500 [02:30<01:02,  2.35it/s]


0: 480x480 (no detections), 36.9ms
1: 480x480 (no detections), 36.9ms
2: 480x480 (no detections), 36.9ms
3: 480x480 (no detections), 36.9ms
4: 480x480 (no detections), 36.9ms
5: 480x480 (no detections), 36.9ms
6: 480x480 (no detections), 36.9ms
7: 480x480 (no detections), 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  71%|███████   | 354/500 [02:31<01:01,  2.37it/s]


0: 480x480 (no detections), 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 (no detections), 38.4ms
4: 480x480 (no detections), 38.4ms
5: 480x480 (no detections), 38.4ms
6: 480x480 (no detections), 38.4ms
7: 480x480 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  71%|███████   | 355/500 [02:31<01:00,  2.39it/s]


0: 480x480 3 persons, 1 tie, 2 wine glasss, 34.4ms
1: 480x480 3 cars, 34.4ms
2: 480x480 1 boat, 1 bird, 1 dog, 34.4ms
3: 480x480 6 persons, 1 handbag, 34.4ms
4: 480x480 1 bottle, 1 tv, 34.4ms
5: 480x480 1 person, 1 wine glass, 1 bed, 34.4ms
6: 480x480 1 person, 3 benchs, 1 handbag, 34.4ms
7: 480x480 5 persons, 1 bicycle, 2 cars, 1 traffic light, 2 umbrellas, 1 handbag, 34.4ms
Speed: 0.0ms preprocess, 34.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  71%|███████   | 356/500 [02:31<01:00,  2.39it/s]


0: 480x480 4 cars, 1 truck, 1 stop sign, 37.4ms
1: 480x480 3 persons, 3 skiss, 37.4ms
2: 480x480 (no detections), 37.4ms
3: 480x480 1 clock, 37.4ms
4: 480x480 1 person, 5 cars, 1 skateboard, 37.4ms
5: 480x480 2 persons, 4 cars, 1 bus, 1 truck, 2 benchs, 37.4ms
6: 480x480 4 persons, 1 cow, 28 elephants, 1 backpack, 37.4ms
7: 480x480 1 bowl, 1 chair, 1 oven, 1 refrigerator, 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  71%|███████▏  | 357/500 [02:32<01:00,  2.36it/s]


0: 480x480 (no detections), 35.4ms
1: 480x480 (no detections), 35.4ms
2: 480x480 1 bed, 35.4ms
3: 480x480 4 persons, 35.4ms
4: 480x480 1 person, 2 beds, 35.4ms
5: 480x480 1 person, 7 sheeps, 35.4ms
6: 480x480 12 persons, 5 umbrellas, 1 bottle, 3 chairs, 1 dining table, 35.4ms
7: 480x480 1 cat, 1 keyboard, 35.4ms
Speed: 0.0ms preprocess, 35.4ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  72%|███████▏  | 358/500 [02:32<00:59,  2.38it/s]


0: 480x480 3 trains, 36.2ms
1: 480x480 (no detections), 36.2ms
2: 480x480 1 person, 1 surfboard, 36.2ms
3: 480x480 1 person, 3 bottles, 1 remote, 36.2ms
4: 480x480 (no detections), 36.2ms
5: 480x480 (no detections), 36.2ms
6: 480x480 1 umbrella, 36.2ms
7: 480x480 5 persons, 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  72%|███████▏  | 359/500 [02:33<00:59,  2.39it/s]


0: 480x480 3 bowls, 38.5ms
1: 480x480 (no detections), 38.5ms
2: 480x480 2 cups, 1 spoon, 2 sandwichs, 38.5ms
3: 480x480 1 person, 2 cars, 1 fire hydrant, 38.5ms
4: 480x480 2 birds, 3 giraffes, 38.5ms
5: 480x480 1 person, 1 surfboard, 38.5ms
6: 480x480 2 persons, 2 wine glasss, 38.5ms
7: 480x480 1 bird, 3 cows, 38.5ms
Speed: 0.0ms preprocess, 38.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  72%|███████▏  | 360/500 [02:33<01:00,  2.32it/s]


0: 480x480 2 clocks, 40.2ms
1: 480x480 (no detections), 40.2ms
2: 480x480 1 scissors, 40.2ms
3: 480x480 2 persons, 1 bird, 40.2ms
4: 480x480 4 donuts, 40.2ms
5: 480x480 2 persons, 3 cars, 4 benchs, 1 skateboard, 40.2ms
6: 480x480 7 bottles, 4 bowls, 2 microwaves, 2 ovens, 40.2ms
7: 480x480 1 train, 1 hot dog, 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:  72%|███████▏  | 361/500 [02:34<01:01,  2.26it/s]


0: 480x480 1 person, 40.4ms
1: 480x480 8 persons, 1 umbrella, 2 baseball bats, 1 donut, 40.4ms
2: 480x480 1 elephant, 40.4ms
3: 480x480 2 persons, 40.4ms
4: 480x480 (no detections), 40.4ms
5: 480x480 1 person, 1 tie, 1 cake, 40.4ms
6: 480x480 3 persons, 1 bench, 40.4ms
7: 480x480 3 boats, 40.4ms
Speed: 0.0ms preprocess, 40.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  72%|███████▏  | 362/500 [02:34<01:01,  2.23it/s]


0: 480x480 1 bird, 35.6ms
1: 480x480 (no detections), 35.6ms
2: 480x480 1 bed, 35.6ms
3: 480x480 3 persons, 1 truck, 1 boat, 35.6ms
4: 480x480 1 person, 35.6ms
5: 480x480 1 person, 1 sandwich, 1 hot dog, 35.6ms
6: 480x480 3 persons, 4 bottles, 1 cup, 1 hot dog, 35.6ms
7: 480x480 (no detections), 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  73%|███████▎  | 363/500 [02:34<00:59,  2.29it/s]


0: 480x480 1 bench, 40.2ms
1: 480x480 2 persons, 2 ties, 40.2ms
2: 480x480 1 bus, 40.2ms
3: 480x480 1 person, 2 cars, 2 buss, 4 trucks, 40.2ms
4: 480x480 2 persons, 1 skateboard, 1 surfboard, 40.2ms
5: 480x480 1 toilet, 40.2ms
6: 480x480 (no detections), 40.2ms
7: 480x480 3 persons, 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  73%|███████▎  | 364/500 [02:35<01:00,  2.26it/s]


0: 480x480 (no detections), 44.3ms
1: 480x480 (no detections), 44.3ms
2: 480x480 (no detections), 44.3ms
3: 480x480 (no detections), 44.3ms
4: 480x480 (no detections), 44.3ms
5: 480x480 (no detections), 44.3ms
6: 480x480 (no detections), 44.3ms
7: 480x480 (no detections), 44.3ms
Speed: 0.0ms preprocess, 44.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  73%|███████▎  | 365/500 [02:35<01:00,  2.22it/s]


0: 480x480 (no detections), 35.7ms
1: 480x480 (no detections), 35.7ms
2: 480x480 (no detections), 35.7ms
3: 480x480 (no detections), 35.7ms
4: 480x480 (no detections), 35.7ms
5: 480x480 (no detections), 35.7ms
6: 480x480 (no detections), 35.7ms
7: 480x480 (no detections), 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  73%|███████▎  | 366/500 [02:36<00:58,  2.30it/s]


0: 480x480 (no detections), 39.0ms
1: 480x480 (no detections), 39.0ms
2: 480x480 (no detections), 39.0ms
3: 480x480 (no detections), 39.0ms
4: 480x480 (no detections), 39.0ms
5: 480x480 (no detections), 39.0ms
6: 480x480 (no detections), 39.0ms
7: 480x480 (no detections), 39.0ms
Speed: 0.0ms preprocess, 39.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  73%|███████▎  | 367/500 [02:36<00:57,  2.31it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  74%|███████▎  | 368/500 [02:37<00:56,  2.34it/s]


0: 480x480 2 persons, 2 cars, 1 bus, 36.0ms
1: 480x480 1 person, 1 skateboard, 36.0ms
2: 480x480 2 persons, 1 frisbee, 36.0ms
3: 480x480 1 person, 1 umbrella, 1 kite, 36.0ms
4: 480x480 2 persons, 10 sheeps, 36.0ms
5: 480x480 1 person, 1 motorcycle, 1 bench, 36.0ms
6: 480x480 1 person, 1 bicycle, 5 buss, 1 truck, 36.0ms
7: 480x480 35 bananas, 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  74%|███████▍  | 369/500 [02:37<00:56,  2.34it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  74%|███████▍  | 370/500 [02:37<00:55,  2.35it/s]


0: 480x480 (no detections), 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 (no detections), 38.4ms
4: 480x480 (no detections), 38.4ms
5: 480x480 (no detections), 38.4ms
6: 480x480 (no detections), 38.4ms
7: 480x480 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  74%|███████▍  | 371/500 [02:38<00:55,  2.34it/s]


0: 480x480 (no detections), 36.1ms
1: 480x480 (no detections), 36.1ms
2: 480x480 (no detections), 36.1ms
3: 480x480 (no detections), 36.1ms
4: 480x480 (no detections), 36.1ms
5: 480x480 (no detections), 36.1ms
6: 480x480 (no detections), 36.1ms
7: 480x480 (no detections), 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  74%|███████▍  | 372/500 [02:38<00:54,  2.36it/s]


0: 480x480 1 dog, 35.6ms
1: 480x480 1 bed, 1 laptop, 35.6ms
2: 480x480 5 persons, 3 cars, 1 motorcycle, 35.6ms
3: 480x480 2 zebras, 35.6ms
4: 480x480 6 elephants, 35.6ms
5: 480x480 (no detections), 35.6ms
6: 480x480 4 persons, 1 tie, 35.6ms
7: 480x480 1 person, 35.6ms
Speed: 0.0ms preprocess, 35.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  75%|███████▍  | 373/500 [02:39<00:55,  2.30it/s]


0: 480x480 3 clocks, 36.9ms
1: 480x480 2 persons, 1 knife, 1 bowl, 4 cakes, 1 chair, 1 dining table, 36.9ms
2: 480x480 1 banana, 1 sandwich, 36.9ms
3: 480x480 1 person, 36.9ms
4: 480x480 1 bed, 36.9ms
5: 480x480 1 tv, 1 laptop, 1 mouse, 1 keyboard, 36.9ms
6: 480x480 1 person, 1 chair, 1 teddy bear, 36.9ms
7: 480x480 2 tvs, 2 laptops, 1 keyboard, 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  75%|███████▍  | 374/500 [02:39<00:54,  2.30it/s]


0: 480x480 (no detections), 38.0ms
1: 480x480 (no detections), 38.0ms
2: 480x480 (no detections), 38.0ms
3: 480x480 (no detections), 38.0ms
4: 480x480 (no detections), 38.0ms
5: 480x480 (no detections), 38.0ms
6: 480x480 (no detections), 38.0ms
7: 480x480 (no detections), 38.0ms
Speed: 0.0ms preprocess, 38.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  75%|███████▌  | 375/500 [02:40<00:54,  2.28it/s]


0: 480x480 1 person, 1 toilet, 36.3ms
1: 480x480 1 bench, 1 potted plant, 36.3ms
2: 480x480 7 bottles, 4 knifes, 1 oven, 1 sink, 1 refrigerator, 36.3ms
3: 480x480 1 traffic light, 36.3ms
4: 480x480 1 wine glass, 1 bowl, 36.3ms
5: 480x480 1 bird, 36.3ms
6: 480x480 2 persons, 1 sports ball, 2 tennis rackets, 36.3ms
7: 480x480 4 persons, 3 bottles, 2 wine glasss, 1 cup, 1 cake, 1 chair, 1 dining table, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  75%|███████▌  | 376/500 [02:40<00:54,  2.29it/s]


0: 480x480 5 persons, 1 sports ball, 36.0ms
1: 480x480 1 person, 1 sports ball, 2 tennis rackets, 36.0ms
2: 480x480 2 persons, 3 skateboards, 36.0ms
3: 480x480 8 persons, 1 kite, 36.0ms
4: 480x480 1 person, 1 dog, 1 laptop, 36.0ms
5: 480x480 2 persons, 3 bottles, 1 cake, 36.0ms
6: 480x480 1 person, 1 bus, 1 train, 36.0ms
7: 480x480 7 cars, 1 fire hydrant, 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  75%|███████▌  | 377/500 [02:41<00:53,  2.31it/s]


0: 480x480 (no detections), 37.1ms
1: 480x480 (no detections), 37.1ms
2: 480x480 (no detections), 37.1ms
3: 480x480 (no detections), 37.1ms
4: 480x480 (no detections), 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 (no detections), 37.1ms
7: 480x480 (no detections), 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  76%|███████▌  | 378/500 [02:41<00:52,  2.34it/s]


0: 480x480 (no detections), 34.2ms
1: 480x480 (no detections), 34.2ms
2: 480x480 (no detections), 34.2ms
3: 480x480 (no detections), 34.2ms
4: 480x480 (no detections), 34.2ms
5: 480x480 (no detections), 34.2ms
6: 480x480 (no detections), 34.2ms
7: 480x480 (no detections), 34.2ms
Speed: 0.0ms preprocess, 34.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  76%|███████▌  | 379/500 [02:41<00:50,  2.41it/s]


0: 480x480 10 persons, 5 umbrellas, 1 handbag, 36.6ms
1: 480x480 2 persons, 36.6ms
2: 480x480 4 persons, 4 bicycles, 2 trucks, 36.6ms
3: 480x480 1 bottle, 1 toilet, 1 sink, 36.6ms
4: 480x480 3 persons, 36.6ms
5: 480x480 1 person, 36.6ms
6: 480x480 (no detections), 36.6ms
7: 480x480 2 persons, 1 truck, 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  76%|███████▌  | 380/500 [02:42<00:50,  2.36it/s]


0: 480x480 (no detections), 36.2ms
1: 480x480 (no detections), 36.2ms
2: 480x480 (no detections), 36.2ms
3: 480x480 (no detections), 36.2ms
4: 480x480 (no detections), 36.2ms
5: 480x480 (no detections), 36.2ms
6: 480x480 (no detections), 36.2ms
7: 480x480 (no detections), 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  76%|███████▌  | 381/500 [02:42<00:49,  2.40it/s]


0: 480x480 1 person, 1 tie, 35.3ms
1: 480x480 (no detections), 35.3ms
2: 480x480 1 person, 1 skateboard, 35.3ms
3: 480x480 6 persons, 1 car, 1 umbrella, 2 handbags, 35.3ms
4: 480x480 1 person, 1 tennis racket, 35.3ms
5: 480x480 1 microwave, 35.3ms
6: 480x480 1 train, 35.3ms
7: 480x480 3 elephants, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  76%|███████▋  | 382/500 [02:43<00:49,  2.38it/s]


0: 480x480 (no detections), 36.1ms
1: 480x480 (no detections), 36.1ms
2: 480x480 (no detections), 36.1ms
3: 480x480 (no detections), 36.1ms
4: 480x480 (no detections), 36.1ms
5: 480x480 (no detections), 36.1ms
6: 480x480 (no detections), 36.1ms
7: 480x480 (no detections), 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  77%|███████▋  | 383/500 [02:43<00:48,  2.41it/s]


0: 480x480 (no detections), 38.7ms
1: 480x480 (no detections), 38.7ms
2: 480x480 (no detections), 38.7ms
3: 480x480 (no detections), 38.7ms
4: 480x480 (no detections), 38.7ms
5: 480x480 (no detections), 38.7ms
6: 480x480 (no detections), 38.7ms
7: 480x480 (no detections), 38.7ms
Speed: 0.0ms preprocess, 38.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  77%|███████▋  | 384/500 [02:43<00:48,  2.41it/s]


0: 480x480 6 persons, 34.9ms
1: 480x480 3 persons, 34.9ms
2: 480x480 1 pizza, 34.9ms
3: 480x480 (no detections), 34.9ms
4: 480x480 6 persons, 1 umbrella, 1 suitcase, 1 tv, 1 vase, 34.9ms
5: 480x480 3 persons, 1 bus, 34.9ms
6: 480x480 3 persons, 34.9ms
7: 480x480 1 person, 1 skateboard, 34.9ms
Speed: 0.0ms preprocess, 34.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  77%|███████▋  | 385/500 [02:44<00:48,  2.38it/s]


0: 480x480 (no detections), 39.2ms
1: 480x480 (no detections), 39.2ms
2: 480x480 (no detections), 39.2ms
3: 480x480 (no detections), 39.2ms
4: 480x480 (no detections), 39.2ms
5: 480x480 (no detections), 39.2ms
6: 480x480 (no detections), 39.2ms
7: 480x480 (no detections), 39.2ms
Speed: 0.0ms preprocess, 39.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  77%|███████▋  | 386/500 [02:44<00:48,  2.36it/s]


0: 480x480 (no detections), 40.5ms
1: 480x480 (no detections), 40.5ms
2: 480x480 (no detections), 40.5ms
3: 480x480 (no detections), 40.5ms
4: 480x480 (no detections), 40.5ms
5: 480x480 (no detections), 40.5ms
6: 480x480 (no detections), 40.5ms
7: 480x480 (no detections), 40.5ms
Speed: 0.0ms preprocess, 40.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  77%|███████▋  | 387/500 [02:45<00:48,  2.34it/s]


0: 480x480 2 persons, 1 cell phone, 36.7ms
1: 480x480 7 giraffes, 36.7ms
2: 480x480 1 person, 1 skateboard, 36.7ms
3: 480x480 1 boat, 36.7ms
4: 480x480 2 persons, 2 skiss, 36.7ms
5: 480x480 6 donuts, 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 1 person, 1 sports ball, 1 tennis racket, 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  78%|███████▊  | 388/500 [02:45<00:49,  2.27it/s]


0: 480x480 (no detections), 49.2ms
1: 480x480 (no detections), 49.2ms
2: 480x480 (no detections), 49.2ms
3: 480x480 (no detections), 49.2ms
4: 480x480 (no detections), 49.2ms
5: 480x480 (no detections), 49.2ms
6: 480x480 (no detections), 49.2ms
7: 480x480 (no detections), 49.2ms
Speed: 0.0ms preprocess, 49.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  78%|███████▊  | 389/500 [02:46<00:51,  2.16it/s]


0: 480x480 (no detections), 41.4ms
1: 480x480 (no detections), 41.4ms
2: 480x480 (no detections), 41.4ms
3: 480x480 (no detections), 41.4ms
4: 480x480 (no detections), 41.4ms
5: 480x480 (no detections), 41.4ms
6: 480x480 (no detections), 41.4ms
7: 480x480 (no detections), 41.4ms
Speed: 0.0ms preprocess, 41.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  78%|███████▊  | 390/500 [02:46<00:50,  2.17it/s]


0: 480x480 (no detections), 39.6ms
1: 480x480 (no detections), 39.6ms
2: 480x480 (no detections), 39.6ms
3: 480x480 (no detections), 39.6ms
4: 480x480 (no detections), 39.6ms
5: 480x480 (no detections), 39.6ms
6: 480x480 (no detections), 39.6ms
7: 480x480 (no detections), 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  78%|███████▊  | 391/500 [02:47<00:49,  2.21it/s]


0: 480x480 (no detections), 38.2ms
1: 480x480 (no detections), 38.2ms
2: 480x480 (no detections), 38.2ms
3: 480x480 (no detections), 38.2ms
4: 480x480 (no detections), 38.2ms
5: 480x480 (no detections), 38.2ms
6: 480x480 (no detections), 38.2ms
7: 480x480 (no detections), 38.2ms
Speed: 0.0ms preprocess, 38.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  78%|███████▊  | 392/500 [02:47<00:47,  2.26it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  79%|███████▊  | 393/500 [02:47<00:46,  2.30it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  79%|███████▉  | 394/500 [02:48<00:45,  2.33it/s]


0: 480x480 3 persons, 2 handbags, 1 sports ball, 36.1ms
1: 480x480 2 keyboards, 36.1ms
2: 480x480 2 keyboards, 36.1ms
3: 480x480 1 person, 1 bicycle, 4 boats, 1 bench, 36.1ms
4: 480x480 2 beds, 36.1ms
5: 480x480 1 couch, 1 toilet, 2 ovens, 1 refrigerator, 36.1ms
6: 480x480 4 chairs, 36.1ms
7: 480x480 1 car, 1 truck, 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  79%|███████▉  | 395/500 [02:48<00:45,  2.31it/s]


0: 480x480 (no detections), 36.8ms
1: 480x480 (no detections), 36.8ms
2: 480x480 (no detections), 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 (no detections), 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 (no detections), 36.8ms
7: 480x480 (no detections), 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  79%|███████▉  | 396/500 [02:49<00:44,  2.34it/s]


0: 480x480 (no detections), 38.5ms
1: 480x480 1 car, 1 truck, 2 clocks, 38.5ms
2: 480x480 1 person, 1 bench, 38.5ms
3: 480x480 2 persons, 1 donut, 38.5ms
4: 480x480 1 giraffe, 38.5ms
5: 480x480 1 kite, 38.5ms
6: 480x480 3 bananas, 38.5ms
7: 480x480 3 ovens, 38.5ms
Speed: 0.0ms preprocess, 38.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  79%|███████▉  | 397/500 [02:49<00:44,  2.32it/s]


0: 480x480 12 cars, 6 trucks, 36.1ms
1: 480x480 1 person, 2 chairs, 3 beds, 36.1ms
2: 480x480 3 cars, 2 trucks, 36.1ms
3: 480x480 2 giraffes, 36.1ms
4: 480x480 1 cup, 2 bowls, 1 carrot, 36.1ms
5: 480x480 1 person, 1 umbrella, 36.1ms
6: 480x480 2 persons, 36.1ms
7: 480x480 1 bottle, 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  80%|███████▉  | 398/500 [02:50<00:44,  2.30it/s]


0: 480x480 (no detections), 42.3ms
1: 480x480 (no detections), 42.3ms
2: 480x480 (no detections), 42.3ms
3: 480x480 (no detections), 42.3ms
4: 480x480 (no detections), 42.3ms
5: 480x480 (no detections), 42.3ms
6: 480x480 (no detections), 42.3ms
7: 480x480 (no detections), 42.3ms
Speed: 0.0ms preprocess, 42.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  80%|███████▉  | 399/500 [02:50<00:44,  2.26it/s]


0: 480x480 2 cups, 1 bowl, 1 orange, 1 dining table, 36.1ms
1: 480x480 1 bottle, 2 toilets, 3 sinks, 36.1ms
2: 480x480 1 surfboard, 9 refrigerators, 36.1ms
3: 480x480 1 motorcycle, 36.1ms
4: 480x480 5 persons, 3 umbrellas, 14 chairs, 1 dining table, 36.1ms
5: 480x480 1 airplane, 36.1ms
6: 480x480 1 person, 1 sports ball, 1 tennis racket, 36.1ms
7: 480x480 10 persons, 36.1ms
Speed: 0.0ms preprocess, 36.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  80%|████████  | 400/500 [02:50<00:43,  2.29it/s]


0: 480x480 3 bottles, 1 cup, 1 bowl, 1 orange, 36.0ms
1: 480x480 1 cup, 1 bowl, 1 sandwich, 36.0ms
2: 480x480 25 persons, 4 skiss, 1 snowboard, 36.0ms
3: 480x480 3 persons, 1 tv, 36.0ms
4: 480x480 1 tie, 36.0ms
5: 480x480 5 persons, 1 car, 1 truck, 36.0ms
6: 480x480 2 persons, 1 tennis racket, 36.0ms
7: 480x480 17 bottles, 1 bed, 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  80%|████████  | 401/500 [02:51<00:43,  2.29it/s]


0: 480x480 (no detections), 36.8ms
1: 480x480 (no detections), 36.8ms
2: 480x480 (no detections), 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 (no detections), 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 (no detections), 36.8ms
7: 480x480 (no detections), 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  80%|████████  | 402/500 [02:51<00:42,  2.33it/s]


0: 480x480 3 zebras, 35.1ms
1: 480x480 1 person, 35.1ms
2: 480x480 6 persons, 3 elephants, 35.1ms
3: 480x480 1 oven, 35.1ms
4: 480x480 2 persons, 1 couch, 35.1ms
5: 480x480 1 pizza, 35.1ms
6: 480x480 6 persons, 2 boats, 1 bench, 35.1ms
7: 480x480 1 tv, 3 laptops, 1 mouse, 3 keyboards, 3 cell phones, 35.1ms
Speed: 0.0ms preprocess, 35.1ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  81%|████████  | 403/500 [02:52<00:41,  2.36it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  81%|████████  | 404/500 [02:52<00:40,  2.36it/s]


0: 480x480 1 person, 2 laptops, 1 microwave, 35.9ms
1: 480x480 3 persons, 35.9ms
2: 480x480 1 person, 2 chairs, 1 potted plant, 1 vase, 35.9ms
3: 480x480 1 keyboard, 1 clock, 35.9ms
4: 480x480 1 car, 1 bus, 35.9ms
5: 480x480 1 clock, 35.9ms
6: 480x480 1 bed, 35.9ms
7: 480x480 13 persons, 1 car, 1 bottle, 1 cake, 35.9ms
Speed: 0.0ms preprocess, 35.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  81%|████████  | 405/500 [02:53<00:40,  2.37it/s]


0: 480x480 1 person, 5 bottles, 2 cups, 1 sandwich, 1 dining table, 36.3ms
1: 480x480 1 person, 2 beds, 36.3ms
2: 480x480 3 persons, 1 frisbee, 36.3ms
3: 480x480 4 bananas, 2 cakes, 36.3ms
4: 480x480 3 persons, 3 remotes, 36.3ms
5: 480x480 5 persons, 2 umbrellas, 1 tie, 1 kite, 36.3ms
6: 480x480 1 person, 1 bed, 36.3ms
7: 480x480 1 airplane, 1 umbrella, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  81%|████████  | 406/500 [02:53<00:40,  2.35it/s]


0: 480x480 1 person, 1 truck, 7 kites, 37.1ms
1: 480x480 1 person, 1 train, 1 suitcase, 37.1ms
2: 480x480 1 giraffe, 37.1ms
3: 480x480 2 persons, 1 skis, 37.1ms
4: 480x480 2 bottles, 4 bowls, 3 bananas, 37.1ms
5: 480x480 1 person, 2 bicycles, 1 umbrella, 37.1ms
6: 480x480 2 bottles, 1 cup, 2 spoons, 1 bowl, 1 dining table, 37.1ms
7: 480x480 5 persons, 7 cars, 1 bottle, 3 cups, 3 knifes, 2 bowls, 2 pizzas, 1 dining table, 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  81%|████████▏ | 407/500 [02:53<00:40,  2.31it/s]


0: 480x480 3 persons, 3 skiss, 35.3ms
1: 480x480 5 persons, 35.3ms
2: 480x480 1 person, 1 tie, 35.3ms
3: 480x480 3 bears, 35.3ms
4: 480x480 1 person, 35.3ms
5: 480x480 1 bed, 1 teddy bear, 35.3ms
6: 480x480 2 persons, 1 bench, 1 remote, 1 cell phone, 35.3ms
7: 480x480 1 clock, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  82%|████████▏ | 408/500 [02:54<00:39,  2.34it/s]


0: 480x480 (no detections), 39.8ms
1: 480x480 (no detections), 39.8ms
2: 480x480 (no detections), 39.8ms
3: 480x480 (no detections), 39.8ms
4: 480x480 (no detections), 39.8ms
5: 480x480 (no detections), 39.8ms
6: 480x480 (no detections), 39.8ms
7: 480x480 (no detections), 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  82%|████████▏ | 409/500 [02:54<00:39,  2.29it/s]


0: 480x480 1 person, 1 donut, 38.3ms
1: 480x480 1 person, 38.3ms
2: 480x480 6 zebras, 38.3ms
3: 480x480 3 persons, 1 remote, 38.3ms
4: 480x480 12 persons, 1 sports ball, 38.3ms
5: 480x480 1 person, 2 beds, 1 teddy bear, 38.3ms
6: 480x480 4 persons, 38.3ms
7: 480x480 1 bus, 1 teddy bear, 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  82%|████████▏ | 410/500 [02:55<00:39,  2.26it/s]


0: 480x480 (no detections), 39.6ms
1: 480x480 (no detections), 39.6ms
2: 480x480 (no detections), 39.6ms
3: 480x480 (no detections), 39.6ms
4: 480x480 (no detections), 39.6ms
5: 480x480 (no detections), 39.6ms
6: 480x480 (no detections), 39.6ms
7: 480x480 (no detections), 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  82%|████████▏ | 411/500 [02:55<00:39,  2.26it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 1 person, 1 tennis racket, 37.9ms
2: 480x480 1 bird, 37.9ms
3: 480x480 4 persons, 1 train, 1 handbag, 1 suitcase, 37.9ms
4: 480x480 2 cars, 37.9ms
5: 480x480 1 cat, 1 laptop, 37.9ms
6: 480x480 1 elephant, 37.9ms
7: 480x480 1 bird, 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  82%|████████▏ | 412/500 [02:56<00:38,  2.27it/s]


0: 480x480 1 person, 1 horse, 37.7ms
1: 480x480 1 person, 1 skateboard, 37.7ms
2: 480x480 1 person, 1 tie, 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 1 bicycle, 1 car, 1 bus, 1 truck, 37.7ms
5: 480x480 1 truck, 2 boats, 37.7ms
6: 480x480 7 persons, 37.7ms
7: 480x480 1 airplane, 1 truck, 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 480)


Validating:  83%|████████▎ | 413/500 [02:56<00:38,  2.28it/s]


0: 480x480 1 train, 36.9ms
1: 480x480 1 person, 1 tie, 36.9ms
2: 480x480 2 donuts, 36.9ms
3: 480x480 1 train, 36.9ms
4: 480x480 1 bed, 36.9ms
5: 480x480 2 persons, 36.9ms
6: 480x480 3 cars, 2 trucks, 36.9ms
7: 480x480 4 persons, 4 cars, 5 trucks, 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  83%|████████▎ | 414/500 [02:57<00:37,  2.29it/s]


0: 480x480 (no detections), 38.9ms
1: 480x480 (no detections), 38.9ms
2: 480x480 (no detections), 38.9ms
3: 480x480 (no detections), 38.9ms
4: 480x480 (no detections), 38.9ms
5: 480x480 (no detections), 38.9ms
6: 480x480 (no detections), 38.9ms
7: 480x480 (no detections), 38.9ms
Speed: 0.0ms preprocess, 38.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  83%|████████▎ | 415/500 [02:57<00:37,  2.29it/s]


0: 480x480 (no detections), 34.5ms
1: 480x480 (no detections), 34.5ms
2: 480x480 (no detections), 34.5ms
3: 480x480 (no detections), 34.5ms
4: 480x480 (no detections), 34.5ms
5: 480x480 (no detections), 34.5ms
6: 480x480 (no detections), 34.5ms
7: 480x480 (no detections), 34.5ms
Speed: 0.0ms preprocess, 34.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  83%|████████▎ | 416/500 [02:57<00:35,  2.34it/s]


0: 480x480 (no detections), 34.8ms
1: 480x480 (no detections), 34.8ms
2: 480x480 (no detections), 34.8ms
3: 480x480 (no detections), 34.8ms
4: 480x480 (no detections), 34.8ms
5: 480x480 (no detections), 34.8ms
6: 480x480 (no detections), 34.8ms
7: 480x480 (no detections), 34.8ms
Speed: 0.0ms preprocess, 34.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  83%|████████▎ | 417/500 [02:58<00:34,  2.39it/s]


0: 480x480 2 persons, 43.3ms
1: 480x480 1 person, 1 truck, 1 potted plant, 2 toilets, 43.3ms
2: 480x480 2 persons, 3 bottles, 1 wine glass, 9 cakes, 43.3ms
3: 480x480 5 persons, 1 toothbrush, 43.3ms
4: 480x480 (no detections), 43.3ms
5: 480x480 1 bird, 43.3ms
6: 480x480 1 clock, 43.3ms
7: 480x480 1 bus, 1 train, 43.3ms
Speed: 0.0ms preprocess, 43.3ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  84%|████████▎ | 418/500 [02:58<00:35,  2.28it/s]


0: 480x480 (no detections), 35.1ms
1: 480x480 (no detections), 35.1ms
2: 480x480 (no detections), 35.1ms
3: 480x480 (no detections), 35.1ms
4: 480x480 (no detections), 35.1ms
5: 480x480 (no detections), 35.1ms
6: 480x480 (no detections), 35.1ms
7: 480x480 (no detections), 35.1ms
Speed: 0.0ms preprocess, 35.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  84%|████████▍ | 419/500 [02:59<00:34,  2.34it/s]


0: 480x480 1 person, 1 tie, 37.8ms
1: 480x480 11 persons, 8 bottles, 6 cups, 2 dining tables, 37.8ms
2: 480x480 1 stop sign, 37.8ms
3: 480x480 10 persons, 1 tie, 3 bottles, 1 chair, 1 dining table, 37.8ms
4: 480x480 2 buss, 37.8ms
5: 480x480 1 person, 37.8ms
6: 480x480 1 person, 3 clocks, 37.8ms
7: 480x480 3 persons, 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  84%|████████▍ | 420/500 [02:59<00:34,  2.33it/s]


0: 480x480 1 person, 1 sports ball, 1 tennis racket, 36.4ms
1: 480x480 1 person, 1 surfboard, 36.4ms
2: 480x480 1 bus, 36.4ms
3: 480x480 3 persons, 1 car, 1 airplane, 4 trucks, 36.4ms
4: 480x480 3 persons, 1 baseball glove, 36.4ms
5: 480x480 1 laptop, 36.4ms
6: 480x480 2 persons, 36.4ms
7: 480x480 1 fork, 1 knife, 1 cake, 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  84%|████████▍ | 421/500 [03:00<00:33,  2.35it/s]


0: 480x480 (no detections), 34.1ms
1: 480x480 2 zebras, 1 giraffe, 34.1ms
2: 480x480 1 person, 1 cell phone, 34.1ms
3: 480x480 7 donuts, 34.1ms
4: 480x480 1 dog, 34.1ms
5: 480x480 1 vase, 34.1ms
6: 480x480 1 cup, 34.1ms
7: 480x480 2 bottles, 3 apples, 2 chairs, 34.1ms
Speed: 0.0ms preprocess, 34.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  84%|████████▍ | 422/500 [03:00<00:32,  2.37it/s]


0: 480x480 (no detections), 37.3ms
1: 480x480 (no detections), 37.3ms
2: 480x480 (no detections), 37.3ms
3: 480x480 (no detections), 37.3ms
4: 480x480 (no detections), 37.3ms
5: 480x480 (no detections), 37.3ms
6: 480x480 (no detections), 37.3ms
7: 480x480 (no detections), 37.3ms
Speed: 0.0ms preprocess, 37.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  85%|████████▍ | 423/500 [03:00<00:32,  2.39it/s]


0: 480x480 (no detections), 34.9ms
1: 480x480 (no detections), 34.9ms
2: 480x480 (no detections), 34.9ms
3: 480x480 (no detections), 34.9ms
4: 480x480 (no detections), 34.9ms
5: 480x480 (no detections), 34.9ms
6: 480x480 (no detections), 34.9ms
7: 480x480 (no detections), 34.9ms
Speed: 0.0ms preprocess, 34.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  85%|████████▍ | 424/500 [03:01<00:31,  2.43it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  85%|████████▌ | 425/500 [03:01<00:30,  2.46it/s]


0: 480x480 1 person, 1 bed, 1 tv, 36.9ms
1: 480x480 1 zebra, 1 cake, 36.9ms
2: 480x480 1 sheep, 1 cow, 36.9ms
3: 480x480 6 boats, 36.9ms
4: 480x480 1 cat, 1 bed, 36.9ms
5: 480x480 3 persons, 1 tie, 36.9ms
6: 480x480 2 persons, 3 cars, 1 bus, 3 traffic lights, 1 handbag, 36.9ms
7: 480x480 2 persons, 1 cup, 2 dining tables, 1 laptop, 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  85%|████████▌ | 426/500 [03:02<00:30,  2.43it/s]


0: 480x480 4 persons, 1 horse, 36.3ms
1: 480x480 2 persons, 1 bench, 36.3ms
2: 480x480 3 persons, 2 skateboards, 2 bottles, 1 hot dog, 1 oven, 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 1 person, 1 bus, 36.3ms
5: 480x480 2 birds, 36.3ms
6: 480x480 11 zebras, 36.3ms
7: 480x480 6 elephants, 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  85%|████████▌ | 427/500 [03:02<00:30,  2.42it/s]


0: 480x480 (no detections), 40.4ms
1: 480x480 1 person, 40.4ms
2: 480x480 2 boats, 40.4ms
3: 480x480 (no detections), 40.4ms
4: 480x480 1 person, 1 umbrella, 40.4ms
5: 480x480 1 fork, 1 bowl, 1 carrot, 1 dining table, 40.4ms
6: 480x480 2 persons, 4 surfboards, 40.4ms
7: 480x480 1 person, 40.4ms
Speed: 0.0ms preprocess, 40.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  86%|████████▌ | 428/500 [03:02<00:30,  2.35it/s]


0: 480x480 4 laptops, 1 keyboard, 35.3ms
1: 480x480 2 bottles, 1 laptop, 1 microwave, 1 oven, 35.3ms
2: 480x480 2 persons, 1 kite, 35.3ms
3: 480x480 (no detections), 35.3ms
4: 480x480 1 tv, 35.3ms
5: 480x480 1 airplane, 35.3ms
6: 480x480 2 persons, 2 cars, 1 boat, 35.3ms
7: 480x480 1 cake, 35.3ms
Speed: 0.0ms preprocess, 35.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  86%|████████▌ | 429/500 [03:03<00:30,  2.36it/s]


0: 480x480 (no detections), 40.2ms
1: 480x480 (no detections), 40.2ms
2: 480x480 (no detections), 40.2ms
3: 480x480 (no detections), 40.2ms
4: 480x480 (no detections), 40.2ms
5: 480x480 (no detections), 40.2ms
6: 480x480 (no detections), 40.2ms
7: 480x480 (no detections), 40.2ms
Speed: 0.0ms preprocess, 40.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  86%|████████▌ | 430/500 [03:03<00:29,  2.35it/s]


0: 480x480 2 persons, 37.5ms
1: 480x480 1 bed, 37.5ms
2: 480x480 1 bus, 37.5ms
3: 480x480 (no detections), 37.5ms
4: 480x480 1 person, 1 truck, 37.5ms
5: 480x480 1 bottle, 1 cup, 1 microwave, 1 oven, 37.5ms
6: 480x480 4 persons, 1 bottle, 37.5ms
7: 480x480 1 person, 1 surfboard, 37.5ms
Speed: 0.0ms preprocess, 37.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  86%|████████▌ | 431/500 [03:04<00:29,  2.35it/s]


0: 480x480 1 giraffe, 39.8ms
1: 480x480 3 persons, 5 bananas, 39.8ms
2: 480x480 1 banana, 1 sandwich, 39.8ms
3: 480x480 2 persons, 5 cars, 39.8ms
4: 480x480 4 persons, 1 laptop, 39.8ms
5: 480x480 1 bottle, 3 pizzas, 39.8ms
6: 480x480 1 person, 1 motorcycle, 39.8ms
7: 480x480 1 car, 1 bus, 39.8ms
Speed: 0.0ms preprocess, 39.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  86%|████████▋ | 432/500 [03:04<00:29,  2.30it/s]


0: 480x480 (no detections), 34.7ms
1: 480x480 (no detections), 34.7ms
2: 480x480 (no detections), 34.7ms
3: 480x480 (no detections), 34.7ms
4: 480x480 (no detections), 34.7ms
5: 480x480 (no detections), 34.7ms
6: 480x480 (no detections), 34.7ms
7: 480x480 (no detections), 34.7ms
Speed: 0.0ms preprocess, 34.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  87%|████████▋ | 433/500 [03:05<00:28,  2.38it/s]


0: 480x480 (no detections), 35.8ms
1: 480x480 (no detections), 35.8ms
2: 480x480 (no detections), 35.8ms
3: 480x480 (no detections), 35.8ms
4: 480x480 (no detections), 35.8ms
5: 480x480 (no detections), 35.8ms
6: 480x480 (no detections), 35.8ms
7: 480x480 (no detections), 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  87%|████████▋ | 434/500 [03:05<00:27,  2.42it/s]


0: 480x480 (no detections), 37.9ms
1: 480x480 (no detections), 37.9ms
2: 480x480 (no detections), 37.9ms
3: 480x480 (no detections), 37.9ms
4: 480x480 (no detections), 37.9ms
5: 480x480 (no detections), 37.9ms
6: 480x480 (no detections), 37.9ms
7: 480x480 (no detections), 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  87%|████████▋ | 435/500 [03:05<00:26,  2.43it/s]


0: 480x480 (no detections), 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 (no detections), 36.0ms
3: 480x480 (no detections), 36.0ms
4: 480x480 (no detections), 36.0ms
5: 480x480 (no detections), 36.0ms
6: 480x480 (no detections), 36.0ms
7: 480x480 (no detections), 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  87%|████████▋ | 436/500 [03:06<00:26,  2.46it/s]


0: 480x480 (no detections), 38.3ms
1: 480x480 (no detections), 38.3ms
2: 480x480 (no detections), 38.3ms
3: 480x480 (no detections), 38.3ms
4: 480x480 (no detections), 38.3ms
5: 480x480 (no detections), 38.3ms
6: 480x480 (no detections), 38.3ms
7: 480x480 (no detections), 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  87%|████████▋ | 437/500 [03:06<00:25,  2.43it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  88%|████████▊ | 438/500 [03:07<00:25,  2.41it/s]


0: 480x480 3 giraffes, 34.4ms
1: 480x480 (no detections), 34.4ms
2: 480x480 4 persons, 1 baseball bat, 1 baseball glove, 34.4ms
3: 480x480 2 bowls, 1 dining table, 34.4ms
4: 480x480 2 tvs, 1 laptop, 1 mouse, 2 keyboards, 34.4ms
5: 480x480 6 persons, 1 car, 2 buss, 3 handbags, 34.4ms
6: 480x480 1 toilet, 34.4ms
7: 480x480 2 persons, 1 boat, 34.4ms
Speed: 0.0ms preprocess, 34.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  88%|████████▊ | 439/500 [03:07<00:25,  2.40it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  88%|████████▊ | 440/500 [03:07<00:24,  2.40it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  88%|████████▊ | 441/500 [03:08<00:24,  2.44it/s]


0: 480x480 1 person, 37.9ms
1: 480x480 2 persons, 1 bench, 1 bed, 37.9ms
2: 480x480 1 person, 1 bench, 9 birds, 1 handbag, 37.9ms
3: 480x480 1 boat, 37.9ms
4: 480x480 1 person, 1 frisbee, 37.9ms
5: 480x480 1 bottle, 1 pizza, 2 dining tables, 37.9ms
6: 480x480 12 persons, 1 car, 5 boats, 1 bird, 1 backpack, 1 handbag, 37.9ms
7: 480x480 3 persons, 2 cars, 2 trains, 1 handbag, 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  88%|████████▊ | 442/500 [03:08<00:24,  2.39it/s]


0: 480x480 1 person, 1 potted plant, 1 vase, 41.6ms
1: 480x480 1 clock, 41.6ms
2: 480x480 1 vase, 41.6ms
3: 480x480 (no detections), 41.6ms
4: 480x480 (no detections), 41.6ms
5: 480x480 2 persons, 1 surfboard, 1 bottle, 2 beds, 1 remote, 41.6ms
6: 480x480 1 person, 41.6ms
7: 480x480 2 persons, 1 umbrella, 41.6ms
Speed: 0.0ms preprocess, 41.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  89%|████████▊ | 443/500 [03:09<00:24,  2.31it/s]


0: 480x480 2 persons, 1 spoon, 36.8ms
1: 480x480 2 persons, 1 surfboard, 36.8ms
2: 480x480 2 persons, 3 wine glasss, 1 pizza, 36.8ms
3: 480x480 (no detections), 36.8ms
4: 480x480 2 bottles, 1 sandwich, 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 1 person, 1 surfboard, 36.8ms
7: 480x480 7 persons, 3 hot dogs, 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  89%|████████▉ | 444/500 [03:09<00:24,  2.30it/s]


0: 480x480 1 person, 1 motorcycle, 1 skateboard, 36.9ms
1: 480x480 1 laptop, 1 mouse, 36.9ms
2: 480x480 1 car, 1 bus, 1 train, 36.9ms
3: 480x480 4 persons, 1 bus, 36.9ms
4: 480x480 1 bus, 36.9ms
5: 480x480 2 persons, 2 cars, 1 baseball bat, 36.9ms
6: 480x480 3 persons, 1 kite, 36.9ms
7: 480x480 1 toilet, 1 sink, 36.9ms
Speed: 0.0ms preprocess, 36.9ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  89%|████████▉ | 445/500 [03:10<00:23,  2.32it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  89%|████████▉ | 446/500 [03:10<00:22,  2.38it/s]


0: 480x480 (no detections), 38.2ms
1: 480x480 (no detections), 38.2ms
2: 480x480 (no detections), 38.2ms
3: 480x480 (no detections), 38.2ms
4: 480x480 (no detections), 38.2ms
5: 480x480 (no detections), 38.2ms
6: 480x480 (no detections), 38.2ms
7: 480x480 (no detections), 38.2ms
Speed: 0.0ms preprocess, 38.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  89%|████████▉ | 447/500 [03:10<00:22,  2.39it/s]


0: 480x480 (no detections), 38.5ms
1: 480x480 (no detections), 38.5ms
2: 480x480 (no detections), 38.5ms
3: 480x480 (no detections), 38.5ms
4: 480x480 (no detections), 38.5ms
5: 480x480 (no detections), 38.5ms
6: 480x480 (no detections), 38.5ms
7: 480x480 (no detections), 38.5ms
Speed: 0.0ms preprocess, 38.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  90%|████████▉ | 448/500 [03:11<00:21,  2.38it/s]


0: 480x480 (no detections), 35.7ms
1: 480x480 (no detections), 35.7ms
2: 480x480 (no detections), 35.7ms
3: 480x480 (no detections), 35.7ms
4: 480x480 (no detections), 35.7ms
5: 480x480 (no detections), 35.7ms
6: 480x480 (no detections), 35.7ms
7: 480x480 (no detections), 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  90%|████████▉ | 449/500 [03:11<00:21,  2.42it/s]


0: 480x480 1 dog, 37.2ms
1: 480x480 1 boat, 1 bench, 37.2ms
2: 480x480 1 cat, 2 remotes, 37.2ms
3: 480x480 10 persons, 1 surfboard, 37.2ms
4: 480x480 4 persons, 1 handbag, 37.2ms
5: 480x480 1 person, 1 boat, 37.2ms
6: 480x480 1 fork, 1 bowl, 1 pizza, 37.2ms
7: 480x480 1 umbrella, 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  90%|█████████ | 450/500 [03:12<00:20,  2.40it/s]


0: 480x480 1 bear, 37.9ms
1: 480x480 17 persons, 1 car, 4 frisbees, 37.9ms
2: 480x480 2 potted plants, 1 vase, 37.9ms
3: 480x480 1 toothbrush, 37.9ms
4: 480x480 1 person, 2 beds, 37.9ms
5: 480x480 1 bowl, 1 banana, 37.9ms
6: 480x480 1 bowl, 37.9ms
7: 480x480 3 persons, 37.9ms
Speed: 0.0ms preprocess, 37.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  90%|█████████ | 451/500 [03:12<00:20,  2.35it/s]


0: 480x480 (no detections), 34.3ms
1: 480x480 (no detections), 34.3ms
2: 480x480 (no detections), 34.3ms
3: 480x480 (no detections), 34.3ms
4: 480x480 (no detections), 34.3ms
5: 480x480 (no detections), 34.3ms
6: 480x480 (no detections), 34.3ms
7: 480x480 (no detections), 34.3ms
Speed: 0.0ms preprocess, 34.3ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  90%|█████████ | 452/500 [03:12<00:19,  2.42it/s]


0: 480x480 (no detections), 42.0ms
1: 480x480 (no detections), 42.0ms
2: 480x480 (no detections), 42.0ms
3: 480x480 (no detections), 42.0ms
4: 480x480 (no detections), 42.0ms
5: 480x480 (no detections), 42.0ms
6: 480x480 (no detections), 42.0ms
7: 480x480 (no detections), 42.0ms
Speed: 0.0ms preprocess, 42.0ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  91%|█████████ | 453/500 [03:13<00:19,  2.37it/s]


0: 480x480 10 persons, 1 skateboard, 36.0ms
1: 480x480 (no detections), 36.0ms
2: 480x480 1 bird, 1 dog, 36.0ms
3: 480x480 1 person, 1 laptop, 1 mouse, 1 keyboard, 36.0ms
4: 480x480 1 clock, 36.0ms
5: 480x480 13 persons, 2 airplanes, 36.0ms
6: 480x480 1 dining table, 36.0ms
7: 480x480 2 laptops, 1 keyboard, 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  91%|█████████ | 454/500 [03:13<00:19,  2.35it/s]


0: 480x480 (no detections), 37.5ms
1: 480x480 (no detections), 37.5ms
2: 480x480 (no detections), 37.5ms
3: 480x480 (no detections), 37.5ms
4: 480x480 (no detections), 37.5ms
5: 480x480 (no detections), 37.5ms
6: 480x480 (no detections), 37.5ms
7: 480x480 (no detections), 37.5ms
Speed: 0.0ms preprocess, 37.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  91%|█████████ | 455/500 [03:14<00:19,  2.35it/s]


0: 480x480 1 car, 3 boats, 34.4ms
1: 480x480 1 person, 34.4ms
2: 480x480 5 persons, 1 kite, 34.4ms
3: 480x480 1 bus, 3 umbrellas, 34.4ms
4: 480x480 1 person, 1 bed, 1 laptop, 34.4ms
5: 480x480 1 person, 5 kites, 34.4ms
6: 480x480 1 person, 1 tie, 34.4ms
7: 480x480 1 bowl, 1 broccoli, 34.4ms
Speed: 0.0ms preprocess, 34.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 480)


Validating:  91%|█████████ | 456/500 [03:14<00:18,  2.37it/s]


0: 480x480 (no detections), 36.6ms
1: 480x480 (no detections), 36.6ms
2: 480x480 (no detections), 36.6ms
3: 480x480 (no detections), 36.6ms
4: 480x480 (no detections), 36.6ms
5: 480x480 (no detections), 36.6ms
6: 480x480 (no detections), 36.6ms
7: 480x480 (no detections), 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  91%|█████████▏| 457/500 [03:15<00:18,  2.38it/s]


0: 480x480 1 person, 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 1 person, 2 wine glasss, 1 bowl, 1 sandwich, 1 dining table, 38.4ms
3: 480x480 2 persons, 8 surfboards, 38.4ms
4: 480x480 1 person, 38.4ms
5: 480x480 1 person, 1 bowl, 38.4ms
6: 480x480 1 cat, 38.4ms
7: 480x480 1 fork, 1 bowl, 1 donut, 1 dining table, 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  92%|█████████▏| 458/500 [03:15<00:17,  2.36it/s]


0: 480x480 (no detections), 37.7ms
1: 480x480 (no detections), 37.7ms
2: 480x480 (no detections), 37.7ms
3: 480x480 (no detections), 37.7ms
4: 480x480 (no detections), 37.7ms
5: 480x480 (no detections), 37.7ms
6: 480x480 (no detections), 37.7ms
7: 480x480 (no detections), 37.7ms
Speed: 0.0ms preprocess, 37.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  92%|█████████▏| 459/500 [03:15<00:17,  2.38it/s]


0: 480x480 (no detections), 36.2ms
1: 480x480 (no detections), 36.2ms
2: 480x480 (no detections), 36.2ms
3: 480x480 (no detections), 36.2ms
4: 480x480 (no detections), 36.2ms
5: 480x480 (no detections), 36.2ms
6: 480x480 (no detections), 36.2ms
7: 480x480 (no detections), 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  92%|█████████▏| 460/500 [03:16<00:16,  2.41it/s]


0: 480x480 (no detections), 39.7ms
1: 480x480 (no detections), 39.7ms
2: 480x480 (no detections), 39.7ms
3: 480x480 (no detections), 39.7ms
4: 480x480 (no detections), 39.7ms
5: 480x480 (no detections), 39.7ms
6: 480x480 (no detections), 39.7ms
7: 480x480 (no detections), 39.7ms
Speed: 0.0ms preprocess, 39.7ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  92%|█████████▏| 461/500 [03:16<00:16,  2.36it/s]


0: 480x480 (no detections), 36.3ms
1: 480x480 (no detections), 36.3ms
2: 480x480 (no detections), 36.3ms
3: 480x480 (no detections), 36.3ms
4: 480x480 (no detections), 36.3ms
5: 480x480 (no detections), 36.3ms
6: 480x480 (no detections), 36.3ms
7: 480x480 (no detections), 36.3ms
Speed: 0.0ms preprocess, 36.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  92%|█████████▏| 462/500 [03:17<00:16,  2.37it/s]


0: 480x480 1 bottle, 1 cup, 3 bananas, 35.9ms
1: 480x480 1 person, 1 skateboard, 35.9ms
2: 480x480 1 boat, 35.9ms
3: 480x480 10 sheeps, 35.9ms
4: 480x480 1 person, 35.9ms
5: 480x480 1 bottle, 1 cup, 1 knife, 1 laptop, 35.9ms
6: 480x480 1 refrigerator, 35.9ms
7: 480x480 1 giraffe, 35.9ms
Speed: 0.0ms preprocess, 35.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  93%|█████████▎| 463/500 [03:17<00:15,  2.37it/s]


0: 480x480 1 bench, 37.8ms
1: 480x480 1 dining table, 37.8ms
2: 480x480 2 persons, 1 fire hydrant, 37.8ms
3: 480x480 5 persons, 3 umbrellas, 1 bowl, 4 apples, 37.8ms
4: 480x480 1 chair, 37.8ms
5: 480x480 2 toilets, 37.8ms
6: 480x480 2 persons, 2 baseball gloves, 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  93%|█████████▎| 464/500 [03:18<00:15,  2.34it/s]


0: 480x480 1 person, 1 laptop, 1 cell phone, 36.8ms
1: 480x480 1 person, 1 banana, 36.8ms
2: 480x480 4 persons, 1 car, 1 motorcycle, 36.8ms
3: 480x480 1 train, 36.8ms
4: 480x480 1 train, 36.8ms
5: 480x480 (no detections), 36.8ms
6: 480x480 1 bird, 1 umbrella, 36.8ms
7: 480x480 2 umbrellas, 36.8ms
Speed: 0.0ms preprocess, 36.8ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  93%|█████████▎| 465/500 [03:18<00:14,  2.35it/s]


0: 480x480 (no detections), 39.1ms
1: 480x480 (no detections), 39.1ms
2: 480x480 (no detections), 39.1ms
3: 480x480 (no detections), 39.1ms
4: 480x480 (no detections), 39.1ms
5: 480x480 (no detections), 39.1ms
6: 480x480 (no detections), 39.1ms
7: 480x480 (no detections), 39.1ms
Speed: 0.0ms preprocess, 39.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  93%|█████████▎| 466/500 [03:18<00:14,  2.35it/s]


0: 480x480 (no detections), 35.5ms
1: 480x480 (no detections), 35.5ms
2: 480x480 (no detections), 35.5ms
3: 480x480 (no detections), 35.5ms
4: 480x480 (no detections), 35.5ms
5: 480x480 (no detections), 35.5ms
6: 480x480 (no detections), 35.5ms
7: 480x480 (no detections), 35.5ms
Speed: 0.0ms preprocess, 35.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  93%|█████████▎| 467/500 [03:19<00:13,  2.39it/s]


0: 480x480 1 person, 1 bus, 43.1ms
1: 480x480 1 car, 2 boats, 43.1ms
2: 480x480 1 bench, 5 chairs, 1 couch, 1 dining table, 1 tv, 43.1ms
3: 480x480 1 truck, 2 boats, 43.1ms
4: 480x480 1 airplane, 43.1ms
5: 480x480 1 bottle, 1 couch, 1 tv, 43.1ms
6: 480x480 2 clocks, 43.1ms
7: 480x480 1 person, 3 skiss, 43.1ms
Speed: 0.0ms preprocess, 43.1ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  94%|█████████▎| 468/500 [03:19<00:13,  2.29it/s]


0: 480x480 (no detections), 36.7ms
1: 480x480 (no detections), 36.7ms
2: 480x480 (no detections), 36.7ms
3: 480x480 (no detections), 36.7ms
4: 480x480 (no detections), 36.7ms
5: 480x480 (no detections), 36.7ms
6: 480x480 (no detections), 36.7ms
7: 480x480 (no detections), 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  94%|█████████▍| 469/500 [03:20<00:13,  2.31it/s]


0: 480x480 (no detections), 39.6ms
1: 480x480 (no detections), 39.6ms
2: 480x480 (no detections), 39.6ms
3: 480x480 (no detections), 39.6ms
4: 480x480 (no detections), 39.6ms
5: 480x480 (no detections), 39.6ms
6: 480x480 (no detections), 39.6ms
7: 480x480 (no detections), 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  94%|█████████▍| 470/500 [03:20<00:13,  2.30it/s]


0: 480x480 (no detections), 36.6ms
1: 480x480 (no detections), 36.6ms
2: 480x480 (no detections), 36.6ms
3: 480x480 (no detections), 36.6ms
4: 480x480 (no detections), 36.6ms
5: 480x480 (no detections), 36.6ms
6: 480x480 (no detections), 36.6ms
7: 480x480 (no detections), 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  94%|█████████▍| 471/500 [03:21<00:12,  2.34it/s]


0: 480x480 4 dogs, 37.2ms
1: 480x480 1 bear, 37.2ms
2: 480x480 2 bottles, 1 toilet, 1 sink, 37.2ms
3: 480x480 3 persons, 3 horses, 37.2ms
4: 480x480 3 persons, 37.2ms
5: 480x480 1 person, 1 baseball bat, 37.2ms
6: 480x480 3 bananas, 2 apples, 2 oranges, 37.2ms
7: 480x480 2 persons, 2 birds, 2 handbags, 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  94%|█████████▍| 472/500 [03:21<00:11,  2.34it/s]


0: 480x480 1 bird, 1 umbrella, 39.3ms
1: 480x480 (no detections), 39.3ms
2: 480x480 1 bowl, 1 dining table, 39.3ms
3: 480x480 1 person, 1 dog, 39.3ms
4: 480x480 2 laptops, 1 keyboard, 39.3ms
5: 480x480 2 umbrellas, 1 kite, 39.3ms
6: 480x480 1 umbrella, 39.3ms
7: 480x480 1 mouse, 39.3ms
Speed: 0.0ms preprocess, 39.3ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  95%|█████████▍| 473/500 [03:21<00:11,  2.29it/s]


0: 480x480 3 persons, 1 bus, 1 umbrella, 1 handbag, 35.7ms
1: 480x480 1 fork, 1 orange, 35.7ms
2: 480x480 1 person, 2 bananas, 35.7ms
3: 480x480 2 bowls, 35.7ms
4: 480x480 (no detections), 35.7ms
5: 480x480 1 person, 35.7ms
6: 480x480 (no detections), 35.7ms
7: 480x480 1 bench, 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 480)


Validating:  95%|█████████▍| 474/500 [03:22<00:11,  2.31it/s]


0: 480x480 1 cup, 1 bowl, 1 orange, 1 dining table, 39.4ms
1: 480x480 1 person, 39.4ms
2: 480x480 1 person, 1 tennis racket, 39.4ms
3: 480x480 1 bed, 39.4ms
4: 480x480 12 persons, 39.4ms
5: 480x480 1 cup, 2 bowls, 39.4ms
6: 480x480 2 horses, 39.4ms
7: 480x480 9 sheeps, 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  95%|█████████▌| 475/500 [03:22<00:11,  2.26it/s]


0: 480x480 (no detections), 35.2ms
1: 480x480 (no detections), 35.2ms
2: 480x480 (no detections), 35.2ms
3: 480x480 (no detections), 35.2ms
4: 480x480 (no detections), 35.2ms
5: 480x480 (no detections), 35.2ms
6: 480x480 (no detections), 35.2ms
7: 480x480 (no detections), 35.2ms
Speed: 0.0ms preprocess, 35.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  95%|█████████▌| 476/500 [03:23<00:10,  2.32it/s]


0: 480x480 1 person, 36.7ms
1: 480x480 1 person, 36.7ms
2: 480x480 7 persons, 36.7ms
3: 480x480 3 persons, 1 dog, 1 cup, 36.7ms
4: 480x480 1 surfboard, 36.7ms
5: 480x480 1 person, 1 pizza, 1 dining table, 36.7ms
6: 480x480 2 bottles, 1 oven, 1 book, 36.7ms
7: 480x480 1 person, 1 truck, 1 bed, 36.7ms
Speed: 0.0ms preprocess, 36.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  95%|█████████▌| 477/500 [03:23<00:09,  2.34it/s]


0: 480x480 (no detections), 38.4ms
1: 480x480 (no detections), 38.4ms
2: 480x480 (no detections), 38.4ms
3: 480x480 (no detections), 38.4ms
4: 480x480 (no detections), 38.4ms
5: 480x480 (no detections), 38.4ms
6: 480x480 (no detections), 38.4ms
7: 480x480 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  96%|█████████▌| 478/500 [03:24<00:09,  2.31it/s]


0: 480x480 (no detections), 35.0ms
1: 480x480 (no detections), 35.0ms
2: 480x480 (no detections), 35.0ms
3: 480x480 (no detections), 35.0ms
4: 480x480 (no detections), 35.0ms
5: 480x480 (no detections), 35.0ms
6: 480x480 (no detections), 35.0ms
7: 480x480 (no detections), 35.0ms
Speed: 0.0ms preprocess, 35.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  96%|█████████▌| 479/500 [03:24<00:08,  2.38it/s]


0: 480x480 (no detections), 39.6ms
1: 480x480 (no detections), 39.6ms
2: 480x480 (no detections), 39.6ms
3: 480x480 (no detections), 39.6ms
4: 480x480 (no detections), 39.6ms
5: 480x480 (no detections), 39.6ms
6: 480x480 (no detections), 39.6ms
7: 480x480 (no detections), 39.6ms
Speed: 0.0ms preprocess, 39.6ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  96%|█████████▌| 480/500 [03:24<00:08,  2.36it/s]


0: 480x480 3 persons, 36.0ms
1: 480x480 1 fork, 1 banana, 2 broccolis, 36.0ms
2: 480x480 16 persons, 1 sports ball, 36.0ms
3: 480x480 2 persons, 36.0ms
4: 480x480 1 laptop, 1 mouse, 1 keyboard, 36.0ms
5: 480x480 1 person, 1 donut, 36.0ms
6: 480x480 1 boat, 13 bananas, 36.0ms
7: 480x480 1 bird, 1 sheep, 36.0ms
Speed: 0.0ms preprocess, 36.0ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 480)


Validating:  96%|█████████▌| 481/500 [03:25<00:08,  2.34it/s]


0: 480x480 2 bottles, 2 toilets, 1 sink, 42.3ms
1: 480x480 (no detections), 42.3ms
2: 480x480 1 keyboard, 42.3ms
3: 480x480 2 persons, 2 horses, 42.3ms
4: 480x480 2 birds, 42.3ms
5: 480x480 1 person, 2 surfboards, 42.3ms
6: 480x480 1 person, 1 sink, 42.3ms
7: 480x480 (no detections), 42.3ms
Speed: 0.0ms preprocess, 42.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  96%|█████████▋| 482/500 [03:25<00:08,  2.22it/s]


0: 480x480 (no detections), 36.6ms
1: 480x480 (no detections), 36.6ms
2: 480x480 1 bowl, 4 broccolis, 1 carrot, 36.6ms
3: 480x480 16 persons, 36.6ms
4: 480x480 1 cup, 1 spoon, 1 bowl, 1 apple, 36.6ms
5: 480x480 1 bird, 4 zebras, 36.6ms
6: 480x480 2 broccolis, 2 keyboards, 36.6ms
7: 480x480 3 persons, 1 skateboard, 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  97%|█████████▋| 483/500 [03:26<00:07,  2.23it/s]


0: 480x480 (no detections), 36.4ms
1: 480x480 (no detections), 36.4ms
2: 480x480 (no detections), 36.4ms
3: 480x480 (no detections), 36.4ms
4: 480x480 (no detections), 36.4ms
5: 480x480 (no detections), 36.4ms
6: 480x480 (no detections), 36.4ms
7: 480x480 (no detections), 36.4ms
Speed: 0.0ms preprocess, 36.4ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 480)


Validating:  97%|█████████▋| 484/500 [03:26<00:06,  2.29it/s]


0: 480x480 1 person, 1 snowboard, 1 surfboard, 36.2ms
1: 480x480 4 giraffes, 36.2ms
2: 480x480 8 persons, 1 truck, 36.2ms
3: 480x480 2 persons, 6 cars, 2 traffic lights, 36.2ms
4: 480x480 1 person, 36.2ms
5: 480x480 4 persons, 36.2ms
6: 480x480 1 toilet, 36.2ms
7: 480x480 1 person, 1 remote, 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  97%|█████████▋| 485/500 [03:27<00:06,  2.31it/s]


0: 480x480 (no detections), 34.1ms
1: 480x480 5 elephants, 34.1ms
2: 480x480 1 bottle, 34.1ms
3: 480x480 (no detections), 34.1ms
4: 480x480 2 umbrellas, 34.1ms
5: 480x480 3 stop signs, 34.1ms
6: 480x480 (no detections), 34.1ms
7: 480x480 (no detections), 34.1ms
Speed: 0.0ms preprocess, 34.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  97%|█████████▋| 486/500 [03:27<00:05,  2.38it/s]


0: 480x480 2 pizzas, 38.8ms
1: 480x480 1 fire hydrant, 38.8ms
2: 480x480 1 horse, 1 cow, 1 elephant, 38.8ms
3: 480x480 2 persons, 1 bus, 1 train, 1 traffic light, 38.8ms
4: 480x480 1 tv, 38.8ms
5: 480x480 2 persons, 38.8ms
6: 480x480 1 cat, 38.8ms
7: 480x480 2 persons, 2 clocks, 38.8ms
Speed: 0.0ms preprocess, 38.8ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  97%|█████████▋| 487/500 [03:27<00:05,  2.33it/s]


0: 480x480 6 persons, 2 cars, 1 handbag, 1 tie, 1 sports ball, 37.4ms
1: 480x480 (no detections), 37.4ms
2: 480x480 2 persons, 1 train, 37.4ms
3: 480x480 2 cars, 1 bus, 37.4ms
4: 480x480 8 persons, 2 fire hydrants, 37.4ms
5: 480x480 2 persons, 1 surfboard, 37.4ms
6: 480x480 1 bird, 37.4ms
7: 480x480 1 wine glass, 1 sandwich, 37.4ms
Speed: 0.0ms preprocess, 37.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  98%|█████████▊| 488/500 [03:28<00:05,  2.32it/s]


0: 480x480 (no detections), 39.2ms
1: 480x480 (no detections), 39.2ms
2: 480x480 (no detections), 39.2ms
3: 480x480 (no detections), 39.2ms
4: 480x480 (no detections), 39.2ms
5: 480x480 (no detections), 39.2ms
6: 480x480 (no detections), 39.2ms
7: 480x480 (no detections), 39.2ms
Speed: 0.0ms preprocess, 39.2ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  98%|█████████▊| 489/500 [03:28<00:04,  2.33it/s]


0: 480x480 (no detections), 37.8ms
1: 480x480 (no detections), 37.8ms
2: 480x480 (no detections), 37.8ms
3: 480x480 (no detections), 37.8ms
4: 480x480 (no detections), 37.8ms
5: 480x480 (no detections), 37.8ms
6: 480x480 (no detections), 37.8ms
7: 480x480 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  98%|█████████▊| 490/500 [03:29<00:04,  2.34it/s]


0: 480x480 (no detections), 35.9ms
1: 480x480 (no detections), 35.9ms
2: 480x480 (no detections), 35.9ms
3: 480x480 (no detections), 35.9ms
4: 480x480 (no detections), 35.9ms
5: 480x480 (no detections), 35.9ms
6: 480x480 (no detections), 35.9ms
7: 480x480 (no detections), 35.9ms
Speed: 0.0ms preprocess, 35.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  98%|█████████▊| 491/500 [03:29<00:03,  2.38it/s]


0: 480x480 1 person, 1 bicycle, 1 bus, 38.3ms
1: 480x480 1 stop sign, 38.3ms
2: 480x480 3 persons, 1 bicycle, 38.3ms
3: 480x480 1 bed, 1 refrigerator, 38.3ms
4: 480x480 3 persons, 2 ties, 38.3ms
5: 480x480 2 persons, 3 airplanes, 2 bottles, 38.3ms
6: 480x480 11 books, 38.3ms
7: 480x480 5 surfboards, 38.3ms
Speed: 0.0ms preprocess, 38.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating:  98%|█████████▊| 492/500 [03:30<00:03,  2.35it/s]


0: 480x480 (no detections), 37.1ms
1: 480x480 (no detections), 37.1ms
2: 480x480 (no detections), 37.1ms
3: 480x480 (no detections), 37.1ms
4: 480x480 (no detections), 37.1ms
5: 480x480 (no detections), 37.1ms
6: 480x480 (no detections), 37.1ms
7: 480x480 (no detections), 37.1ms
Speed: 0.0ms preprocess, 37.1ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating:  99%|█████████▊| 493/500 [03:30<00:02,  2.37it/s]


0: 480x480 (no detections), 38.1ms
1: 480x480 (no detections), 38.1ms
2: 480x480 (no detections), 38.1ms
3: 480x480 (no detections), 38.1ms
4: 480x480 (no detections), 38.1ms
5: 480x480 (no detections), 38.1ms
6: 480x480 (no detections), 38.1ms
7: 480x480 (no detections), 38.1ms
Speed: 0.0ms preprocess, 38.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  99%|█████████▉| 494/500 [03:30<00:02,  2.37it/s]


0: 480x480 (no detections), 36.2ms
1: 480x480 (no detections), 36.2ms
2: 480x480 (no detections), 36.2ms
3: 480x480 (no detections), 36.2ms
4: 480x480 (no detections), 36.2ms
5: 480x480 (no detections), 36.2ms
6: 480x480 (no detections), 36.2ms
7: 480x480 (no detections), 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 480)


Validating:  99%|█████████▉| 495/500 [03:31<00:02,  2.39it/s]


0: 480x480 1 pizza, 1 dining table, 39.9ms
1: 480x480 2 persons, 1 handbag, 1 skis, 39.9ms
2: 480x480 1 person, 39.9ms
3: 480x480 (no detections), 39.9ms
4: 480x480 2 persons, 39.9ms
5: 480x480 5 zebras, 39.9ms
6: 480x480 1 mouse, 1 cell phone, 39.9ms
7: 480x480 1 potted plant, 1 vase, 39.9ms
Speed: 0.0ms preprocess, 39.9ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 480)


Validating:  99%|█████████▉| 496/500 [03:31<00:01,  2.32it/s]


0: 480x480 2 giraffes, 41.0ms
1: 480x480 6 persons, 1 bicycle, 3 cars, 1 traffic light, 1 bench, 1 handbag, 41.0ms
2: 480x480 1 person, 1 kite, 41.0ms
3: 480x480 1 car, 1 truck, 41.0ms
4: 480x480 1 train, 41.0ms
5: 480x480 (no detections), 41.0ms
6: 480x480 1 couch, 3 beds, 41.0ms
7: 480x480 1 person, 1 bed, 2 books, 41.0ms
Speed: 0.0ms preprocess, 41.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 480)


Validating:  99%|█████████▉| 497/500 [03:32<00:01,  2.28it/s]


0: 480x480 (no detections), 36.6ms
1: 480x480 2 wine glasss, 1 cup, 1 potted plant, 2 vases, 36.6ms
2: 480x480 1 train, 36.6ms
3: 480x480 5 persons, 6 bottles, 15 cups, 1 dining table, 36.6ms
4: 480x480 1 pizza, 36.6ms
5: 480x480 1 airplane, 36.6ms
6: 480x480 2 persons, 1 surfboard, 36.6ms
7: 480x480 (no detections), 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 480)


Validating: 100%|█████████▉| 498/500 [03:32<00:00,  2.27it/s]


0: 480x480 (no detections), 38.7ms
1: 480x480 (no detections), 38.7ms
2: 480x480 (no detections), 38.7ms
3: 480x480 (no detections), 38.7ms
4: 480x480 (no detections), 38.7ms
5: 480x480 (no detections), 38.7ms
6: 480x480 (no detections), 38.7ms
7: 480x480 (no detections), 38.7ms
Speed: 0.0ms preprocess, 38.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 480)


Validating: 100%|█████████▉| 499/500 [03:33<00:00,  2.31it/s]


0: 480x480 3 cars, 3 cows, 36.2ms
1: 480x480 1 fire hydrant, 36.2ms
2: 480x480 1 person, 1 skateboard, 36.2ms
3: 480x480 1 person, 2 zebras, 1 giraffe, 36.2ms
4: 480x480 1 person, 1 cell phone, 36.2ms
5: 480x480 1 person, 1 skateboard, 36.2ms
6: 480x480 1 clock, 36.2ms
7: 480x480 1 toilet, 1 clock, 36.2ms
Speed: 0.0ms preprocess, 36.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 480)


Validating: 100%|██████████| 500/500 [03:33<00:00,  2.34it/s]


Confiança Média: 0.5823
Entropia Média: 0.2723





In [17]:
print(avg_conf, avg_entropy)

0.5823175340445955 0.27230942
