In [1]:
import torchvision.datasets as dset
import torch
from torch.utils.data import DataLoader as DataLoader
from torchvision import transforms
from torchvision.transforms import ToTensor

In [2]:
# import torchvision.transforms as transforms
import cv2
import numpy as np
import torchvision
import torch
from torch.utils.data import DataLoader as DataLoader
import argparse
from PIL import Image

In [3]:
coco_names = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [4]:
pathToData="C:/Users/apole/Desktop/AML/ObjectDetection/V1/datasets/coco/data/val2017"
pathToJson="C:/Users/apole/Desktop/AML/ObjectDetection/V1/datasets/coco/annotations/instances_val2017.json"

In [5]:
# this will help us create a different color for each class
COLORS = np.random.uniform(0, 255, size=(len(coco_names), 3))

In [6]:
def predict(image, model, device, detection_threshold):
    # transform the image to tensor
    image = transform(image).to(device)
    image = image.unsqueeze(0) # add a batch dimension
    outputs = model(image) # get the predictions on the image
    # print the results individually
    # print(f"BOXES: {outputs[0]['boxes']}")
    # print(f"LABELS: {outputs[0]['labels']}")
    # print(f"SCORES: {outputs[0]['scores']}")
    # get all the predicited class names
    pred_classes = [coco_names[i] for i in outputs[0]['labels'].cpu().numpy()]
    # get score for all the predicted objects
    pred_scores = outputs[0]['scores'].detach().cpu().numpy()
    # get all the predicted bounding boxes
    pred_bboxes = outputs[0]['boxes'].detach().cpu().numpy()
    # get boxes above the threshold score
    boxes = pred_bboxes[pred_scores >= detection_threshold].astype(np.int32)
    return boxes, pred_classes, outputs[0]['labels']

def draw_boxes(boxes, classes, labels, image):
    # read the image with OpenCV
    image = cv2.cvtColor(np.asarray(image), cv2.COLOR_BGR2RGB)
    for i, box in enumerate(boxes):
        color = COLORS[labels[i]]
        cv2.rectangle(
            image,
            (int(box[0]), int(box[1])),
            (int(box[2]), int(box[3])),
            color, 2
        )
        cv2.putText(image, classes[i], (int(box[0]), int(box[1]-5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, 
                    lineType=cv2.LINE_AA)
    return image

In [7]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [8]:
transform = transforms.Compose([
    # you can add other transformations in this list
    transforms.ToTensor(),
    # transforms.Resize((512,512))
])

In [9]:
coco = dset.CocoDetection(root = pathToData, annFile = pathToJson, transform=transform)
print('Number of samples: ', len(coco))
coco

loading annotations into memory...
Done (t=0.99s)
creating index...
index created!
Number of samples:  5000


Dataset CocoDetection
    Number of datapoints: 5000
    Root location: C:/Users/apole/Desktop/AML/ObjectDetection/V1/datasets/coco/data/val2017
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [10]:
img, target = coco[0]
print (img.size)
print(len(target))
# img

<built-in method size of Tensor object at 0x000001C03FE06D00>
20


In [11]:
# def collate_fn(batch):
#     return tuple(zip(*batch))

In [22]:
def collate_fn(data):
    """Creates mini-batch tensors from the list of tuples (image, caption).
    
    We should build custom collate_fn rather than using default collate_fn, 
    because merging caption (including padding) is not supported in default.
    Args:
        data: list of tuple (image, caption). 
            - image: torch tensor of shape (3, 256, 256).
            - caption: torch tensor of shape (?); variable length.
    Returns:
        images: torch tensor of shape (batch_size, 3, 256, 256).
        targets: torch tensor of shape (batch_size, padded_length).
        lengths: list; valid length for each padded caption.
    """
    # Sort a data list by caption length (descending order).
    data.sort(key=lambda x: len(x[1]), reverse=True)
    images, captions = zip(*data)

    # Merge images (from tuple of 3D tensor to 4D tensor).
    images = torch.stack(images, 0)

    # Merge captions (from tuple of 1D tensor to 2D tensor).
    lengths = [len(cap) for cap in captions]
    targets = torch.zeros(len(captions), max(lengths)).long()
    for i, cap in enumerate(captions):
        end = lengths[i]
        targets[i, :end] = cap[:end]
        # targets[i, :end] = torch.from_numpy(np.array(list(map(ord, cap[:end])))).to(torch.long)     
    return images, targets, lengths

In [20]:
data_loader = DataLoader(coco, batch_size=1, shuffle=False, num_workers=0, collate_fn=collate_fn)
print(len(data_loader))
data_loader

5000


<torch.utils.data.dataloader.DataLoader at 0x1c051ad8eb0>

In [13]:
# Target is Tuple of Lists
for img, annotations in data_loader:
    # for t in annotations:
        # print(len(t))

        # print(x[0].items())
    # print(img)
    # print(target.size)
    # print(len(target))
    # print(torch.tensor(img))
    # print(img.count)
    break

In [117]:
# DataLoader is iterable over Dataset
# for imgs, annotations in data_loader:
    # imgs = list(img.to(device) for img in imgs)
    # annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    # print(imgs.)

In [14]:
# download or load the model from disk
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True, min_size=800)

In [21]:
model.eval()
for imgs, annotations in data_loader:
    model.to(device)

    image = imgs
    image = transform(image).to(device)
    image = image.unsqueeze(0)
    outputs = model(image)
    # get all the predicited class names
    pred_classes = [coco_names[i] for i in outputs[0]['labels'].cpu().numpy()]
    # get score for all the predicted objects
    pred_scores = outputs[0]['scores'].detach().cpu().numpy()
    # get all the predicted bounding boxes
    pred_bboxes = outputs[0]['boxes'].detach().cpu().numpy()
    # get boxes above the threshold score
    boxes = pred_bboxes[pred_scores >= .8].astype(np.int32)
    # boxes, pred_classes, outputs[0]['labels']
    print(boxes)
    print(pred_classes)
    print(outputs[0]['labels'])

    # image = imgs
    # boxes, classes, labels = predict(image, model, device, 0.8)
    # image = draw_boxes(boxes, classes, labels, image)
    # save_name = f"{args['input'].split('/')[-1].split('.')[0]}_{args['min_size']}"
    # cv2.imwrite(f"outputs/{save_name}.jpg", image)
    # cv2.waitKey(0)
    break


# image = Image.open(args['input'])
# model.eval().to(device)
# boxes, classes, labels = predict(image, model, device, 0.8)
# image = draw_boxes(boxes, classes, labels, image)
# cv2.imshow('Image', image)
# save_name = f"{args['input'].split('/')[-1].split('.')[0]}_{args['min_size']}"
# cv2.imwrite(f"outputs/{save_name}.jpg", image)
# cv2.waitKey(0)

TypeError: ord() expected string of length 1, but dict found