<a href="https://colab.research.google.com/github/Amaljayaranga/DavisDataset/blob/master/JsonCreation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import torch
import torchvision
from PIL import Image
import torchvision.transforms as T
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import json

DEVICE = 'cuda'
if not torch.cuda.is_available():
    DEVICE = 'cpu'

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
model = model.to(DEVICE)

COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_prediction(img_path, threshold):
    img = Image.open(img_path)
    transform = T.Compose([T.ToTensor()])
    img = transform(img)
    pred = model([img])
    pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
    pred_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(pred[0]['boxes'].detach().numpy())]
    pred_score = list(pred[0]['scores'].detach().numpy())
    pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1]
    pred_boxes = pred_boxes[:pred_t + 1]
    pred_class = pred_class[:pred_t + 1]
    return pred_boxes, pred_class

def object_detection_api(img_path, threshold=0.5, rect_th=3, text_size=3, text_th=3):
  boxes, pred_cls = get_prediction(img_path, threshold)
  img = cv2.imread(img_path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  for i in range(len(boxes)):
    cv2.rectangle(img, boxes[i][0], boxes[i][1],color=(0, 255, 0), thickness=rect_th)
    cv2.putText(img,pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
  plt.figure(figsize=(20,30))
  plt.imshow(img)
  plt.xticks([])
  plt.yticks([])
  plt.show()

#object_detection_api('./bird.jpg', threshold=0.5)
#boxes, pred_cls = get_prediction('./2.jpg', threshold=0.6)
#print(pred_cls)
#print(boxes)

img_data = {}
img_data['imgs'] = []
path = './bear/'

images = os.listdir(path)
for image in images:
    boxes, pred_cls = get_prediction(path+image, threshold=0.6)
    np_boxes = np.array(boxes)
    np_classes = np.array(pred_cls)
    img_object = []

    if np_classes.size == 1:
        for class_, box_ in zip(np_classes, np_boxes):
            object = {}
            object["class"] = class_
            object["x"] = int(box_[0][0])
            object["y"] = int(box_[0][1])
            object["width"] = int(box_[1][0] - box_[0][0])
            object["height"] = int(box_[1][1] - box_[0][1])
        img_object.append(object)
    else:
        for class_, box_ in zip(np_classes, np_boxes):
            object = {}
            object["class"] = class_
            for box_idx, box in enumerate(box_):
                object["x"] = int(np_boxes[box_idx][0][0])
                object["y"] = int(np_boxes[box_idx][0][1])
                object["width"] = int(np_boxes[box_idx][1][0] - np_boxes[box_idx][0][0])
                object["height"] = int(np_boxes[box_idx][1][1] - np_boxes[box_idx][0][1])
            img_object.append(object)

    single_image = {}
    single_image["index"] = image.split('.')[0]
    single_image["objects"] = img_object
    img_data['imgs'].append(single_image)

print(img_data)
with open('datajson.txt', 'w') as outfile:
    json.dump(img_data, outfile)




{'imgs': [{'index': '00036', 'objects': [{'class': 'bear', 'x': 104, 'y': 228, 'width': 383, 'height': 235}]}, {'index': '00001', 'objects': [{'class': 'bear', 'x': 84, 'y': 212, 'width': 378, 'height': 214}]}, {'index': '00078', 'objects': [{'class': 'bear', 'x': 80, 'y': 161, 'width': 360, 'height': 222}]}, {'index': '00026', 'objects': [{'class': 'bear', 'x': 106, 'y': 221, 'width': 368, 'height': 213}]}, {'index': '00074', 'objects': [{'class': 'bear', 'x': 84, 'y': 170, 'width': 357, 'height': 221}]}, {'index': '00081', 'objects': [{'class': 'bear', 'x': 74, 'y': 157, 'width': 360, 'height': 242}]}, {'index': '00066', 'objects': [{'class': 'bear', 'x': 99, 'y': 182, 'width': 338, 'height': 222}]}, {'index': '00072', 'objects': [{'class': 'bear', 'x': 94, 'y': 169, 'width': 342, 'height': 223}]}, {'index': '00068', 'objects': [{'class': 'bear', 'x': 103, 'y': 172, 'width': 335, 'height': 226}]}, {'index': '00004', 'objects': [{'class': 'bear', 'x': 86, 'y': 214, 'width': 358, 'heig

In [5]:
!unzip bearc.zip

Archive:  bearc.zip
   creating: bearc/
 extracting: bearc/00000.jpg         
 extracting: bearc/00001.jpg         
 extracting: bearc/00002.jpg         
 extracting: bearc/00003.jpg         
 extracting: bearc/00004.jpg         
 extracting: bearc/00005.jpg         
