<a href="https://colab.research.google.com/github/Amaljayaranga/DavisDataset/blob/master/JsonCreationNew.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!unzip 'dogs-jump.zip'

In [4]:
import torchvision
from PIL import Image
import torchvision.transforms as T
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import json

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_prediction(img_path, threshold):
    img = Image.open(img_path)
    transform = T.Compose([T.ToTensor()])
    img = transform(img)
    pred = model([img])
    pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
    pred_boxes = [[(i[0], i[1]), (i[2], i[3])] for i in list(pred[0]['boxes'].detach().numpy())]
    pred_score = list(pred[0]['scores'].detach().numpy())
    pred_t = [pred_score.index(x) for x in pred_score if x > threshold][-1]
    pred_boxes = pred_boxes[:pred_t + 1]
    pred_class = pred_class[:pred_t + 1]
    return pred_boxes, pred_class

def object_detection_api(img_path, threshold=0.5, rect_th=3, text_size=3, text_th=3):
  boxes, pred_cls = get_prediction(img_path, threshold)
  img = cv2.imread(img_path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  for i in range(len(boxes)):
    cv2.rectangle(img, boxes[i][0], boxes[i][1],color=(0, 255, 0), thickness=rect_th)
    cv2.putText(img,pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
  plt.figure(figsize=(20,30))
  plt.imshow(img)
  plt.xticks([])
  plt.yticks([])
  plt.show()

#object_detection_api('./bird.jpg', threshold=0.5)
#boxes, pred_cls = get_prediction('./2.jpg', threshold=0.6)
#print(pred_cls)
#print(boxes)

img_data = {}
img_data['imgs'] = []
path = './dogs-jump/'

images = os.listdir(path)
for image in images:
    boxes, pred_cls = get_prediction(path+image, threshold=0.6)
    np_boxes = np.array(boxes)
    np_classes = np.array(pred_cls)
    img_object = []

    for class_, box_ in zip(np_classes, np_boxes):
      object = {}
      object["class"] = class_
      object["x"] = int(box_[0][0])
      object["y"] = int(box_[0][1])
      object["width"] = int(box_[1][0] - box_[0][0])
      object["height"] = int(box_[1][1] - box_[0][1])
      img_object.append(object)

    single_image = {}
    single_image["index"] = image.split('.')[0]
    single_image["objects"] = img_object
    img_data['imgs'].append(single_image)

print(img_data)
with open('datajson-d.txt', 'w') as outfile:
    json.dump(img_data, outfile)


{'imgs': [{'index': '00049', 'objects': [{'class': 'dog', 'x': 319, 'y': 270, 'width': 168, 'height': 116}, {'class': 'person', 'x': 368, 'y': 160, 'width': 75, 'height': 138}, {'class': 'dog', 'x': 774, 'y': 172, 'width': 76, 'height': 249}]}, {'index': '00064', 'objects': [{'class': 'person', 'x': 339, 'y': 162, 'width': 82, 'height': 221}, {'class': 'dog', 'x': 718, 'y': 184, 'width': 132, 'height': 236}, {'class': 'cow', 'x': 483, 'y': 334, 'width': 54, 'height': 63}]}, {'index': '00003', 'objects': [{'class': 'person', 'x': 361, 'y': 129, 'width': 86, 'height': 230}, {'class': 'dog', 'x': 448, 'y': 230, 'width': 107, 'height': 146}, {'class': 'dog', 'x': 392, 'y': 264, 'width': 50, 'height': 100}]}, {'index': '00055', 'objects': [{'class': 'person', 'x': 356, 'y': 163, 'width': 100, 'height': 216}, {'class': 'dog', 'x': 459, 'y': 287, 'width': 63, 'height': 105}, {'class': 'dog', 'x': 715, 'y': 187, 'width': 136, 'height': 231}]}, {'index': '00009', 'objects': [{'class': 'dog', 'x