In [5]:
import os
import re
from xml.etree import ElementTree, ElementInclude
from collections import Counter

import iptcinfo3
from iptcinfo3 import IPTCInfo
from PIL import Image
import cv2

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

import torch
import torchvision

In [6]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, root = '/home/simon/Documents/Bodies/data/jeppe/', transforms = None, n_obs = 100):
        self.root = root
        self.transforms = transforms
        self.n_obs = n_obs

        # the selection need to happen here
        self.classes = [''] + self.__get_classes__() # list of classes accroding to n_obs, see __get_classes__
        self.classes_int = np.arange(0,len(self.classes)) # from 1 since no background '0'
        self.boxes = self.__get_boxes__() # list of xml files (box info) to n_obs, see __get_classes__
        self.imgs = [f"{i.split('.')[0]}.jpg" for i in self.boxes] # list of images - only take images with box info! and > n_obs
             
    def __get_classes__(self):
        """Creates a list of classes with >= n_obs observations"""
        n_obs = self.n_obs
        path = os.path.join(self.root, "images")

        obj_name = []
        classes = []

        # Get all objects that have been annotated
        for filename in os.listdir(path):
            if filename.split('.')[1] == 'xml':
                box_path = os.path.join(path, filename)

                tree = ElementTree.parse(box_path)
                lst_obj = tree.findall('object')

                for j in lst_obj:
                    obj_name.append(j.find('name').text)


        # now, only keep the objects w/ >= n_obs observations
        c = Counter(obj_name)

        for i in c.items():
            if i[1] >= n_obs:
                classes.append(i[0])
        
        return(classes)

    def __get_boxes__(self):
        """Make sure you only get images with valid boxes frrom the classes list - see __get_classes__"""

        path = os.path.join(self.root, "images")

        boxes = []
        # Get all objects that have been annotated
        for filename in os.listdir(path):
            if filename.split('.')[1] == 'xml':
                box_path = os.path.join(path, filename)

                tree = ElementTree.parse(box_path)
                lst_obj = tree.findall('object')

                # If there is one or more objects from the classes list, save the box filename
                if len(set([j.find('name').text for j in lst_obj]) & set(self.classes)) > 0:
                    boxes.append(filename)

        # Sort and return the boxes
        boxes = sorted(boxes)
        return(boxes)

    def __getitem__(self, idx):
        # dict to convert classes into classes_int
        class_to_int = dict(zip(self.classes,self.classes_int))        

        # load images
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        box_path = os.path.join(self.root, "images", self.boxes[idx])
        
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Resize img 800x800 --------------------------------------------
        target_size = 800

        y_orig_size = img.shape[0] # the original y shape
        x_orig_size = img.shape[1] # the original x shape
        y_scale = target_size/y_orig_size # scale factor for boxes
        x_scale = target_size/x_orig_size # scale factor for boxes

        img = cv2.resize(img, (target_size, target_size))
        # ----------------------------------------------------------------

        img = np.moveaxis(img, -1, 0) # move channels in front so h,w,c -> c,h,w
        img = img / 255.0 # norm ot range 0-1. Might move out..
        img = torch.Tensor(img)

        # Open xml path 
        tree = ElementTree.parse(box_path)

        lst_obj = tree.findall('object')

        obj_name = []
        obj_ids = []
        boxes = []

        for i in lst_obj:
        # here you need to ignore classes w/ n > n_obs

            obj_name_str = i.find('name').text
            if obj_name_str in self.classes:

                obj_name.append(obj_name_str) # get the actual class name
                obj_ids.append(class_to_int[i.find('name').text]) # get the int associated with the class name
                lst_box = i.findall('bndbox')

                for j in lst_box:

                    xmin = float(j.find('xmin').text) * x_scale # scale factor to fit resized image
                    xmax = float(j.find('xmax').text) * x_scale
                    ymin = float(j.find('ymin').text) * y_scale
                    ymax = float(j.find('ymax').text) * y_scale
                    boxes.append([xmin, ymin, xmax, ymax])
            else:
                pass

        num_objs = len(obj_ids) # number of objects

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(obj_ids, dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes 
        target["labels"] = labels
        target["image_id"] = image_id 
        target["area"] = area
        target["iscrowd"] = iscrowd 

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.imgs) # right now you do not differentiate between annotated images and not annotated images... 


    def target_classes(self):
        t_inst_classes = dict(zip(self.classes_int,self.classes)) # just a int to string dict
        return(t_inst_classes)

    def coco_classes(self):
        inst_classes = [
            '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
            'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
            'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
            'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
            'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
            'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
            'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] # a "ordered" list of the coco categories
        return(inst_classes) 

In [7]:
dataset = MyDataset(n_obs=500)

In [14]:
print('length of dataset = ', len(dataset), '\n')

img, target = dataset[6]
print(img.shape, '\n',target)

length of dataset =  769 

torch.Size([3, 800, 800]) 
 {'boxes': tensor([[367.0213,  53.4694, 539.0958, 387.3469],
        [365.9575,  54.2857, 539.6277, 390.6122],
        [363.2979,  50.2041, 541.2234, 394.2857],
        [355.5851,  47.7551, 543.6170, 419.1837],
        [ 12.7660,  18.3673, 180.8511, 316.7347],
        [  9.3085,  17.5510, 189.6277, 338.3673],
        [ 17.5532,  21.6327, 180.8511, 336.7347],
        [  4.5213,  11.8367, 185.9043, 357.5510]]), 'labels': tensor([1, 2, 3, 4, 1, 2, 3, 4]), 'image_id': tensor([6]), 'area': tensor([57451.8086, 58409.9062, 61220.8984, 69840.4219, 50151.1055, 57849.3203,
        51455.4883, 62706.6875]), 'iscrowd': tensor([0, 0, 0, 0, 0, 0, 0, 0])}


In [5]:
if 'b' in ['e','b']:
    print('cool')

cool


In [23]:
list1 = ['a','g','c']
list2 = ['b','j','f']

len(set(list1) & set(list2)) > 0

False

In [24]:
sorted(list1)

['a', 'c', 'g']

In [16]:
list1 = np.array( ['a','b','c'])
list2 = np.array(['b','a','f'])
list1.any(list2)

TypeError: cannot perform reduce with flexible type

In [3]:
dataset = MyDataset()


In [4]:
dataset.__getitem__(199)[0].shape

torch.Size([3, 800, 800])

In [5]:
dataset.__getitem__(np.random.randint(1,100))

(tensor([[[0.2627, 0.2549, 0.2627,  ..., 0.5529, 0.5529, 0.5333],
          [0.2549, 0.2706, 0.2627,  ..., 0.5451, 0.5451, 0.5333],
          [0.2588, 0.2510, 0.2588,  ..., 0.5373, 0.5529, 0.5451],
          ...,
          [0.1137, 0.1137, 0.1059,  ..., 0.4275, 0.4275, 0.4196],
          [0.1294, 0.1176, 0.1216,  ..., 0.4235, 0.4235, 0.4157],
          [0.1412, 0.1255, 0.1451,  ..., 0.4196, 0.4235, 0.4118]],
 
         [[0.2078, 0.2118, 0.2275,  ..., 0.4980, 0.4980, 0.4784],
          [0.2000, 0.2196, 0.2275,  ..., 0.4902, 0.4902, 0.4784],
          [0.2000, 0.1961, 0.2157,  ..., 0.4824, 0.4980, 0.4902],
          ...,
          [0.2000, 0.2000, 0.1922,  ..., 0.3765, 0.3765, 0.3686],
          [0.2039, 0.1961, 0.1961,  ..., 0.3725, 0.3725, 0.3647],
          [0.2157, 0.2039, 0.2196,  ..., 0.3686, 0.3725, 0.3608]],
 
         [[0.0941, 0.0941, 0.1059,  ..., 0.2510, 0.2510, 0.2314],
          [0.0863, 0.1059, 0.1059,  ..., 0.2431, 0.2431, 0.2314],
          [0.0863, 0.0824, 0.0980,  ...,

In [10]:
print('length of dataset = ', len(dataset), '\n')
img, target = dataset[78]
print(img.shape, '\n',target)

length of dataset =  816 

torch.Size([3, 800, 800]) 
 {'boxes': tensor([[139.5626, 300.9063, 273.7574, 470.3928],
        [138.5686, 301.8127, 274.3539, 476.4351],
        [135.1889, 297.5831, 270.3777, 473.1118],
        [137.3757, 303.6254, 272.7634, 468.8822],
        [331.8091, 169.1843, 469.1849, 474.6223],
        [331.8091, 177.6435, 468.1909, 474.6223],
        [329.0258, 181.8731, 470.3777, 477.3414],
        [325.6461, 175.2266, 469.1849, 479.7583]]), 'labels': tensor([1, 2, 3, 7, 1, 2, 3, 7]), 'image_id': tensor([78]), 'area': tensor([22744.2012, 23711.1504, 23729.5293, 22373.7363, 41959.7773, 40502.4805,
        41765.0039, 43712.1016]), 'iscrowd': tensor([0, 0, 0, 0, 0, 0, 0, 0])}


In [11]:
test_img = cv2.imread('/home/simon/Documents/Bodies/data/jeppe/images/JS67.jpg')
test_img.shape

(1960, 3008, 3)

In [8]:
dataset.classes

['',
 'person',
 'adult',
 'male',
 'firearm',
 'female',
 'religious_garment_female',
 'uniformed',
 'child',
 'youth']

In [39]:
new_classes = []
new_classes.append('')
new_classes

['']

In [117]:
boxes = []
len(torch.as_tensor(boxes, dtype=torch.float32)) == 0

True

In [15]:
dataset = MyDataset()


TypeError: unhashable type: 'dict'

In [19]:
dataset.__getitem__(np.random.randint(1,100))

TypeError: unhashable type: 'dict'

In [86]:
dataset = MyDataset()
['background'] + dataset.classes

['background',
 'person',
 'adult',
 'male',
 'firearm',
 'female',
 'religious_garment_female',
 'uniformed',
 'child',
 'youth']

In [124]:
dataset.__getitem__(0)[0].shape

torch.Size([3, 1960, 3008])

In [None]:
def cvTest():
    # imageToPredict = cv2.imread("img.jpg", 3)
    imageToPredict = cv2.imread("49466033\\img.png ", 3)
    print(imageToPredict.shape)

    # Note: flipped comparing to your original code!
    # x_ = imageToPredict.shape[0]
    # y_ = imageToPredict.shape[1]
    y_ = imageToPredict.shape[0]
    x_ = imageToPredict.shape[1]

    targetSize = 416
    x_scale = targetSize / x_
    y_scale = targetSize / y_
    print(x_scale, y_scale)
    img = cv2.resize(imageToPredict, (targetSize, targetSize));
    print(img.shape)
    img = np.array(img);

    # original frame as named values
    (origLeft, origTop, origRight, origBottom) = (160, 35, 555, 470)

    x = int(np.round(origLeft * x_scale))
    y = int(np.round(origTop * y_scale))
    xmax = int(np.round(origRight * x_scale))
    ymax = int(np.round(origBottom * y_scale))
    # Box.drawBox([[1, 0, x, y, xmax, ymax]], img)
    drawBox([[1, 0, x, y, xmax, ymax]], img)


cvTest()