In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import json
import math
import os 
import torch


In [59]:
### Read all digit boxes annotated for an image (objects named 'odometer', 'X' or ',' are skipped).
### Returns the list of box coordinates, the list of integer labels, and the number of objects.

def read_number_boxes(img, anno_dir='trodo-v01/pascal voc 1.1/Annotations/'):
    """Read the digit bounding boxes annotated for one image.

    The annotation file is ``<anno_dir>/<image name without extension>.xml``.
    Every direct ``<object>`` child of the XML root contributes one box and one
    label, except objects whose ``<name>`` is ``'odometer'``, ``'X'`` or ``','``,
    which are skipped.

    Args:
        img: image file name, e.g. ``'0001.jpg'``. Only the final extension is
            removed, so names that contain additional dots are handled.
        anno_dir: directory that holds the XML annotation files.

    Returns:
        A tuple ``(boxes, labels, num_obj)`` where ``boxes`` is a list of
        ``[xmin, ymin, xmax, ymax]`` integer lists (minimum corner rounded down,
        maximum corner rounded up), ``labels`` is the list of ``int`` labels in
        the same order, and ``num_obj`` is ``len(labels)``.

    Raises:
        FileNotFoundError: if the annotation file does not exist.
        ValueError: if a non-skipped ``<name>`` is not an integer or a
            coordinate is not numeric.
        TypeError: if a ``<bndbox>`` lacks one of the four coordinate tags.
    """
    skipped_names = ('odometer', 'X', ',')

    stem = os.path.splitext(img)[0]
    root = ET.parse(os.path.join(anno_dir, stem + '.xml')).getroot()

    boxes = []
    labels = []
    for obj in root.findall('object'):
        name = obj.findtext('name')
        bndbox = obj.find('bndbox')
        # A label and its box are taken from the same <object>, so they cannot
        # drift apart regardless of the order of the child elements.
        if name is None or bndbox is None:
            continue
        name = name.strip()
        if name in skipped_names:
            continue

        # Coordinates are looked up by tag instead of by position.
        xmin, ymin, xmax, ymax = (
            float(bndbox.findtext(tag)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        labels.append(int(name))
        # Round outwards so the integer box always contains the annotated box.
        boxes.append([math.floor(xmin), math.floor(ymin),
                      math.ceil(xmax), math.ceil(ymax)])

    return boxes, labels, len(labels)

#read_number_boxes('00000002-PHOTO-2020-11-20-11-21-22.jpg')

def filelist(root, file_type):
    """Collect every file below ``root`` whose name ends with ``file_type``.

    The tree is traversed with ``os.walk``; each hit is returned as the
    containing directory joined with the file name.
    """
    matches = []
    for directory_path, _subdirs, files in os.walk(root):
        for file_name in files:
            if file_name.endswith(file_type):
                matches.append(os.path.join(directory_path, file_name))
    return matches

def generate_train_df (anno_path):
    """Build one DataFrame row per XML annotation file found under ``anno_path``.

    Columns, in order: ``width`` and ``height`` (the text of ``size/width`` and
    ``size/height``, kept as strings), ``image`` (annotation file name with the
    ``.xml`` extension replaced by ``.jpg``), and ``boxes``, ``labels``,
    ``num_obj`` as returned by ``read_number_boxes``.

    Note that ``read_number_boxes`` is called with the image name only, so it
    resolves the XML file from its own annotation directory rather than from
    ``anno_path``; both should point at the same directory.

    Args:
        anno_path: directory that is searched recursively for ``.xml`` files.

    Returns:
        ``pandas.DataFrame`` with one row per annotation file.
    """
    records = []
    for xml_path in filelist(anno_path, '.xml'):
        root = ET.parse(xml_path).getroot()

        # basename/splitext work with any path separator and keep dots that
        # are part of the file name itself.
        stem = os.path.splitext(os.path.basename(xml_path))[0]

        record = {}
        record['width'] = root.find("./size/width").text
        record['height'] = root.find("./size/height").text
        record['image'] = stem + '.jpg'
        record['boxes'], record['labels'], record['num_obj'] = read_number_boxes(record['image'])
        records.append(record)
    return pd.DataFrame(records)

# Build the per-image annotation table (one row per XML file) from the
# Pascal VOC annotation directory; `pdXML` is merged with the ground truth
# in a later cell.
pdXML = generate_train_df('trodo-v01/pascal voc 1.1/Annotations/')

# Last expression of the cell: renders the table as the cell output.
pdXML

Unnamed: 0,width,height,image,boxes,labels,num_obj
0,900,1600,00001909-PHOTO-2020-12-20-10-36-05.jpg,"[[325, 815, 361, 857], [238, 818, 273, 862], [...","[8, 0, 0, 1, 1, 1]",6
1,472,1024,00000095-PHOTO-2020-11-21-20-48-57.jpg,"[[200, 485, 211, 501], [186, 503, 198, 521], [...","[5, 2, 2, 2, 0, 0, 0, 1, 9, 9, 9, 7, 3, 3, 3]",15
2,766,1024,00000563-PHOTO-2020-11-30-13-25-25.jpg,"[[435, 363, 475, 410], [342, 379, 384, 434], [...","[8, 2, 0, 0, 7, 7, 5, 6]",8
3,768,1024,00000241-PHOTO-2020-11-24-08-54-09.jpg,"[[230, 423, 242, 444], [191, 423, 205, 444], [...","[7, 5, 1, 1, 9]",5
4,576,1024,00001513-PHOTO-2020-12-15-22-19-59.jpg,"[[328, 588, 339, 614], [263, 586, 274, 610], [...","[1, 1, 0, 6, 4, 0]",6
...,...,...,...,...,...,...
2384,768,1024,00001143-PHOTO-2020-12-10-11-20-27.jpg,"[[361, 428, 388, 460], [395, 423, 416, 449], [...","[3, 5, 0, 0, 6, 1]",6
2385,575,1024,00001218-PHOTO-2020-12-10-11-47-26.jpg,"[[258, 426, 279, 450], [276, 412, 300, 437], [...","[5, 3, 3, 9, 5, 4, 3, 0, 0, 0, 0]",11
2386,768,1024,00000166-PHOTO-2020-11-22-21-02-40.jpg,"[[438, 405, 490, 463], [281, 363, 305, 396], [...","[5, 4, 7, 0, 0, 3, 3, 8, 8, 0, 4]",11
2387,768,1024,00001422-PHOTO-2020-12-15-22-17-24.jpg,"[[369, 292, 419, 356], [153, 265, 164, 296], [...","[7, 2, 1, 3, 6, 0, 0, 0]",8


#  Defining the dataset

In [9]:

class TrodoDataset(torch.utils.data.Dataset):
    """Detection dataset over the files in ``<root>/images``.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    NOTE(review): ``Image``, ``get_labels`` and ``read_odometer_boxes`` are not
    defined in this cell and must be provided by another cell (``Image`` is
    presumably ``PIL.Image`` -- confirm).
    """

    def __init__(self, root, data_file, transforms=None):
        """Index the image files.

        Args:
            root: dataset directory containing an ``images`` sub-directory.
            data_file: passed unchanged to ``get_labels`` for every item.
            transforms: optional callable ``(img, target) -> (img, target)``.
        """
        self.root = root
        self.transforms = transforms
        # Sorted so the index -> file mapping does not depend on listing order.
        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.data_file = data_file

    def __getitem__(self, idx):
        """Return the RGB image and its target dict for position ``idx``."""
        image_name = self.imgs[idx]
        img_path = os.path.join(self.root, "images", image_name)
        img = Image.open(img_path).convert("RGB")

        label = get_labels(self.data_file, idx)
        # NOTE(review): assumes read_odometer_boxes returns rows of
        # [xmin, ymin, xmax, ymax]; the area expression below indexes
        # columns 0..3 of a 2-D tensor -- confirm against its definition.
        box_list = read_odometer_boxes(image_name)
        boxes = torch.as_tensor(box_list, dtype=torch.float32)

        # One object, and therefore one label, per image.
        num_objs = 1
        labels = torch.tensor((label,), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        # Previously computed but never stored in the target.
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Number of files found in ``<root>/images``."""
        return len(self.imgs)

In [None]:
def __getitem__(self, idx):
    """Build the ``(image, target)`` pair for index ``idx`` from an instance mask.

    Boxes are derived from the mask: every distinct non-background value in
    the mask image is one instance, and its box is the extent of the pixels
    carrying that value.

    NOTE(review): this is a module-level function taking ``self``; it reads
    ``self.root``, ``self.imgs``, ``self.masks`` and ``self.transforms``, and
    ``Image`` is not defined in the visible cells (presumably ``PIL.Image``
    -- confirm).

    Returns:
        ``(img, target)`` where ``target`` has the keys ``boxes``, ``labels``,
        ``masks``, ``image_id``, ``area`` and ``iscrowd``.
    """
    # load images and masks
    img_path = os.path.join(self.root, "images", self.imgs[idx])
    # Restored: mask_path is used below but its definition was commented out.
    mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
    img = Image.open(img_path).convert("RGB")
    # note that we haven't converted the mask to RGB,
    # because each color corresponds to a different instance
    # with 0 being background
    mask = Image.open(mask_path)

    mask = np.array(mask)
    # instances are encoded as different colors
    obj_ids = np.unique(mask)
    # first id is the background, so remove it
    obj_ids = obj_ids[1:]

    # split the color-encoded mask into a set
    # of binary masks
    masks = mask == obj_ids[:, None, None]

    # get bounding box coordinates for each mask
    num_objs = len(obj_ids)
    boxes = []
    for i in range(num_objs):
        pos = np.where(masks[i])
        xmin = np.min(pos[1])
        xmax = np.max(pos[1])
        ymin = np.min(pos[0])
        ymax = np.max(pos[0])
        boxes.append([xmin, ymin, xmax, ymax])

    # reshape keeps the tensor 2-D (N, 4) even when the mask holds no
    # instance, so the column indexing used for `area` stays valid.
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    # there is only one class
    labels = torch.ones((num_objs,), dtype=torch.int64)
    masks = torch.as_tensor(masks, dtype=torch.uint8)

    image_id = torch.tensor([idx])
    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    # suppose all instances are not crowd
    iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

    target = {}
    target["boxes"] = boxes
    target["labels"] = labels
    target["masks"] = masks
    target["image_id"] = image_id
    target["area"] = area
    target["iscrowd"] = iscrowd

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target

def __len__(self):
    """Return the number of entries in ``self.imgs``."""
    return len(self.imgs)

In [62]:
# The ground-truth file nests its records under the top-level "odometers"
# key, so it is parsed with json and flattened with json_normalize instead of
# being read directly with pd.read_json.
with open('trodo-v01/ground truth/groundtruth.json', 'r') as gt_file:
    groundTruth = json.load(gt_file)

groundTruth_DF = pd.json_normalize(groundTruth, record_path=['odometers'])

# Left join on the image file name: every ground-truth row is kept and
# receives the annotation columns of the matching image from pdXML.
merged_JSON_XML_DF = groundTruth_DF.merge(pdXML, how='left', on='image')

# Show up to 100 rows so the frame can be scrolled in the output.
pd.set_option('display.max_rows', 100)

# Optional (disabled): encode odometer_type as analog -> 0, digital -> 1.
#class_dict = {'analog': 0, 'digital': 1}
#merged_JSON_XML_DF['odometer_type']= merged_JSON_XML_DF['odometer_type'].apply(lambda x:  class_dict[x])

# Store the mileage as an integer (parsed as float first, then truncated).
merged_JSON_XML_DF['mileage'] = merged_JSON_XML_DF['mileage'].apply(
    lambda mileage: int(float(mileage))
)

# Display the merged dataframe
merged_JSON_XML_DF
#244362	768	1024	odometer	249.0	399.21	

Unnamed: 0,image,odometer_type,mileage,width,height,boxes,labels,num_obj
0,00000002-PHOTO-2020-11-20-11-21-22.jpg,analog,244362,768,1024,"[[291, 408, 315, 439], [325, 407, 350, 439], [...","[4, 4, 2, 2, 6, 3]",6
1,00000003-PHOTO-2020-11-20-11-21-23.jpg,digital,64750,768,1024,"[[381, 421, 393, 438], [347, 454, 358, 473], [...","[0, 2, 2, 3, 1, 6, 5, 4, 7, 2]",10
2,00000004-PHOTO-2020-11-20-11-21-25.jpg,digital,159073,1024,768,"[[603, 404, 629, 446], [590, 406, 600, 445], [...","[5, 1, 2, 4, 9, 3, 7, 0, 9, 0, 2]",11
3,00000005-PHOTO-2020-11-20-11-21-26.jpg,analog,18613,576,1024,"[[319, 612, 333, 626], [305, 597, 312, 616], [...","[3, 1, 6, 8, 1, 0]",6
4,00000006-PHOTO-2020-11-20-11-21-26.jpg,analog,35376,768,1024,"[[433, 510, 468, 547], [320, 562, 354, 600], [...","[6, 5, 3, 3, 0, 7, 0, 4, 0, 3, 0, 7]",12
...,...,...,...,...,...,...,...,...
2384,00006473-PHOTO-2020-12-29-21-24-53.jpg,digital,118818,901,1600,"[[416, 765, 445, 809], [442, 676, 465, 719], [...","[2, 9, 4, 8, 8, 8, 2, 2, 1, 1, 1, 1, 3]",13
2385,00006474-PHOTO-2020-12-29-21-31-54.jpg,analog,27203,747,1328,"[[441, 717, 464, 744], [320, 705, 345, 735], [...","[3, 2, 0, 0, 7, 2]",6
2386,00006496-PHOTO-2020-12-29-21-34-33.jpg,analog,17611,1200,1600,"[[478, 961, 493, 983], [551, 966, 585, 1003], ...","[0, 7, 6, 1, 1, 1, 0]",7
2387,00006499-PHOTO-2020-12-29-21-58-07.jpg,digital,30076,738,1600,"[[322, 842, 338, 865], [341, 870, 353, 896], [...","[3, 1, 2, 5, 6, 6, 7, 0, 0]",9


In [63]:
# Persist the merged ground-truth/annotation table next to the notebook.
# NOTE(review): with to_csv defaults the row index is written as an unnamed
# first column, and the list-valued columns (boxes, labels) are stored as
# their string form -- confirm that whatever reads this file parses them back.
merged_JSON_XML_DF.to_csv('dataFrameMultipleDigitBoxes.csv')
