### Download dataset

In [None]:
%%bash
# Download the dataset archive from Google Drive, unpack it into the current
# directory and remove the temporary files afterwards.
set -e  # stop at the first failing command instead of continuing with a missing or partial archive

gfileid="1uS3Wi68-e8540HtZp8b9v_uhoDyTS8yL"
destination_dir="./"
destination_path="${destination_dir}dataset.tar.gz"

# First request: only stores the cookie jar; the response body is discarded.
curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${gfileid}" > /dev/null
# Second request: replay the cookie and pass the last field of the cookie line
# matching "download" as the confirm token.
confirm_token="$(awk '/download/ {print $NF}' ./cookie)"
curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=${confirm_token}&id=${gfileid}" -o "${destination_path}"

# Unpack, move the contents of dataset/ into the working directory, then clean up.
tar -zxf "${destination_path}"
mv dataset/* .
rm -rf "${destination_path}" cookie dataset/

In [None]:
import os
import math
from PIL import Image, ImageOps, ImageDraw
import xml.etree.ElementTree as ET
import pandas as pd
import random

### Helper functions


In [None]:
def write_to_file(path, anno_list):
    """Write the strings in ``anno_list`` to ``path``, one per line.

    The file is opened in ``"w"`` mode, so existing content is replaced. The
    entries are joined with a newline; no newline follows the last entry.
    """
    with open(path, "w") as handle:
        content = "\n".join(anno_list)
        handle.write(content)
def read_file(path):
    """Return the lines of the text file at ``path`` as a list.

    The list is what ``readlines()`` produces, so each element keeps its
    trailing newline character.
    """
    with open(path, "r") as handle:
        lines = handle.readlines()
    return lines
def yolo_str(c, x, y, w, h):
    """Format one annotation as the space-separated string ``"c x y w h"``.

    Each value is converted with ``str()`` and the five results are joined
    with a single space.
    """
    fields = (c, x, y, w, h)
    return " ".join(map(str, fields))

def img_to_32_multiplier(img):
    """Pad ``img`` so that its width and height are multiples of 32.

    Padding is added only on the right and bottom edges and is filled with
    black. A dimension that is already a multiple of 32 gets no padding.

    Returns the image produced by ``ImageOps.expand``.
    """
    width, height = img.size
    # (-n) % 32 is the distance from n up to the next multiple of 32 (0 if aligned).
    pad_right = -width % 32
    pad_bottom = -height % 32
    return ImageOps.expand(img, border=(0, 0, pad_right, pad_bottom), fill='black')


def conver_to_new_img(img_path, new_file_suffix="_m32.",save=True):
    """Open an image, pad it to multiples of 32 and optionally save the result.

    Parameters
    ----------
    img_path : str
        Path of the image to open.
    new_file_suffix : str
        Text inserted between the file stem and the extension of the saved
        copy. It is expected to end with the dot that precedes the extension
        (the default is ``"_m32."``).
    save : bool
        When true, the padded image is written next to the original as
        ``<stem><new_file_suffix><extension>``.

    Returns
    -------
    tuple
        ``(padded_image, original_width, original_height)``.
    """
    img = Image.open(img_path)
    org_w, org_h = img.size
    new_img = img_to_32_multiplier(img)
    if save:
        # splitext splits only at the final dot, so directories or stems that
        # contain dots (e.g. "./data/pic.v2.jpg") keep their full name.
        stem, ext = os.path.splitext(img_path)
        new_img.save(stem + new_file_suffix + ext.lstrip("."))
    return new_img, org_w, org_h

### 1. CARPK_devkit & PUCPR+_devkit

In [None]:
def carpk_to_yolo(text):
    """Convert one annotation line from corner format to centre/size format.

    ``text`` is a single-space separated line whose first four numbers are
    read as ``x1 y1 x2 y2``; any further numbers are parsed but ignored.

    Returns ``(cx, cy, w, h)`` in the same units as the input.
    """
    values = [float(token) for token in text.strip().split(" ")]
    left, top, right, bottom = values[0], values[1], values[2], values[3]
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    return center_x, center_y, right - left, bottom - top

In [None]:
# Text inserted before the extension of generated files; it already contains the
# dot that precedes the extension.
new_file_suffix = "_m32."
# Dataset directories handled by the conversion loop below; each one is listed
# through its "Annotations" and "Images" sub-folders.
folders = ["CARPK_devkit/data", "PUCPR+_devkit/data"]

In [None]:
# Convert the corner-format annotations of each folder into normalized YOLO lines
# and write them next to the originals as "<stem>_m32.txt".

# Size every box is normalized by.
# NOTE(review): a single fixed size is assumed for all images (presumably the
# padded 1280x736 frame) - confirm against the images actually used for training.
imgW, imgH = 1280, 736

for folder in folders:
    anno_dir = os.path.join(folder, "Annotations")
    # Skip files produced by a previous run (their names contain "m32").
    annotations = [anno for anno in os.listdir(anno_dir) if "m32" not in anno]
    imgs = set(img for img in os.listdir(os.path.join(folder, "Images")) if "m32" not in img)

    for anno in annotations:
        print(anno)
        stem = anno.split(".")[0]
        # Only annotations with a matching PNG image are converted.
        if stem + ".png" not in imgs:
            continue
        with open(os.path.join(anno_dir, anno), "r") as file:
            lines = file.readlines()
        # Files with 150 or more boxes are skipped (the reason for this
        # threshold is not evident from the code).
        if len(lines) >= 150:
            continue
        yolo_list = []
        for line in lines:
            cx, cy, w, h = carpk_to_yolo(line)
            # Every box is written with class 0.
            yolo_list.append(yolo_str(0, cx/imgW, cy/imgH, w/imgW, h/imgH))
        write_to_file(os.path.join(anno_dir, stem + new_file_suffix + "txt"), yolo_list)

###  2. Vehicules1024

```
Mapping list:
1  car
2  truck
23  ship -> ignore
4  tractor -> ignore
5  camping car -> van
9  van
10 vehicle  -> ignore          
11 pick-up -> car
31 plane -> ignore
```

Format:
Each row contains: the image ID, the coordinates of the center in the image, the
orientation of the vehicle, the coordinates of the 4 corners (the four x-coordinates followed by the four y-coordinates), the class name, a flag stating
if the target is entirely contained in the image, and a flag stating if the vehicle is occluded.

In [None]:
def get_location_and_size(row):
    """Extract the box centre and axis-aligned size from one annotation row.

    ``row`` is indexed positionally: positions 2 and 3 hold the centre,
    positions 5-8 the x-coordinates of the corners and positions 9-12 the
    y-coordinates of the corners. All values are converted with ``float``.

    Returns ``(xc, yc, w, h)`` where ``w`` and ``h`` are the extents of the
    corner coordinates along x and y.
    """
    center_x = float(row[2])
    center_y = float(row[3])
    corner_xs = [float(row[k]) for k in range(5, 9)]
    corner_ys = [float(row[k]) for k in range(9, 13)]
    width = max(corner_xs) - min(corner_xs)
    height = max(corner_ys) - min(corner_ys)
    return center_x, center_y, width, height
def vehica_tranform_class(class_no):
    """Map a Vehicules1024 class number to the unified class index.

    Unified indices used throughout this notebook: 0 car, 1 truck, 2 bus,
    3 van, 4 cyclist, 5 pedestrian.

    Mapping applied here:
      * 1 (car), 11 (pick-up)    -> 0 (car)
      * 2 (truck)                -> 1 (truck)
      * 5 (camping car), 9 (van) -> 3 (van)

    Returns ``None`` for every other class number.
    """
    if class_no in [1, 11]:
        return 0
    if class_no in [2]:
        # Truck is index 1; index 2 is the bus class in the other converters
        # and in the final statistics.
        return 1
    if class_no in [5, 9]:
        return 3
    return None

In [None]:
folder = "Vehicules1024/Annotations1024/"
anno_file = "annotation1024.txt"
new_anno_file =  "annotation1024_fixed.txt"

# Only the "_fixed" file is read below; anno_file is not used in this cell.
anno_path = os.path.join(folder, new_anno_file)
img_w, img_h = 1024, 1024

# Image IDs that are skipped entirely.
excluded_ids = {'00000165', '00000327', '00000364', '00000581', '00000824', '00000839'}
# remove ship plane tractor vehicle
ignored_classes = {23, 4, 31, 10}

bad_annos = []    # image IDs with at least one class that has no mapping
empty_annos = []  # image IDs for which no usable box remained

# header=None: the file has no header row, so columns get integer labels 0..N.
# (Negative header values are rejected by current pandas.)
df = pd.read_csv(anno_path, sep=' ', header=None, dtype=str)

grouped = df.groupby(0)  # column 0 is the image ID
for name, group in grouped:
    if name in excluded_ids:
        continue
    anno_text = name + '_co.txt'
    # Keep rows whose column 13 is "1" and column 14 is "0"; per the format
    # description these are the "entirely contained" and "occluded" flags.
    filtered_group = group[(group[13] == "1") & (group[14] == "0")]
    annos = []
    for row in filtered_group.itertuples():
        # itertuples() puts the index at position 0, so position 13 is column 12 (the class).
        c = int(row[13])
        if c in ignored_classes:
            continue
        c = vehica_tranform_class(c)
        if c is None:
            bad_annos.append(name)
            continue
        xc, yc, w, h = get_location_and_size(row)
        annos.append(yolo_str(c, xc/img_w, yc/img_h, w/img_w, h/img_h))
    if annos:
        write_to_file(os.path.join(folder, anno_text), annos)
    else:
        empty_annos.append(name)

# Remove the annotation files of images that contained an unmapped class.
for bad in bad_annos:
    file_path = os.path.join(folder, bad + "_co.txt")
    if os.path.isfile(file_path):
        os.remove(file_path)
# Remove the images for which no annotation was written.
for e_name in empty_annos:
    file_path = os.path.join(folder, e_name + "_co.png")
    if os.path.isfile(file_path):
        os.remove(file_path)
    


### 3. Aerial

In [None]:
def aerial_convert(img_str):
    """Pad one aerial image to multiples of 32 and rescale its annotations.

    Relies on the module-level names ``path`` (directory holding the image and
    its ``.txt`` annotation) and ``new_file_suffix`` (text inserted before the
    extension of the generated files, ending with a dot).

    For the image ``<stem>.<ext>`` this writes ``<stem><suffix><ext>`` (padded
    image) and ``<stem><suffix>txt`` (adjusted annotations). Annotation lines
    are read as ``class xc yc w h``; coordinates are multiplied by the
    original/padded size ratio, and lines of class 4 are dropped.
    """
    img_path = os.path.join(path, img_str)
    img = Image.open(img_path)
    org_w, org_h = img.size
    # splitext splits only at the last dot, so dotted directory or file names are safe.
    stem, ext = os.path.splitext(img_path)

    # padding
    new_img = img_to_32_multiplier(img)
    new_img.save(stem + new_file_suffix + ext.lstrip("."))

    # Adjust annotations: padding is added on the right/bottom only, so the
    # coordinates shrink by original/padded along each axis.
    new_w, new_h = new_img.size
    w_ratio, h_ratio = org_w / new_w, org_h / new_h
    ignore_class = [4]
    new_annos = []
    with open(stem + ".txt", "r") as file:
        for line in file:
            figures = [float(n) for n in line.strip().split()]
            if not figures:
                continue  # tolerate blank lines
            class_no = int(figures[0])
            if class_no in ignore_class:
                continue
            new_annos.append(
                yolo_str(
                    class_no,
                    figures[1] * w_ratio,
                    figures[2] * h_ratio,
                    figures[3] * w_ratio,
                    figures[4] * h_ratio,
                )
            )
    write_to_file(stem + new_file_suffix + "txt", new_annos)

    

In [None]:
path = "aerial/"
new_file_suffix = "_m32."
included_extention = ('jpg', 'bmp', 'png', 'gif')
# Stem ending of files generated by aerial_convert ("_m32"); those are skipped
# so that re-running this cell does not pad already-padded images again.
generated_marker = new_file_suffix.rstrip(".")
img_list = [
    f for f in os.listdir(path)
    if f.endswith(included_extention)
    and not os.path.splitext(f)[0].endswith(generated_marker)
]
for img_str in img_list:
    aerial_convert(img_str)

### 4. VisDron2018

#### Only consider the bounding boxes with score=1, truncation=0 and occlusion=0 or 1

```
Mapping list:
ignored regions(0)  -> ignore
pedestrian(1) -> pedestrian
people(2) -> pedestrian
bicycle(3) -> cyclist
car(4) -> car
van(5) -> van
truck(6) -> truck
tricycle(7) -> cyclist
awning-tricycle(8) -> cyclist
bus(9) -> bus
motor(10) -> ignore
others(11) -> ignore
```

```

Format:

<bbox_left>,<bbox_top>,<bbox_width>,<bbox_height>,<score>,<object_category>,<truncation>,<occlusion>

Description:
 
 <bbox_left>	     The x coordinate of the top-left corner of the predicted bounding box

 <bbox_top>	     The y coordinate of the top-left corner of the predicted object bounding box

 <bbox_width>	     The width in pixels of the predicted object bounding box

<bbox_height>	     The height in pixels of the predicted object bounding box

   <score>	     The score in the DETECTION file indicates the confidence of the predicted bounding box enclosing 
an object instance. The score in GROUNDTRUTH file is set to 1 or 0. 1 indicates the bounding box is considered in evaluation,while 0 indicates the bounding box will be ignored.
                      
<object_category>    The object category indicates the type of annotated object, (i.e., ignored regions(0), pedestrian(1), people(2), bicycle(3), car(4), van(5), truck(6), tricycle(7), awning-tricycle(8), bus(9),motor(10), others(11))
                      
<truncation>	     The score in the DETECTION result file should be set to the constant -1.The score in the GROUNDTRUTH file indicates the degree of object parts appears outside a frame (i.e., no truncation = 0 (truncation ratio 0%), and partial truncation = 1 (truncation ratio 1% ~ 50%)).
                      
<occlusion>	     The score in the DETECTION file should be set to the constant -1. The score in the GROUNDTRUTH file indicates the fraction of objects being occluded (i.e., no occlusion = 0 (occlusion ratio 0%), partial occlusion = 1 (occlusion ratio 1% ~ 50%), and heavy occlusion = 2 (occlusion ratio 50% ~ 100%))

```

In [None]:
def visDrone_transform_class(class_no):
    """Map a VisDrone object category to the unified class index.

    Mapping: car(4) -> 0, truck(6) -> 1, bus(9) -> 2, van(5) -> 3,
    bicycle(3) / tricycle(7) / awning-tricycle(8) -> 4 (cyclist),
    pedestrian(1) / people(2) -> 5 (pedestrian).

    Returns the sentinel ``100`` for every other category (ignored regions,
    motor, others); callers use it to skip the box.
    """
    # Category 6 (truck) appears only once: it maps to 1, never to cyclist.
    mapping = {
        4: 0,
        6: 1,
        9: 2,
        5: 3,
        3: 4, 7: 4, 8: 4,
        1: 5, 2: 5,
    }
    return mapping.get(class_no, 100)

In [None]:
folder = "VisDrone2018-DET-train/"
included_extention = ('jpg', 'bmp', 'png', 'gif')
new_file_suffix = "_m32."
# Skip images whose stem ends in "_m32": they are outputs of a previous run and
# have no comma-separated annotation file of their own.
img_names = [
    f for f in os.listdir(os.path.join(folder, 'images'))
    if f.endswith(included_extention)
    and not os.path.splitext(f)[0].endswith("_m32")
]

for img_name in img_names:
    print(img_name)
    stem = os.path.splitext(img_name)[0]
    img_path = os.path.join(folder, 'images', img_name)
    anno_path = os.path.join(folder, 'annotations', stem + '.txt')
    new_anno_path = os.path.join(folder, 'annotations', stem + new_file_suffix + 'txt')

    # NOTE(review): save=False means the padded image is NOT written here, yet
    # the boxes below are normalized by the padded size - confirm the padded
    # images are produced elsewhere.
    new_img, org_w, org_h = conver_to_new_img(img_path, new_file_suffix, False)
    new_w, new_h = new_img.size

    with open(anno_path, 'r') as file:
        lines = file.readlines()

    annos = []
    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines
        # Fields: left, top, width, height, score, category, truncation, occlusion.
        attrs = line.strip().split(",")
        c = visDrone_transform_class(int(attrs[5]))
        # Explicit indices 6/7 (instead of -2/-1) keep the filter correct even
        # if a line carries a trailing comma.
        if c != 100 and attrs[4] == "1" and attrs[6] == "0" and attrs[7] in ("0", "1"):
            w = float(attrs[2])
            h = float(attrs[3])
            xc = float(attrs[0]) + w/2
            yc = float(attrs[1]) + h/2
            annos.append(yolo_str(c, xc/new_w, yc/new_h, w/new_w, h/new_h))
    write_to_file(new_anno_path, annos)


In [None]:
# Debug aid: display the image path left over from the last iteration of the loop above.
img_path

In [None]:
# Debug aid: display the source annotation path left over from the last loop iteration.
anno_path

In [None]:
# Debug aid: display the output annotation path left over from the last loop iteration.
new_anno_path

### 5. UAV-benchmark-M

#### Only consider the bounding boxes where out-of-view=1, occlusion=1,3,4




```
Mapping list:
car(1) -> car
truck(2) -> truck
bus(3) -> bus


Format:

<frame_index>,<target_id>,<bbox_left>,<bbox_top>,<bbox_width>,<bbox_height>,<out-of-view>,<occlusion>,<object_category>

Description:

    <frame_index> The frame index of the video frame
       
    <target_id>	  In the GROUNDTRUTH file, the identity of the target is used to provide the temporal         
                  corresponding relation of the bounding boxes in different frames.
				  
    <bbox_left>	  The x coordinate of the top-left corner of the predicted bounding box
	
    <bbox_top>	  The y coordinate of the top-left corner of the predicted object bounding box
	
    <bbox_width>  The width in pixels of the predicted object bounding box
	
    <bbox_height> The height in pixels of the predicted object bounding box
	
    <out-of-view> The score in the GROUNDTRUTH file indicates the degree of object parts appears outside a frame 
                   (i.e., 'no-out'=1, 'medium-out'=2, 'small-out'=3).
                   
    <occlusion>	  The score in the GROUNDTRUTH file indicates the fraction of objects being occluded (i.e., 'no-occ'=1, 'large-occ'=2, 'medium-occ'=3, 'small-occ'=4).

    <object_category> The object category indicates the type of annotated object, (i.e.,car(1), truck(2), bus(3))
```

In [None]:
def Uav_transfomr_class(class_no):
    """Convert a 1-based UAV category number to the 0-based class index.

    The result is simply ``class_no - 1`` (car(1) -> 0, truck(2) -> 1, bus(3) -> 2).
    """
    zero_based = class_no - 1
    return zero_based

In [None]:
# Root directory of the UAV-benchmark-M data set.
folder = "UAV-benchmark-M/"
# Directory holding the "<sequence>_gt_whole.txt" ground-truth files.
anno_path = os.path.join(folder, "GT")
# Sequence folders: every entry of the root directory whose name contains "M".
folder_list = [entry for entry in os.listdir(folder) if "M" in entry]


In [None]:
# For every sequence: pad each annotated frame to multiples of 32, save it as
# "<sequence>_img<frame>_m32.jpg" and write the matching YOLO annotation file.
for folder_name in folder_list:
    print(folder_name)
    anno_file = os.path.join(anno_path, folder_name + "_gt_whole.txt")
    print(anno_file)
    # header=None: the ground-truth file has no header row (negative header
    # values are rejected by current pandas).
    df = pd.read_csv(anno_file, sep=',', header=None, dtype=str)
    grouped = df.groupby(0)  # column 0 is the frame index

    for name, group in grouped:
        frame_stem = folder_name + '_img{0:06d}_m32'.format(int(name))
        anno_text_name = frame_stem + '.txt'
        img_name = 'img{0:06d}.jpg'.format(int(name))
        img_path = os.path.join(folder, folder_name, img_name)
        # save=False: the padded frame is saved explicitly below under a name
        # that includes the sequence folder.
        new_img, _, _ = conver_to_new_img(img_path, new_file_suffix, False)
        new_img.save(os.path.join(folder, folder_name, frame_stem + '.jpg'))
        new_w, new_h = new_img.size
        annos = []
        for row in group.itertuples():
            # row[-1] category, row[-2] occlusion, row[-3] out-of-view.
            if row[-2] in ["1", "4", "3"] and row[-3] == "1":
                w = float(row[5])
                h = float(row[6])
                xc = float(row[3]) + w/2
                yc = float(row[4]) + h/2
                c = Uav_transfomr_class(int(row[-1]))
                annos.append(yolo_str(c, xc/new_w, yc/new_h, w/new_w, h/new_h))
        # Written once per frame, after all of its rows have been collected.
        write_to_file(os.path.join(folder, folder_name, anno_text_name), annos)

### 6. Stanford Drone Dataset 


### This code only converts the annotation files; conversion of the videos to images is done separately. Only consider the bounding boxes where occluded=0 and lost=0

### Only the video0 of each scene is selected into the final aerial_dataset

```
Mapping list:
Car -> car
Cart -> truck
Bus -> bus
Biker -> cyclist
Pedestrian -> pedestrian
Skater -> pedestrian

Format:

1   Track ID. All rows with the same ID belong to the same path.
2   xmin. The top left x-coordinate of the bounding box.
3   ymin. The top left y-coordinate of the bounding box.
4   xmax. The bottom right x-coordinate of the bounding box.
5   ymax. The bottom right y-coordinate of the bounding box.
6   frame. The frame that this annotation represents.
7   lost. If 1, the annotation is outside of the view screen.
8   occluded. If 1, the annotation is occluded.
9   generated. If 1, the annotation was automatically interpolated.
10  label. The label for this annotation, enclosed in quotation marks.
11+ attributes. Each column after this is an attribute.

```




In [None]:
# Define class number
def st_uav_class_transform(cls_name):
    """Map a Stanford Drone Dataset label to the unified class index.

    Car -> 0, Cart -> 1, Bus -> 2, Biker -> 4, Pedestrian / Skater -> 5.
    Any other label falls through and yields ``None``.
    """
    label_to_index = (
        ('Car', 0),
        ('Cart', 1),
        ('Bus', 2),
        ('Biker', 4),
        ('Pedestrian', 5),
        ('Skater', 5),
    )
    for label, index in label_to_index:
        if cls_name == label:
            return index
    

In [None]:
path = "st_uav/annotations/"
scences = os.listdir(path)

# get all annotation.txt
anno_txts = []
for s in scences:
    if s.startswith("."):
        continue
    for v in os.listdir(os.path.join(path, s)):
        if not v.startswith("."):
            anno_txts.append(os.path.join(path, s, v, "annotations.txt"))

## Convert annotations and split them frame by frame
for t in [a for a in anno_txts if "video0" in a]:
    print(t)
    video_dir = os.path.dirname(t)
    # The reference image next to the annotation file supplies the frame size.
    img_path = os.path.join(video_dir, "reference.jpg")
    with Image.open(img_path) as reference:
        img_w, img_h = reference.size

    # header=None: the file has no header row (negative header values are
    # rejected by current pandas).
    df = pd.read_csv(t, sep=' ', header=None, dtype=str)
    grouped = df.groupby(5)  # column 5 is the frame number
    all_annos = []
    frames_dir = os.path.join(video_dir, "frames")
    os.makedirs(frames_dir, exist_ok=True)

    for name, group in grouped:
        # Output files are numbered from 1, the annotation frames from 0.
        frame_name = "{:06}".format(int(name) + 1)
        frame_annos = []
        for row in group.itertuples():
            # itertuples() puts the index at position 0, so the documented
            # 1-based field numbers are shifted by one: row[7] is "lost",
            # row[8] is "occluded", row[10] is the label.
            if row[7] == "1" or row[8] == "1":
                continue
            c = st_uav_class_transform(row[10])
            if c is None:
                continue  # label without a mapping: do not emit a "None ..." line
            xc = (int(row[4]) + int(row[2])) / (2 * img_w)
            yc = (int(row[5]) + int(row[3])) / (2 * img_h)
            w = (int(row[4]) - int(row[2])) / img_w
            h = (int(row[5]) - int(row[3])) / img_h
            anno = yolo_str(c, xc, yc, w, h)
            frame_annos.append(anno)
            all_annos.append(frame_name + " " + anno)
        write_to_file(os.path.join(frames_dir, frame_name + ".txt"), frame_annos)

    write_to_file(os.path.join(video_dir, "yolo_annotations_all.txt"), all_annos)

### Generate  train and test set
1. Shuffle all the images in the aerial_dataset folder and split them into train and test sets
2. Save the file path to train.txt and test.txt

In [None]:
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path  = '/Users/sonychan/Downloads/'
target = "aerial_dataset/"
# Image extensions considered; defined here so this cell does not depend on a
# variable that is only assigned in a later cell.
formats = ('jpg', 'png')

train_list = []
test_list = []
split_ratio = 0.8  # fraction of each sub-folder's images that goes to train
f_list =  [a for a in os.listdir(path + target) if not a.startswith('.') and not a.endswith('.txt')]
for f in f_list:
    print(f)
    file_folder = path + target + f
    imgs = [e for e in os.listdir(file_folder) if e.endswith(formats) and not e.startswith('.')]
    # Paths are stored relative to `path`, i.e. starting with "aerial_dataset/".
    imgs_path = [target + f + "/" + img for img in imgs]
    # NOTE(review): no random seed is set, so the split differs between runs.
    random.shuffle(imgs_path)
    if f == "uav":
        # Only a third of the (shuffled) images of the "uav" folder is kept.
        imgs_path = imgs_path[:int(len(imgs_path) / 3)]
    size = len(imgs_path)
    print(size)
    cut = int(size * split_ratio)
    train_list += imgs_path[:cut]
    test_list += imgs_path[cut:]
write_to_file("train.txt", train_list)
write_to_file("test.txt", test_list)


### Check correctness
1. Read train.txt and test.txt.
2. Check if all the images in train.txt and test.txt have corresponding annotations.
3. Count statistics of the dataset. 

In [None]:
no_anno = set()   # image stems that have no annotation file
no_img = set()    # annotation stems that have no image
bad_anno = []     # annotation stems with at least one line that is not 5 fields
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path  = '/Users/sonychan/Downloads/aerial_dataset/'
f_list =  [a for a in os.listdir(path) if not a.startswith('.') and not a.endswith(".txt")]
counter = 0                # total number of annotation lines
cata = [0,0,0,0, 0,0]      # per-class line counts, indexed by class number
formats = ('jpg', 'png')
no_image = 0               # number of annotation files visited
for f in f_list:
    print(f)
    file_folder = os.path.join(path, f)
    entries = [e for e in os.listdir(file_folder) if not e.startswith('.')]
    annos = set(e.split(".")[0] for e in entries if ".txt" in e)
    imgs = set(e.split(".")[0] for e in entries if e.endswith(formats))
    has_both = annos & imgs
    # update() mutates the accumulators; union() would only return a new set.
    no_anno.update(imgs - has_both)
    no_img.update(annos - has_both)
    print("no_annotation_txt: {}".format(no_anno))
    print("no_image_txt: {}".format(no_img))
    for anno in annos:
        no_image += 1
        with open(file_folder + "/" + anno + ".txt", "r") as file:
            lines = file.readlines()
        counter += len(lines)
        for line in lines:
            fields = line.split()
            class_no = fields[0] if fields else None
            try:
                cata[int(class_no)] += 1
            except (TypeError, ValueError, IndexError):
                # Blank line, non-numeric class or class outside the known range.
                print(anno)
                print(class_no)
        # A well-formed line has exactly five single-space separated fields.
        if any(len(line.split(" ")) != 5 for line in lines):
            bad_anno.append(anno)
print(bad_anno)
print("Number of images: {}".format(no_image))
print("Number of vehicles: {}".format(counter))
print("Number of cars: {}".format(cata[0]))
print("Number of trucks: {}".format(cata[1]))
print("Number of buses: {}".format(cata[2]))
print("Number of vans: {}".format(cata[3]))
print("Number of Cyclist: {}".format(cata[4]))
print("Number of Pedestrian: {}".format(cata[5]))