In [1]:
# import the packages
from shutil import copyfile
from imutils import paths
import numpy as np
import csv
import cv2
import os

In [2]:
def make_dir(dirName):
    # Create a target directory & all intermediate 
    # directories if they don't exists
    if not os.path.exists(dirName):
        os.makedirs(dirName, exist_ok = True)
        print("[INFO] Directory " , dirName ,  " created")
    else:
        print("[INFO] Directory " , dirName ,  " already exists")

In [49]:
# Define dataset directories
SOURCE_PATH = 'animalweb'
SORTED_PATH = 'animalweb_sorted'
TARGET_PATH = 'dataset/animalweb'

make_dir(SORTED_PATH)

# Create directories for training, validation 
# and testing sets
TRAIN_IMG_DIR = 'train/images'
TRAIN_LAB_DIR = 'train/labels'
make_dir(os.path.join(TARGET_PATH, TRAIN_IMG_DIR))
make_dir(os.path.join(TARGET_PATH, TRAIN_LAB_DIR))

TEST_IMG_DIR = 'test/images'
TEST_LAB_DIR = 'test/labels'
make_dir(os.path.join(TARGET_PATH, TEST_IMG_DIR))
make_dir(os.path.join(TARGET_PATH, TEST_LAB_DIR))

VAL_IMG_DIR = 'val/images'
VAL_LAB_DIR = 'val/labels'
make_dir(os.path.join(TARGET_PATH, VAL_IMG_DIR))
make_dir(os.path.join(TARGET_PATH, VAL_LAB_DIR))

[INFO] Directory  animalweb_sorted  already exists
[INFO] Directory  dataset/animalweb/train/images  already exists
[INFO] Directory  dataset/animalweb/train/labels  already exists
[INFO] Directory  dataset/animalweb/test/images  already exists
[INFO] Directory  dataset/animalweb/test/labels  already exists
[INFO] Directory  dataset/animalweb/val/images  already exists
[INFO] Directory  dataset/animalweb/val/labels  already exists


In [50]:
# Get paths to the images and annotations
annotPaths = list(paths.list_files(SOURCE_PATH, validExts="pts"))
annotPaths = sorted(annotPaths)

imagePaths = list(paths.list_files(SOURCE_PATH, validExts="jpg"))
imagePaths = sorted(imagePaths)

print(len(annotPaths), len(imagePaths))

22451 22451


In [51]:
imagePaths[:20]

['animalweb/aardvark_1.jpg',
 'animalweb/aardvark_10.jpg',
 'animalweb/aardvark_11.jpg',
 'animalweb/aardvark_12.jpg',
 'animalweb/aardvark_13.jpg',
 'animalweb/aardvark_14.jpg',
 'animalweb/aardvark_15.jpg',
 'animalweb/aardvark_16.jpg',
 'animalweb/aardvark_18.jpg',
 'animalweb/aardvark_19.jpg',
 'animalweb/aardvark_2.jpg',
 'animalweb/aardvark_20.jpg',
 'animalweb/aardvark_21.jpg',
 'animalweb/aardvark_22.jpg',
 'animalweb/aardvark_23.jpg',
 'animalweb/aardvark_24.jpg',
 'animalweb/aardvark_25.jpg',
 'animalweb/aardvark_26.jpg',
 'animalweb/aardvark_27.jpg',
 'animalweb/aardvark_29.jpg']

In [52]:
annotPaths[:20]

['animalweb/aardvark_1.pts',
 'animalweb/aardvark_10.pts',
 'animalweb/aardvark_11.pts',
 'animalweb/aardvark_12.pts',
 'animalweb/aardvark_13.pts',
 'animalweb/aardvark_14.pts',
 'animalweb/aardvark_15.pts',
 'animalweb/aardvark_16.pts',
 'animalweb/aardvark_18.pts',
 'animalweb/aardvark_19.pts',
 'animalweb/aardvark_2.pts',
 'animalweb/aardvark_20.pts',
 'animalweb/aardvark_21.pts',
 'animalweb/aardvark_22.pts',
 'animalweb/aardvark_23.pts',
 'animalweb/aardvark_24.pts',
 'animalweb/aardvark_25.pts',
 'animalweb/aardvark_26.pts',
 'animalweb/aardvark_27.pts',
 'animalweb/aardvark_29.pts']

In [53]:
# Extract labels in yolo and evaluation formats
def extract_labels(image, label, show=False):
    # get width and height of the image
    H, W = image.shape[:2]
    
    # top-left and bottom-right coords
    # of bounding box
    xs = min(label[:, 0])
    ys = min(label[:, 1])
    xe = max(label[:, 0])
    ye = max(label[:, 1])
    
    # extend the bounding box
    # by adding a small margin
    mw = (xe - xs) * 0.1
    mh = (ye - ys) * 0.1
    
    xs = max(xs - mw, 0)
    ys = max(ys - mh * 2, 0)
    
    xe = min(xe + mw, W-1)
    ye = min(ye + mh, H-1)
    
    # convert the bbox to the a yolo format:
    # x center, y center, width, height 
    xc = (xs + xe) / 2
    yc = (ys + ye) / 2
    width = (xe - xs)
    height = (ye - ys)
    
    # define coordinates of facial landmarks
    p1x = (label[0,0] + label[1,0]) / 2
    p1y = (label[0,1] + label[1,1]) / 2
    
    p2x = (label[2,0] + label[3,0]) / 2
    p2y = (label[2,1] + label[3,1]) / 2
    
    p3x = label[4,0]
    p3y = label[4,1]
    
    p4x = label[5,0]
    p4y = label[5,1]
    
    p5x = label[6,0]
    p5y = label[6,1]
    
    if show:
        cv2.rectangle(image, (int(xs), int(ys)), (int(xe), int(ye)), (0, 255, 0), 2)
        cv2.circle(image, (int(p1x), int(p1y)), 3, (0, 0, 255), -1)
        cv2.circle(image, (int(p2x), int(p2y)), 3, (255, 0, 0), -1)
        cv2.circle(image, (int(p3x), int(p3y)), 3, (0, 255, 255), -1)
        cv2.circle(image, (int(p4x), int(p4y)), 3, (255, 0, 255), -1)
        cv2.circle(image, (int(p5x), int(p5y)), 3, (255, 255, 0), -1)
    
        img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        img.show()
    
    # labels in yolo and evaluation format
    label_yolo = '{:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f}\n'.format(
        xc/W, yc/H, width/W, height/H, p1x/W, p1y/H, p2x/W, p2y/H, p3x/W, p3y/H, p4x/W, p4y/H, p5x/W, p5y/H) 
    
    label_eval = 'face {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f} {:.3f}\n'.format(
        xs, ys, width, height, p1x, p1y, p2x, p2y, p3x, p3y, p4x, p4y, p5x, p5y)
    
    return label_yolo, label_eval

In [54]:
image = cv2.imread(imagePaths[100])
label = np.loadtxt(annotPaths[100], comments=("version:", "n_points:", "{", "}"))

extract_labels(image, label, show=True)

('0.496 0.633 0.099 0.223 0.461 0.561 0.530 0.561 0.464 0.704 0.454 0.707 0.502 0.703\n',
 'face 456.770 355.238 101.419 152.154 471.793 381.795 542.911 381.997 475.204 479.584 465.221 481.223 514.461 478.697\n')

In [48]:
species = set()
count = 1

for imagePath, annotPath in zip(imagePaths, annotPaths):
    # Extract specie ID
    specie, idx = imagePath.split('/')[-1].split('.jpg')[0].split('_')
    species.add(specie)
    
    # Create a new directory
    speciePath = os.path.join(SORTED_PATH, specie)
    
    if not os.path.exists(speciePath):
        os.makedirs(speciePath, exist_ok = True)
    
    # Copy files to the new directory
    copyfile(imagePath, os.path.join(speciePath, imagePath.split('/')[-1]))
    copyfile(annotPath, os.path.join(speciePath, annotPath.split('/')[-1]))
    
    print("[INFO] Copying files {}/{}".format(count, len(imagePaths)))
    count +=1

[INFO] Copying files 1/22451
[INFO] Copying files 2/22451
[INFO] Copying files 3/22451
[INFO] Copying files 4/22451
[INFO] Copying files 5/22451
[INFO] Copying files 6/22451
[INFO] Copying files 7/22451
[INFO] Copying files 8/22451
[INFO] Copying files 9/22451
[INFO] Copying files 10/22451
[INFO] Copying files 11/22451
[INFO] Copying files 12/22451
[INFO] Copying files 13/22451
[INFO] Copying files 14/22451
[INFO] Copying files 15/22451
[INFO] Copying files 16/22451
[INFO] Copying files 17/22451
[INFO] Copying files 18/22451
[INFO] Copying files 19/22451
[INFO] Copying files 20/22451
[INFO] Copying files 21/22451
[INFO] Copying files 22/22451
[INFO] Copying files 23/22451
[INFO] Copying files 24/22451
[INFO] Copying files 25/22451
[INFO] Copying files 26/22451
[INFO] Copying files 27/22451
[INFO] Copying files 28/22451
[INFO] Copying files 29/22451
[INFO] Copying files 30/22451
[INFO] Copying files 31/22451
[INFO] Copying files 32/22451
[INFO] Copying files 33/22451
[INFO] Copying file

In [55]:
print("[INFO] Total species:", len(species))

[INFO] Total species: 350


In [82]:
# Remove duplicate images
def remove_copies(imagePaths, annotPaths):
    for i in range(len(imagePaths)):
        if not os.path.isfile(imagePaths[i]):
            continue
            
        src_image = cv2.imread(imagePaths[i])
        src_label = np.loadtxt(annotPaths[i], comments=("version:", "n_points:", "{", "}"))
        src_yolo, src_eval = extract_labels(src_image, src_label)
        
        txt_path = imagePaths[i].split('.jpg')[0]
        txt_file = open("{}.txt".format(txt_path), "w")
        txt_file.write(src_eval)
        
        for j in range(i+1, len(imagePaths)):
            if not os.path.isfile(imagePaths[j]):
                continue
                
            trg_image = cv2.imread(imagePaths[j])
            if src_image.shape == trg_image.shape and np.sum(src_image-trg_image) == 0:
                trg_label = np.loadtxt(annotPaths[j], comments=("version:", "n_points:", "{", "}"))
                trg_yolo, trg_eval = extract_labels(trg_image, trg_label)
                txt_file.write(trg_eval)
                os.remove(imagePaths[j])
                os.remove(annotPaths[j])
        
        os.remove(annotPaths[i])
        txt_file.close()

In [97]:
dirs = os.listdir(SORTED_PATH)

len(dirs)

350

In [98]:
for d in dirs:
    print("[INFO] Processing folder:", d)
    annotPaths = list(paths.list_files(os.path.join(SORTED_PATH, d), validExts="pts"))
    annotPaths = sorted(annotPaths)

    imagePaths = list(paths.list_files(os.path.join(SORTED_PATH, d), validExts="jpg"))
    imagePaths = sorted(imagePaths)
        
    remove_copies(imagePaths, annotPaths)

[INFO] Processing folder: fallowdeer
[INFO] Processing folder: aardwolf
[INFO] Processing folder: elk
[INFO] Processing folder: yelloweyedpenguin
[INFO] Processing folder: crabeaterseal
[INFO] Processing folder: addax
[INFO] Processing folder: doberman
[INFO] Processing folder: sandcat
[INFO] Processing folder: barbarymacaque
[INFO] Processing folder: goldenbamboolemur
[INFO] Processing folder: uakari
[INFO] Processing folder: hare
[INFO] Processing folder: smallasianmongoose
[INFO] Processing folder: duiker
[INFO] Processing folder: weddellseal
[INFO] Processing folder: tammarwallaby
[INFO] Processing folder: collaredbrownlemur
[INFO] Processing folder: nilgai
[INFO] Processing folder: dachshund
[INFO] Processing folder: swamprabbit
[INFO] Processing folder: margay
[INFO] Processing folder: gharial
[INFO] Processing folder: feralcat
[INFO] Processing folder: sundaslowloris
[INFO] Processing folder: canadianlynx
[INFO] Processing folder: coypu
[INFO] Processing folder: gerbil
[INFO] Pr

In [113]:
def train_test_split(imagePaths, annotPaths, set_, count):
    for imagePath, annotPath in zip(imagePaths, annotPaths):
        f = open(annotPath, 'r')    
        lines = f.readlines()
        count += len(lines)
        f.close()
    
        copyfile(imagePath, os.path.join(TARGET_PATH, set_, 'images', imagePath.split('/')[-1]))
        copyfile(annotPath, os.path.join(TARGET_PATH, set_, 'labels', annotPath.split('/')[-1]))
    
    return count

In [114]:
# Split the dataset into train, validation and test sets
face_count = 0
for d in dirs:
    print("[INFO] Processing folder:", d)
    annotPaths = list(paths.list_files(os.path.join(SORTED_PATH, d), validExts="txt"))
    annotPaths = sorted(annotPaths)

    imagePaths = list(paths.list_files(os.path.join(SORTED_PATH, d), validExts="jpg"))
    imagePaths = sorted(imagePaths)
    
    if face_count <= int(22451 * 0.7):
        set_ = 'train'
    elif face_count > int(22451 * 0.7) and face_count < int(22451 * 0.8):
        set_ = 'val'
    else:
        set_ = 'test'
        
    face_count = train_test_split(imagePaths, annotPaths, set_, face_count)

[INFO] Processing folder: fallowdeer
[INFO] Processing folder: aardwolf
[INFO] Processing folder: elk
[INFO] Processing folder: yelloweyedpenguin
[INFO] Processing folder: crabeaterseal
[INFO] Processing folder: addax
[INFO] Processing folder: doberman
[INFO] Processing folder: sandcat
[INFO] Processing folder: barbarymacaque
[INFO] Processing folder: goldenbamboolemur
[INFO] Processing folder: uakari
[INFO] Processing folder: hare
[INFO] Processing folder: smallasianmongoose
[INFO] Processing folder: duiker
[INFO] Processing folder: weddellseal
[INFO] Processing folder: tammarwallaby
[INFO] Processing folder: collaredbrownlemur
[INFO] Processing folder: nilgai
[INFO] Processing folder: dachshund
[INFO] Processing folder: swamprabbit
[INFO] Processing folder: margay
[INFO] Processing folder: gharial
[INFO] Processing folder: feralcat
[INFO] Processing folder: sundaslowloris
[INFO] Processing folder: canadianlynx
[INFO] Processing folder: coypu
[INFO] Processing folder: gerbil
[INFO] Pr