In [2]:
import dlib, cv2, os
from ast import literal_eval
from imutils import face_utils
import numpy as np
import matplotlib.pyplot as plt
import time
import pandas as pd

## Load csv files which contain head bounding box & key point coordinates
Bounding boxes and key points were generated with a third pary dog detector: https://github.com/kairess/dog_face_detector

In [3]:
df_train = pd.read_csv("../csv_files/preprocessing_train.csv", index_col = 0)
df_val = pd.read_csv("../csv_files/preprocessing_val.csv", index_col = 0)

df_all = pd.concat([df_train, df_val])

In [4]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 65217 entries, 0 to 65216
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   img_path                 65217 non-null  object 
 1   img_name                 65217 non-null  object 
 2   num_pixels_initial_crop  65217 non-null  int64  
 3   bb                       65217 non-null  bool   
 4   lm                       65217 non-null  bool   
 5   bb_x1                    40657 non-null  float64
 6   bb_x2                    40657 non-null  float64
 7   bb_y1                    40657 non-null  float64
 8   bb_y2                    40657 non-null  float64
 9   lm_00                    40657 non-null  object 
 10  lm_01                    40657 non-null  object 
 11  lm_02                    40657 non-null  object 
 12  lm_03                    40657 non-null  object 
 13  lm_04                    40657 non-null  object 
 14  lm_05                 

Head bounding boxes and key points could only be detected for 40k out of the 65k train images.

In [26]:
df_val.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5199 entries, 0 to 5198
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   img_path                 5199 non-null   object 
 1   img_name                 5199 non-null   object 
 2   num_pixels_initial_crop  5199 non-null   int64  
 3   bb                       5199 non-null   bool   
 4   lm                       5199 non-null   bool   
 5   bb_x1                    3568 non-null   float64
 6   bb_x2                    3568 non-null   float64
 7   bb_y1                    3568 non-null   float64
 8   bb_y2                    3568 non-null   float64
 9   lm_00                    3568 non-null   object 
 10  lm_01                    3568 non-null   object 
 11  lm_02                    3568 non-null   object 
 12  lm_03                    3568 non-null   object 
 13  lm_04                    3568 non-null   object 
 14  lm_05                   

Head bounding boxes and key points could only be detected for 3.5k out of the 5k validation images.

## Filter out bad images and refine crops

### Filter 1:
Filter out all images with a resolution lower than 65k pixesls (250x250)

### Filter 2:
Filter out all images were no bounding box & keypoints are detected.

### Filter 3:
Filter out all images which show a side view of a dog face. The image shows a side view when the nose is further out than one of the eyes.

### Refine crops
Adapt bounding box to a square and resize images to 256x256. 

### Debugging
You can uncomment matplotlib calls to see intermediate results.

In [61]:
df_train

Unnamed: 0,img_path,img_name,num_pixels_initial_crop,bb,lm,bb_x1,bb_x2,bb_y1,bb_y2,lm_00,lm_01,lm_02,lm_03,lm_04,lm_05
0,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,0.jpg,65844,False,False,,,,,,,,,,
1,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,1.jpg,8648,True,True,11.0,93.0,12.0,94.0,"(63, 12)","(83, 21)","(71, 46)","(53, 73)","(40, 14)","(43, 39)"
2,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,2.jpg,199320,True,True,16.0,368.0,34.0,386.0,"(222, 52)","(313, 70)","(272, 163)","(187, 244)","(131, 53)","(138, 145)"
3,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,3.jpg,91314,True,True,47.0,251.0,112.0,316.0,"(149, 82)","(182, 83)","(198, 184)","(196, 287)","(116, 87)","(127, 186)"
4,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,4.jpg,110295,True,True,55.0,259.0,112.0,316.0,"(152, 86)","(190, 91)","(210, 189)","(197, 280)","(114, 96)","(122, 195)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65212,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,65223.jpg,918702,True,True,-39.0,691.0,220.0,951.0,"(466, 271)","(598, 312)","(522, 580)","(285, 739)","(347, 276)","(235, 545)"
65213,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,65224.jpg,450846,True,True,185.0,537.0,311.0,663.0,"(324, 301)","(454, 332)","(425, 444)","(380, 546)","(198, 356)","(274, 466)"
65214,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,65225.jpg,885115,True,True,-2.0,421.0,453.0,876.0,"(336, 474)","(455, 558)","(330, 662)","(144, 700)","(192, 463)","(162, 578)"
65215,/scratch/local/ssd/janhr/data/tsinghua_dogs_hi...,65226.jpg,948996,True,True,81.0,811.0,128.0,858.0,"(517, 136)","(689, 167)","(654, 378)","(493, 541)","(363, 185)","(360, 392)"


In [60]:
filter_1 = 0
filter_2 = 0
filter_3 = 0
valid_imgs = 0

image_output_dir = "/scratch/local/ssd/janhr/data/dogs_cropped/all/"
image_input_dir = "/scratch/local/ssd/janhr/data/tsinghua_dogs_high_res_cropped/train/"

for index, row in df_all[510:4000].iterrows():
    img_name = row["img_name"]
    img_path = image_input_dir + img_name
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # 1. filter: pixel size
    num_pixels = row["num_pixels_initial_crop"]
    if num_pixels < 250*250:
        continue
    num_imgs_after_filter_1 += 1
        

    # 2. filter check if head and landmarks were detected
    if row["bb"] != True or row["lm"] != True:
        continue
    num_imgs_after_filter_3 += 1

    # 3. noise right or left of eye -> side view -> filter out
    right_eye = literal_eval(row["lm_02"])
    left_eye = literal_eval(row["lm_05"])
    noise = literal_eval(row["lm_03"])

    if noise[0] > right_eye[0] or noise[0] < left_eye[0]:
        continue
    num_imgs_after_filter_3 += 1
        
    
    x1 = int(row["bb_x1"])
    y1 = int(row["bb_y1"])
    x2 = int(row["bb_x2"])
    y2 = int(row["bb_y2"])
    
    img_width = img.shape[1]
    img_height = img.shape[0]

    x1 = max(0,x1)
    x2 = min(img_width,x2)
    y1 = max(0,y1)
    y2 = min(img_height,y2)
    
    # 5. Square images
    width = x2-x1
    height = y2-y1

    
    # make crop a square
    crop = None
    if (height > width):
        ratio = height/width
        border_x1 = int((ratio-1)*width/2)
        border_x2 = int((ratio-1)*width/2)
        border_x1_copy = 0
        border_x2_copy = 0
        # make crop wider: change x1 and x2 or copy border
        # extend x1 as best as possible
        new_x1 = x1 - border_x1
        if new_x1 < 0: 
            # extend x1 as fast as possible and extend border after crop
            border_x1_copy = border_x1 - x1
#             print(f"old x1: {x1} new_x1: {new_x1}, broder_x1_copy: {border_x1_copy}")
            new_x1 = 0
        x1 = new_x1
        
        new_x2 = x2 + border_x2
        if new_x2 > img_width: 
            # extend x1 as fast as possible and extend border after crop
            border_x2_copy = new_x2 - img_width
#             print(f"old x2: {x2} new_x2: {new_x2}, broder_x2_copy: {border_x2_copy}")
            new_x2 = img_width
        x2 = new_x2
             
        crop = img[y1:y2, x1:x2]
#         print(f"border_x1_copy: {border_x1_copy}, border_x2_copy: {border_x2_copy}")
        crop = cv2.copyMakeBorder(crop,0,0,border_x1_copy,border_x2_copy,cv2.BORDER_REPLICATE)
    
    elif(width > height):
        ratio = width/height
        border_y1 = int((ratio-1)*height/2)
        border_y2 = int((ratio-1)*height/2)
        border_y1_copy = 0
        border_y2_copy = 0
        # make crop wider: change x1 and x2 or copy border
        # extend x1 as best as possible
        new_y1 = y1 - border_y1
        if new_y1 < 0: 
            # extend x1 as fast as possible and extend border after crop
            border_y1_copy = border_y1 - y1
#             print(f"old y1: {y1} new_y1: {new_y1}, broder_y1_copy: {border_y1_copy}")
            new_y1 = 0
        y1 = new_y1
        
        new_y2 = y2 + border_y2
        if new_y2 > img_height: 
            # extend x1 as fast as possible and extend border after crop
            border_y2_copy = new_y2 - img_height
#             print(f"old y2: {y2} new_y2: {new_y2}, broder_y2_copy: {border_y2_copy}")
            new_y2 = img_height
        y2 = new_y2

        crop = img[y1:y2, x1:x2]
#         print(f"border_y1_copy: {border_y1_copy}, border_y2_copy: {border_y2_copy}")
        crop = cv2.copyMakeBorder(crop,border_y1_copy,border_y2_copy,0,0,cv2.BORDER_REPLICATE)
    else:
        crop = img[y1:y2, x1:x2]
    
    
# ONLY FOR DEBUG
#     fig = plt.figure(figsize=(16,16))
#     x1 = int(row["bb_x1"])
#     y1 = int(row["bb_y1"])
#     x2 = int(row["bb_x2"])
#     y2 = int(row["bb_y2"])
#     img_org = img.copy()
#     cv2.rectangle(img_org, pt1=(x1, y1), pt2=(x2, y2), thickness=2, color=(255,0,0), lineType=cv2.LINE_AA)

#     shape = np.array([literal_eval(row["lm_00"]),literal_eval(row["lm_01"]),literal_eval(row["lm_02"]),literal_eval(row["lm_03"]),literal_eval(row["lm_04"]), literal_eval(row["lm_05"])])
#     for i, p in enumerate(shape):
#         cv2.circle(img_org, center=tuple(p), radius=3, color=(0,0,255), thickness=-1, lineType=cv2.LINE_AA)
#         cv2.putText(img_org, str(i), tuple(p), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)

#     ax1 = fig.add_subplot(221)
#     ax2.set_title("Original image with bounding box + key points")
#     ax1.xaxis.tick_top()
#     ax1.imshow(img_org)
    
#     ax2 = fig.add_subplot(222)
#     ax2.set_title("Final cropped img")
#     ax2.xaxis.tick_top()
#     ax2.imshow(crop)
#     break

    # resize and save image
    resized_img = cv2.resize(crop, (256,256))
    img_out = cv2.cvtColor(resized_img, cv2.COLOR_RGB2BGR)
    img_output_path = image_output_dir + img_name
    cv2.imwrite(img_output_path, img_out)

    # bring image to same size
    valid_imgs += 1
    print(f"{index}/{len(df_all)}", end="\r")
    
total_num_imgs = len(df_all)

print(f"Total number of images: {total_num_imgs}")
print(f"Number of images which pass filter 1: {num_imgs_after_filter_1}")
print(f"Number of images which pass filter 1 & 2: {num_imgs_after_filter_2}")
print(f"Number of images which pass filter 1,2 & 3: {num_imgs_after_filter_3}")
print(f"Total number of valid images: {valid_imgs}")

1145/70416

KeyboardInterrupt: 