In [3]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import skimage.io as io
from pycocotools.coco import COCO # DOC: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py
import pylab
from tqdm.notebook import tqdm
import skimage
import skimage.draw
import skimage.transform
import skimage.filters
from sklearn.model_selection import train_test_split
import h5py
import wget
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

In [4]:
def draw_keypoints(I, keypoints, r = 0.75, g = 0.6, b = 0.2, radius = 2):
  """Draws a filled disc on a copy of the image for every labelled keypoint.

  Args:
    I: image array indexed as [row, column, channel] with three channels.
    keypoints: flat sequence [x1, y1, v1, x2, y2, v2, ...]. Coordinates are
      treated as 1-indexed (1 is subtracted before drawing) and entries with
      v == 0 are skipped.
    r, g, b: colour components written to every pixel of a disc.
    radius: disc radius in pixels; pixels strictly closer than radius to the
      centre are coloured.

  Returns:
    A modified copy of I; I itself is left untouched.
  """
  I_copy = np.copy(I)
  height, width = I_copy.shape[:2]
  x_val = keypoints[::3]
  y_val = keypoints[1::3]
  v_val = keypoints[2::3]

  for x, y, v in zip(x_val, y_val, v_val):
    if v == 0 or radius <= 0:
      continue

    # Shift from 1-indexed coordinates to 0-indexed array positions
    center_row, center_col = y - 1, x - 1

    # Bounding rows/columns of the disc, clipped to the image. Clipping keeps
    # discs near a border from wrapping around through negative indices or
    # raising an IndexError.
    row_lo = max(int(np.ceil(center_row - radius)), 0)
    row_hi = min(int(np.floor(center_row + radius)), height - 1)
    col_lo = max(int(np.ceil(center_col - radius)), 0)
    col_hi = min(int(np.floor(center_col + radius)), width - 1)
    if row_lo > row_hi or col_lo > col_hi:
      continue # the disc lies completely outside the image

    # The disc is rasterised with NumPy instead of skimage.draw.circle, which
    # newer scikit-image releases no longer provide.
    rows, cols = np.ogrid[row_lo : row_hi + 1, col_lo : col_hi + 1]
    inside = ((rows - center_row) / radius) ** 2 + ((cols - center_col) / radius) ** 2 < 1
    I_copy[row_lo : row_hi + 1, col_lo : col_hi + 1][inside] = [r, g, b]

  return I_copy

In [5]:
def draw_bbox(I, bbox, r, g, b):
  """Draws the outline of a bounding box on a copy of the image.

  Args:
    I: image array indexed as [row, column, channel] with three channels.
    bbox: [x, y, width, height]. The corner coordinates are rounded inwards
      (ceil for the minimum, floor for the maximum) and treated as 1-indexed,
      i.e. 1 is subtracted before they are used as array positions.
    r, g, b: colour components written to every outline pixel.

  Returns:
    A modified copy of I; I itself is left untouched.
  """
  I_copy = np.copy(I)
  height, width = I_copy.shape[:2]
  if height == 0 or width == 0:
    return I_copy

  x_min = np.ceil(bbox[0]).astype("int")
  x_max = np.floor(bbox[0] + bbox[2]).astype("int")
  y_min = np.ceil(bbox[1]).astype("int")
  y_max = np.floor(bbox[1] + bbox[3]).astype("int")

  # Shift to 0-indexed positions and clamp them to the image. Without the
  # clamp a box touching the top/left border (coordinate 0 -> index -1) is
  # drawn on the opposite side of the image, and a box reaching past the
  # bottom/right border raises an IndexError.
  left, right = np.sort(np.clip([x_min - 1, x_max - 1], 0, width - 1))
  top, bottom = np.sort(np.clip([y_min - 1, y_max - 1], 0, height - 1))

  colour = [r, g, b]
  I_copy[top, left : right + 1] = colour      # upper edge
  I_copy[bottom, left : right + 1] = colour   # lower edge
  I_copy[top : bottom + 1, left] = colour     # left edge
  I_copy[top : bottom + 1, right] = colour    # right edge

  return I_copy

In [6]:
def draw_segmentation_extremes(I, segmentation, r, g, b, radius):
  """Marks the four corners of a segmentation's extent on a copy of the image.

  Args:
    I: image array indexed as [row, column, channel] with three channels.
    segmentation: list holding exactly one flat polygon [x1, y1, x2, y2, ...].
      Coordinates are treated as 1-indexed (1 is subtracted before drawing).
    r, g, b: colour components written to every pixel of a disc.
    radius: disc radius in pixels; pixels strictly closer than radius to the
      centre are coloured.

  Returns:
    A modified copy of I; I itself is left untouched.

  Raises:
    AssertionError: if segmentation does not contain exactly one polygon.
  """
  assert len(segmentation) == 1

  I_copy = np.copy(I)
  height, width = I_copy.shape[:2]
  points = np.array(segmentation[0]).reshape((-1, 2))
  x_min, y_min = np.min(points, axis = 0)
  x_max, y_max = np.max(points, axis = 0)

  # The four (y, x) combinations of the smallest and largest coordinates
  corners = [(y_min, x_min), (y_max, x_min), (y_min, x_max), (y_max, x_max)]

  for y, x in corners:
    if radius <= 0:
      break

    # Shift from 1-indexed coordinates to 0-indexed array positions
    center_row, center_col = y - 1, x - 1

    # Bounding rows/columns of the disc, clipped to the image so that discs
    # near a border neither wrap around through negative indices nor raise
    # an IndexError.
    row_lo = max(int(np.ceil(center_row - radius)), 0)
    row_hi = min(int(np.floor(center_row + radius)), height - 1)
    col_lo = max(int(np.ceil(center_col - radius)), 0)
    col_hi = min(int(np.floor(center_col + radius)), width - 1)
    if row_lo > row_hi or col_lo > col_hi:
      continue # the disc lies completely outside the image

    # The disc is rasterised with NumPy instead of skimage.draw.circle, which
    # newer scikit-image releases no longer provide.
    rows, cols = np.ogrid[row_lo : row_hi + 1, col_lo : col_hi + 1]
    inside = ((rows - center_row) / radius) ** 2 + ((cols - center_col) / radius) ** 2 < 1
    I_copy[row_lo : row_hi + 1, col_lo : col_hi + 1][inside] = [r, g, b]

  return I_copy

In [7]:
def crop_image(I, anns, margin_size):
  """Cuts one crop per keypoint-annotated person out of an image.

  For every annotation the bounding box is grown to contain all labelled
  keypoints, made square, centred on the keypoints, resized to
  max(keypoint width, keypoint height) * (1 + margin_size) and then moved so
  that it starts inside the image. Slicing truncates whatever part of the box
  still reaches past the right/bottom border, so crops are not guaranteed to
  be square.

  A person is skipped when
    * none of its keypoints is labelled (all v == 0),
    * a square as tall as the keypoint extent, centred on the keypoints'
      x-centre, would stick out of the image on the left or right,
    * the resized box is smaller than the keypoint extent, or
    * the resulting crop is empty.

  Args:
    I: image array indexed as [row, column, ...].
    anns: sequence of annotation dicts, each with a flat "keypoints" list
      [x1, y1, v1, x2, y2, v2, ...] and a "bbox" given as [x, y, width, height].
    margin_size: fraction of the keypoint extent that is added as margin.

  Returns:
    (res_images, res_keypoints): two parallel lists with one entry per kept
    person. res_images holds the cropped image arrays, res_keypoints the flat
    keypoint arrays with x and y expressed relative to the crop.
  """
  res_images = []
  res_keypoints = []

  # For each person in the image
  for ann in anns:

    keypoints = np.array(ann["keypoints"])
    bbox = ann["bbox"]
    x_val = np.array(keypoints[::3]) - 1 # COCO is not 0-indexed
    y_val = np.array(keypoints[1::3]) - 1 # COCO is not 0-indexed
    v_val = keypoints[2::3]

    # If the current person is not annotated with keypoints
    if (int(np.sum(v_val)) == 0):
      continue

    # Finding edges of bounding box
    x_min = np.floor(bbox[0]).astype("int")
    y_min = np.floor(bbox[1]).astype("int")
    x_max = np.ceil(bbox[0] + bbox[2]).astype("int")
    y_max = np.ceil(bbox[1] + bbox[3]).astype("int")

    # If bounding box does not contain every keypoint, expand it so it does
    # NOTE(review): the extremes below use the raw (unshifted) keypoint
    # coordinates while x_val/y_val above are shifted by -1 - confirm intended.
    keypoints = keypoints.reshape((-1, 3))
    filtered_keypoints = keypoints[keypoints[:, -1] != 0] # keypoints where v != 0
    min_keypoints = np.min(filtered_keypoints, axis = 0)
    max_keypoints = np.max(filtered_keypoints, axis = 0)
    x_min_kp = min_keypoints[0]
    x_max_kp = max_keypoints[0]
    y_min_kp = min_keypoints[1]
    y_max_kp = max_keypoints[1]
    x_min = min(x_min, x_min_kp)
    y_min = min(y_min, y_min_kp)
    x_max = max(x_max, x_max_kp)
    y_max = max(y_max, y_max_kp)

    # If it is not possible for the image to be centered wrt the x-axis
    if (x_min_kp + (x_max_kp - x_min_kp)/2 - (y_max_kp - y_min_kp)/2 < 0 or x_min_kp + (x_max_kp - x_min_kp)/2 + (y_max_kp - y_min_kp)/2 > I.shape[1]):
      continue

    # Makes the image have squared aspect ratio
    if (x_max - x_min > y_max - y_min):
      additional = 1/2 * ((x_max - x_min) - (y_max - y_min))
      y_max += np.ceil(additional).astype("int")
      y_min -= np.floor(additional).astype("int")
    elif (y_max - y_min > x_max - x_min):
      additional = 1/2 * ((y_max - y_min) - (x_max - x_min))
      x_max += np.ceil(additional).astype("int")
      x_min -= np.floor(additional).astype("int")

    # Centers the bounding box around all of the keypoints of the person
    # (x is moved here; the y centre is remembered and applied further down)
    center = [x_min_kp + (x_max_kp - x_min_kp)/2, y_min_kp + (y_max_kp - y_min_kp)/2]
    x_kp_center_coor = center[0]
    curr_x_center_coor = x_min + (x_max - x_min)/2
    curr_y_center_coor = y_min + (y_max - y_min)/2
    x_shift = np.floor(x_kp_center_coor - curr_x_center_coor).astype("int")
    x_min = x_min + x_shift
    x_max = x_max + x_shift

    # Resizes the bbox
    y_dist = y_max_kp - y_min_kp
    x_dist = x_max_kp - x_min_kp
    side_length = max(y_dist, x_dist) * (1 + margin_size)

    if (side_length != 0):
      x_side_diff = (x_max - x_min) - side_length
      x_min += int(x_side_diff//2)
      x_max -= int(x_side_diff//2)

    # If the bbox with the margins does not fit in the image, shrink the bbox
    if (x_min < 0):
      x_max += -1 * (x_min)
      y_max += -1 * (x_min)
      y_min += x_min
      x_min = 0
    elif (x_max > I.shape[1]):
      diff = x_max - I.shape[1]
      x_max = I.shape[1]
      x_min += diff
      y_min += diff
      y_max -= diff

    y_side_diff = (y_max - y_min) - side_length
    if (y_side_diff != y_max - y_min):
      y_min += int(y_side_diff//2)
      y_max -= int(y_side_diff//2)

    if (side_length < x_max_kp - x_min_kp or side_length < y_max_kp - y_min_kp): # if the bbox cannot fit the keypoints, discard this person
      continue

    # Centering the bbox on the keypoints wrt the y-axis, then moving it up or
    # down until it starts inside the image
    y_kp_center_coor = center[1]
    y_shift = np.floor(y_kp_center_coor - curr_y_center_coor).astype("int")
    y_min = y_min + y_shift
    y_max = y_max + y_shift

    if (y_min < 0):
      y_max += -1 * (y_min)
      y_min = 0
    elif (y_max > I.shape[0]):
      y_min -= (y_max - I.shape[0])
      y_max = I.shape[0]
      # A box taller than the image would leave y_min negative here; a
      # negative slice start wraps around and yields a crop that no longer
      # matches the shifted keypoints, so the box is clamped to the top edge.
      y_min = max(y_min, 0)

    I_cropped = I[y_min : y_max, x_min : x_max]

    # An empty crop cannot be resized later on, so such a person is discarded
    if (I_cropped.shape[0] == 0 or I_cropped.shape[1] == 0):
      continue

    # Moving keypoints
    x_val = x_val - x_min
    y_val = y_val - y_min

    # Combining the keypoints into one flat [x1, y1, v1, x2, ...] array again
    temp_keypoints = np.column_stack((x_val, y_val, v_val)).ravel()

    # Appending a copy of the cropped image and the new keypoints to the results
    res_images.append(np.array(I_cropped))
    res_keypoints.append(temp_keypoints)

  return res_images, res_keypoints

In [8]:
def resize_img(I, keypoints, wanted_width, wanted_height):
  """Resizes image I and its keypoints to wanted_height x wanted_width.

  Args:
    I: image array indexed as [row, column, ...].
    keypoints: flat sequence [x1, y1, v1, x2, y2, v2, ...] in the coordinate
      system of I.
    wanted_width: number of columns of the resized image.
    wanted_height: number of rows of the resized image.

  Returns:
    (I_resized, keypoints_resize): the resized image and a flat list of the
    rescaled keypoints. x and y are scaled with floor division; v is copied
    unchanged.

  Raises:
    AssertionError: if the resized image does not have the requested size.
  """
  # skimage.transform.resize takes the output shape as (rows, cols), i.e.
  # (height, width) - not (width, height).
  I_resized = skimage.transform.resize(I, (wanted_height, wanted_width))

  # Extracting keypoints coordinates and scaling them to the new size
  x_val = np.array(keypoints[::3]) * wanted_width//I.shape[1]
  y_val = np.array(keypoints[1::3]) * wanted_height//I.shape[0]
  v_val = np.array(keypoints[2::3])

  # Combining the keypoints into one flat list again
  keypoints_resize = [value for triplet in zip(x_val, y_val, v_val) for value in triplet]

  assert I_resized.shape[0] == wanted_height and I_resized.shape[1] == wanted_width

  return I_resized, keypoints_resize

In [9]:
def prepare_image(I, anns, wanted_width, wanted_height, margin_size):
  """Produces one cropped and resized sample per person found in an image.

  The image is first cut into per-person crops with crop_image; every crop is
  then passed through resize_img together with its keypoints.

  Returns:
    (res_img, res_kp): a list with the resized crops and a numpy array built
    from the corresponding resized keypoint lists.
  """
  crops, crop_keypoints = crop_image(I, anns, margin_size)

  # Resize every (crop, keypoints) pair in order
  resized_pairs = [
    resize_img(crop, crop_kp, wanted_width, wanted_height)
    for crop, crop_kp in zip(crops, crop_keypoints)
  ]

  res_img = [resized_crop for resized_crop, _ in resized_pairs]
  res_kp = [resized_kp for _, resized_kp in resized_pairs]

  return res_img, np.array(res_kp)

In [10]:
def prepare_output(img_shape, wanted_width, wanted_height, keypoints):
  """Creates one heatmap per keypoint of a single image.

  Args:
    img_shape: shape of the image the keypoints refer to; only the first two
      entries (rows, columns) are used.
    wanted_width: number of columns of every heatmap.
    wanted_height: number of rows of every heatmap.
    keypoints: flat sequence [x1, y1, v1, x2, y2, v2, ...]. The position
      marked for a keypoint is [y - 1, x - 1].

  Returns:
    A list with len(keypoints)/3 arrays of shape (wanted_height, wanted_width).
    A keypoint with v == 0 (not labelled), or whose position falls outside
    img_shape, yields an all-zero heatmap. Otherwise the heatmap is a one-hot
    image resized to the wanted size and blurred with sigma 1 for v == 1 or
    sigma 0.5 for v == 2.
  """
  x_val = keypoints[::3]
  y_val = keypoints[1::3]
  v_val = keypoints[2::3]

  # One heatmap per keypoint is stored here
  res_arr = []

  for x, y, v in zip(x_val, y_val, v_val):
    x, y, v = int(x), int(y), int(v)
    row, col = y - 1, x - 1

    # Unlabelled keypoints carry no position, and an out-of-range index would
    # either wrap around (negative) or raise, so both cases get an empty map
    # instead of a misplaced peak.
    in_frame = 0 <= row < img_shape[0] and 0 <= col < img_shape[1]
    if (v == 0 or not in_frame):
      res_arr.append(np.zeros((wanted_height, wanted_width)))
      continue

    res = np.zeros((img_shape[0], img_shape[1]))
    res[row, col] = 1

    res = skimage.transform.resize(res, (wanted_height, wanted_width))
    if (v == 1):
      res = skimage.filters.gaussian(res, sigma = 1)
    elif (v == 2):
      res = skimage.filters.gaussian(res, sigma = 0.5)

    res_arr.append(res)

  return res_arr


In [10]:
"""Saving training/testing images"""

# Root folders of the dataset; every path below is derived from these two
DATA_DIR = "C:/Users/André/Onedrive 2/OneDrive/Skrivebord/bachelor_thesis/code/data"
TRAIN_DIR = DATA_DIR + "/training_images"

annFile_instances = DATA_DIR + "/annotations/instances_train2017.json"
coco_instances = COCO(annFile_instances)

catIds = coco_instances.getCatIds(catNms = ['person']) # Gets category ids with a person in it
imgIds = coco_instances.getImgIds(catIds=catIds) # Gets all images with a person in it

annFile_kp = DATA_DIR + "/annotations/person_keypoints_train2017.json"
coco_kps = COCO(annFile_kp)

wanted_width, wanted_height = 256, 256
margin_size = 0.1

prep_anns = {}

# Running per-channel sum and pixel count for the average colour. Keeping
# every prepared image in a list and stacking them at the end needs far more
# memory than is available for the full dataset.
rgb_sum = np.zeros(3)
pixel_count = 0

for imgId in tqdm(imgIds):
  img_info = coco_instances.loadImgs(imgId)[0]
  annIds = coco_kps.getAnnIds(imgIds = img_info["id"], catIds = catIds)
  anns = coco_kps.loadAnns(annIds)
  img = io.imread(img_info['coco_url'])
  prepared_imgs, prepared_anns = prepare_image(img, anns, wanted_width, wanted_height, margin_size) # Prepares the image

  for i, (prep_img, prep_ann) in enumerate(zip(prepared_imgs, prepared_anns)):
    sample_id = "{}_{}".format(img_info["id"], i)
    path_input = TRAIN_DIR + "/input/{}.png".format(sample_id)
    io.imsave(path_input, (prep_img * 255).astype(np.uint8)) # Saves the prepared image
    prep_anns[sample_id] = prep_ann

    num_channels = prep_img.reshape((prep_img.shape[0], prep_img.shape[1], -1)).shape[2]
    if (num_channels == 3):
      rgb_sum += prep_img.reshape(-1, 3).sum(axis = 0)
    elif (num_channels == 1):
      # A single-channel image counts with the same value in all three channels
      rgb_sum += prep_img.sum()
    else:
      raise Exception("ELSE")
    pixel_count += prep_img.shape[0] * prep_img.shape[1]

df = pd.DataFrame.from_dict(prep_anns)
df.to_csv(TRAIN_DIR + "/output.txt", index = False)

if (pixel_count == 0):
  raise RuntimeError("No images were prepared, the average colour is undefined")

# Mean of every channel over all pixels of all prepared images
average_rgb = rgb_sum / pixel_count
np.save(TRAIN_DIR + "/output/average_rgb.npy", average_rgb)
with h5py.File(TRAIN_DIR + "/output/average_rgb.h5", "w") as f:
  f.create_dataset("default", data = average_rgb, dtype = average_rgb.dtype)

# Recursively archives the directory /content/data/training_images/input into input.zip.
# NOTE(review): the code earlier in this cell saves the images under a "C:/Users/..." data folder,
# not under /content/data - confirm this path matches where the images were actually written.
!zip -r /content/data/training_images/input.zip /content/data/training_images/input

loading annotations into memory...
Done (t=17.47s)
creating index...
index created!
loading annotations into memory...
Done (t=7.98s)
creating index...
index created!


HBox(children=(FloatProgress(value=0.0, max=64115.0), HTML(value='')))



KeyboardInterrupt: 

In [None]:
# Picks 5064 random entries of /content/data/training_images/input and hands each one to `mv`.
# NOTE(review): the mv destination is the same input/ directory the files are picked from, and the
# line has no leading "!" although it is a shell command - confirm the intended target directory.
shuf -n 5064 -e /content/data/training_images/input/* | xargs -i mv {} /content/data/training_images/input/