In [None]:
!pip install matplotlib pandas imageio tqdm

In [None]:
import cv2
import json
from IPython.display import clear_output, display

In [None]:
import shlex
import time
from pathlib import Path

import imageio
import matplotlib.pyplot as plt
import pandas as pd
import tqdm
from PIL import Image

%matplotlib inline

## Download

youtube-dl --rm-cache-dir

youtube-dl -f bestvideo https://youtu.be/Fkadv0VnZkI

youtube-dl -f bestvideo https://www.youtube.com/playlist?list=PLAPUEAObdbMb747QUFsjQ2e9MPz1FkDnQ

youtube-dl -f bestvideo https://www.youtube.com/playlist?list=PLAPUEAObdbMaBtaElCDD3XD4hWO631ihN

## Preprocess

ffmpeg -i video1.webm -r 1 second_iteration/%06d_img.jpg

In [None]:
# Directory that the cells below scan for video files (relative to the notebook's
# working directory).
videos_path = Path('../sport_data/')

In [None]:
# File extension of every entry found directly inside the videos directory.
videos_suffixes = [entry.suffix for entry in videos_path.iterdir()]

In [None]:
# Number of directory entries per file extension. The list is wrapped in a Series
# first instead of being passed as `self` to the unbound Series.value_counts method;
# dtype=object keeps the call well-defined for an empty directory.
pd.Series(videos_suffixes, dtype=object).value_counts()

In [None]:
# Build a single shell line that runs ffmpeg (10 frames per second) on every
# .mp4/.webm file found in videos_path.
#  * The index `i` counts every directory entry, not only videos, so the resulting
#    vid_XX numbers may have gaps.
#  * Only the bare file name is used, so the command presumably has to be run from
#    inside videos_path - verify before executing.
#  * File names are quoted with shlex.quote so that characters such as $, backticks
#    or quotes inside a name cannot break (or inject into) the shell command.
ffmpeg_calls = []
for i, p in enumerate(videos_path.iterdir()):
    if p.suffix in ['.mp4', '.webm']:
        ffmpeg_calls.append(
            f'ffmpeg -i {shlex.quote(p.name)} -r 10 second_iteration/vid_{i:02}_%05d.jpg; '
        )
command = ''.join(ffmpeg_calls)

In [None]:
# command

## Raw images 

In [None]:
# Directory with the extracted frames; later cells list, filter and read files from it.
dataset_path = Path('../sport_data/second_iteration/')

In [None]:
# File extension of every entry found directly inside the dataset directory.
suffixes = [entry.suffix for entry in dataset_path.iterdir()]

In [None]:
# Number of directory entries per file extension. The list is wrapped in a Series
# first instead of being passed as `self` to the unbound Series.value_counts method;
# dtype=object keeps the call well-defined for an empty directory.
pd.Series(suffixes, dtype=object).value_counts()

In [None]:
# Delete every regular file in the dataset directory whose extension is not
# .jpg/.png/.jpeg (compared case-insensitively). Sub-directories are skipped:
# Path.unlink() cannot remove a directory and would abort the clean-up with an error.
image_suffixes = {'.jpg', '.png', '.jpeg'}
for p in dataset_path.iterdir():
    if p.is_file() and p.suffix.lower() not in image_suffixes:
        p.unlink()

In [None]:
# For each of the first 30 video indices, print how many files in the dataset
# directory start with that video's `vid_XX` prefix.
for video_idx in range(30):
    matching_files = sum(1 for _ in dataset_path.glob(f'vid_{video_idx:02}*'))
    print(video_idx, matching_files)

In [None]:
# Read ten consecutive files (positions 540-549 in sorted order) that match the
# vid_04 prefix.
vid_04_files = sorted(dataset_path.glob('vid_04*'))
images = [imageio.imread(frame_path) for frame_path in vid_04_files[540:550]]

In [None]:
import numpy as np

In [None]:
# Show the last five loaded images, one figure each. The previous version sized the
# loop with images[-5:] but indexed images[i], which displayed the first five
# images instead.
for frame in images[-5:]:
    plt.figure()
    plt.imshow(frame)

## Run pose detection

See instructions in ./deep-high-resolution-net.pytorch/demo/run.sh

## Pose detection results

In [None]:
# Output folders of the pose-detection demo run; the cells below read image files
# from each of them.
bboxes = Path('../sport/deep-high-resolution-net.pytorch/demo/output2/boxes/')
poses = Path('../sport/deep-high-resolution-net.pytorch/demo/output2/poses/')

In [None]:
# Display the first five files listed in the boxes folder, one figure per file.
box_preview_paths = list(bboxes.iterdir())[:5]
for preview_path in box_preview_paths:
    plt.figure()
    plt.imshow(plt.imread(preview_path))

In [None]:
# Display the first five files listed in the poses folder, one figure per file.
pose_preview_paths = list(poses.iterdir())[:5]
for preview_path in pose_preview_paths:
    plt.figure()
    plt.imshow(plt.imread(preview_path))

## Pose detection coords processing

In [None]:
# Load the per-detection table written by the pose-detection demo.
pose_csv_path = '../sport/deep-high-resolution-net.pytorch/demo/output2/pose-data.csv'
poses_df = pd.read_csv(pose_csv_path, encoding="ISO-8859-1")

In [None]:
# Cast the four bounding-box corner columns to int (they are later used as slice
# bounds and as OpenCV rectangle corners).
for box_column in ('box_top_left_x', 'box_top_left_y',
                   'box_bottom_right_x', 'box_bottom_right_y'):
    poses_df[box_column] = poses_df[box_column].astype(int)

In [None]:
def extract_from_row(row):
    """Read the bounding box and 16 body points from one pose-table row.

    Each point is returned as an ``(x, y)`` tuple built from the attributes
    ``<column>_x`` and ``<column>_y`` of ``row``.

    NOTE(review): the column prefixes (nose, left_eye, ...) intentionally do not
    match the body-part names they are returned as; presumably the CSV header uses
    a different joint layout than the one the detector actually emits - confirm
    against the detector configuration. The ``right_ankle`` columns are not read.

    Returns an 18-tuple in this order:
        box_left, box_right, foot_left, knee_left, hip_left, hip_right,
        knee_right, hip_center, top_center, wrist_left, shoulder_left,
        shoulder_right, wrist_right, head_top, head_bottom, foot_right,
        elbow_left, elbow_right
    """
    # Column prefix for every returned point, listed in return order.
    column_prefixes = (
        'box_top_left',      # box_left
        'box_bottom_right',  # box_right
        'nose',              # foot_left
        'left_eye',          # knee_left
        'right_eye',         # hip_left
        'left_ear',          # hip_right
        'right_ear',         # knee_right
        'right_shoulder',    # hip_center
        'left_elbow',        # top_center
        'right_wrist',       # wrist_left
        'right_hip',         # shoulder_left
        'left_knee',         # shoulder_right
        'left_ankle',        # wrist_right
        'left_wrist',        # head_top
        'right_elbow',       # head_bottom
        'left_shoulder',     # foot_right
        'left_hip',          # elbow_left
        'right_knee',        # elbow_right
    )
    return tuple(
        (getattr(row, f'{prefix}_x'), getattr(row, f'{prefix}_y'))
        for prefix in column_prefixes
    )

In [None]:
def draw_on_image(row, img):
    """Draw the bounding box and all 16 labelled body points of ``row`` onto ``img``.

    The box is drawn as a green rectangle; every point gets a small circle plus a
    short text label anchored at the point. The result of the last OpenCV call is
    returned.

    NOTE(review): OpenCV drawing calls generally need integer pixel coordinates, but
    only the box columns are cast to int in this notebook - confirm that the
    keypoint columns are integral.
    """
    (box_left, box_right, foot_left, knee_left, hip_left, hip_right,
     knee_right, hip_center, top_center, wrist_left, shoulder_left,
     shoulder_right, wrist_right, head_top, head_bottom, foot_right,
     elbow_left, elbow_right) = extract_from_row(row)

    img = cv2.rectangle(img, box_left, box_right, color=(0, 255, 0), thickness=3)

    # (label, point) pairs in drawing order; later entries paint over earlier ones
    # where they overlap.
    labelled_points = [
        ('ft_L', foot_left),
        ('kn_L', knee_left),
        ('kn_R', knee_right),
        ('hip_L', hip_left),
        ('hip_R', hip_right),
        ('hip', hip_center),
        ('top_C', top_center),
        ('wr_L', wrist_left),
        ('shdr_L', shoulder_left),
        ('shdr_R', shoulder_right),
        ('wr_R', wrist_right),
        ('head_T', head_top),
        ('head_B', head_bottom),
        ('ft_R', foot_right),
        ('elb_L', elbow_left),
        ('elb_R', elbow_right),
    ]
    for label, point in labelled_points:
        img = cv2.circle(img, point, 4, (255, 0, 0), 2)
        img = cv2.putText(img, label, point, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 255, thickness=2)
    return img

In [None]:
def extract_from_image(row, img):
    """Crop ``img`` to the detection box of ``row``, cut off at knee height.

    The crop keeps the box's left, top and right edges; the bottom edge is replaced
    by the floor-averaged y coordinate of the two knee points.
    """
    points = extract_from_row(row)
    box_left, box_right = points[0], points[1]
    knee_left, knee_right = points[3], points[6]

    left, top = box_left[0], box_left[1]
    right = box_right[0]
    bottom = (knee_left[1] + knee_right[1]) // 2
    return img[top: bottom, left: right, :]

## Extract useful crops from images

In [None]:
def map_coord_to_crop(coord, box_l, box_r):
    """Translate an image-space point into the coordinate system of a crop.

    Args:
        coord: ``(x, y)`` point in full-image coordinates.
        box_l: ``(x, y)`` of the crop's top-left corner in full-image coordinates.
        box_r: ``(x, y)`` of the crop's bottom-right corner in full-image coordinates.

    Returns:
        ``(x, y)`` relative to ``box_l``, clamped to ``[0, crop_width]`` by
        ``[0, crop_height]`` so points outside the box land on its border.

    The upper bound is the crop size (``box_r - box_l``). The previous version
    compared the already-shifted coordinate against the absolute ``box_r`` values,
    which made the clamp far too loose whenever the box does not start at the image
    origin, and it never clamped coordinates that fall left of / above the box.
    """
    crop_w = box_r[0] - box_l[0]
    crop_h = box_r[1] - box_l[1]
    x = max(0, min(coord[0] - box_l[0], crop_w))
    y = max(0, min(coord[1] - box_l[1], crop_h))
    return (x, y)

def extract_from_image_with_points(row, img):
    """Crop ``img`` like ``extract_from_image`` and return 12 points in crop coordinates.

    The crop spans the detection box horizontally and runs from the box top down to
    the floor-averaged knee height. Feet, hip centre and head top are read from the
    row but not returned.

    Returns:
        ``(crop, points)`` where ``points`` is a 12-tuple ordered as
        hip_left, hip_right, shoulder_left, shoulder_right, head_bottom,
        knee_left, knee_right, top_center, wrist_left, wrist_right,
        elbow_left, elbow_right.
    """
    (box_left, box_right, _foot_left, knee_left, hip_left, hip_right,
     knee_right, _hip_center, top_center, wrist_left, shoulder_left,
     shoulder_right, wrist_right, _head_top, head_bottom, _foot_right,
     elbow_left, elbow_right) = extract_from_row(row)

    # Bottom edge of the crop: midway between the two knees (image coordinates).
    box_right = (box_right[0], (knee_left[1] + knee_right[1]) // 2)

    image_space_points = (
        hip_left, hip_right, shoulder_left, shoulder_right, head_bottom,
        knee_left, knee_right, top_center, wrist_left, wrist_right,
        elbow_left, elbow_right,
    )
    crop_space_points = tuple(
        map_coord_to_crop(point, box_left, box_right) for point in image_space_points
    )

    crop = img[box_left[1]: box_right[1], box_left[0]: box_right[0], :]
    return crop, crop_space_points

## Resize and pad crops

In [None]:
def image_resize(image, keypoints, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping the aspect ratio.

    Args:
        image: array whose first two axes are (rows, columns).
        keypoints: iterable of ``(x, y)`` points in ``image`` coordinates.
        width: target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: target height in pixels, used only when ``width`` is None.
        inter: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        ``(resized_image, resized_keypoints)``; the keypoints are scaled by the same
        factors as the image and truncated to int. When neither ``width`` nor
        ``height`` is given the image is returned unchanged together with the
        keypoints as a list (previously this branch returned the bare image, which
        broke callers that unpack two values).

    An image with zero rows or columns raises ZeroDivisionError.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: keep the image as is, but still honour the 2-tuple contract.
    if width is None and height is None:
        return image, list(keypoints)

    if width is None:
        # Scale so that the height matches; width follows the aspect ratio.
        r = height / float(h)
        new_w, new_h = (int(w * r), height)
    else:
        # Scale so that the width matches; height follows the aspect ratio.
        r = width / float(w)
        new_w, new_h = (width, int(h * r))

    resized_image = cv2.resize(image.copy(), (new_w, new_h), interpolation = inter)

    # Use the realised (integer) output size so points stay aligned with the pixels.
    resized_keypoints = [
        (int(k[0] * (new_w / w)), int(k[1] * (new_h / h))) for k in keypoints
    ]
    return resized_image, resized_keypoints

def image_pad(image, keypoints, width=None, height=None):
    """Bring ``image`` to exactly ``width`` columns by centred zero-padding or cropping.

    Args:
        image: array of shape ``(rows, columns, channels)``.
        keypoints: iterable of ``(x, y)`` points in ``image`` coordinates.
        width: target number of columns.
        height: accepted for symmetry with ``image_resize``; padding along the
            vertical axis is not implemented.

    Returns:
        ``(image, keypoints)`` with the x coordinates shifted by the applied
        padding/cropping offset. A narrower image is centred on a black uint8
        canvas; a wider (or equally wide) image is centre-cropped. When neither
        ``width`` nor ``height`` is given the inputs are returned unchanged
        (previously this branch returned the bare image instead of a 2-tuple).

    Raises:
        NotImplementedError: if only ``height`` is given.
    """
    (h, w, c) = image.shape
    if width is None and height is None:
        return image, list(keypoints)

    if width is None:
        raise NotImplementedError()

    if w < width:
        # Centre the image on a black canvas; any odd pixel goes to the right side.
        pad_left = (width - w) // 2
        pad_image = np.zeros((h, width, c), dtype=np.uint8)
        pad_image[:, pad_left:pad_left + w, :] = image
        pad_keypoints = [(k[0] + pad_left, k[1]) for k in keypoints]
    else:
        # Centre-crop; keypoints outside the kept window end up outside [0, width).
        crop_left = (w - width) // 2
        pad_image = image[:, crop_left:crop_left + width, :].copy()
        pad_keypoints = [(k[0] - crop_left, k[1]) for k in keypoints]

    return pad_image, pad_keypoints

def resize_pad(image, keypoints, width=None, height=None):
    """Resize a crop to ``height`` rows, then pad or centre-crop it to ``width`` columns.

    Only the first three channels are kept. float32 input is scaled by 255 and cast
    to uint8 first - presumably such images hold values in [0, 1]; confirm for the
    image reader in use. Keypoints are transformed together with the image.

    Returns:
        ``(image, keypoints)`` as produced by ``image_pad``.
    """
    rgb = image[:, :, :3]
    if rgb.dtype == np.float32:
        rgb = (rgb * 255).astype(np.uint8)

    # Step 1: fix the height (aspect ratio preserved); step 2: fix the width.
    resized, resized_keyp = image_resize(rgb, keypoints, height=height)
    padded, padded_keyp = image_pad(resized, resized_keyp, width=width)
    return padded, padded_keyp

In [None]:
def mean_kp(a, b):
    """Return the midpoint of two ``(x, y)`` points, using floor division per axis."""
    mid_x = (a[0] + b[0]) // 2
    mid_y = (a[1] + b[1]) // 2
    return mid_x, mid_y

def make_pose_image(image, keyp):
    """Render a stick-figure label image and a flat feature vector for one crop.

    Args:
        image: array whose first two axes give the output size (rows, columns).
        keyp: sequence of ``(x, y)`` points indexed as
            0 hip_left, 1 hip_right, 2 shoulder_left, 3 shoulder_right,
            4 head_bottom, 5 knee_left, 6 knee_right, 7 top_center,
            8 wrist_left, 9 wrist_right, 10 elbow_left, 11 elbow_right
            (the order produced by ``extract_from_image_with_points``).

    Returns:
        ``(image_pose, keyp_features)``: a single-channel float image with ten
        limb segments, each drawn in its own grey level, and a 1-D array with
        every keypoint's x divided by the image width and y divided by the height.
    """
    h, w = image.shape[:2]

    hip_center = mean_kp(keyp[0], keyp[1])
    shoulder_center = mean_kp(keyp[2], keyp[3])

    # Segments in drawing order; each one is paired with the grey level at the
    # same position in `colors`.
    segments = [
        (keyp[0], keyp[1]),             # hip line
        (keyp[2], keyp[3]),             # shoulder line
        (hip_center, shoulder_center),  # shoulder hip line
        (keyp[4], shoulder_center),     # shoulder head line
        (keyp[0], keyp[5]),             # left knee line
        (keyp[1], keyp[6]),             # right knee line
        (keyp[2], keyp[10]),            # left elbow line
        (keyp[3], keyp[11]),            # right elbow line
        (keyp[10], keyp[8]),            # left wrist line
        (keyp[11], keyp[9]),            # right wrist line
    ]

    # Ten evenly spaced grey levels in (0, 255]; level 0 is left for the background.
    colors = np.linspace(0, 255, 11)[1:]
    thickness = 15

    image_pose = np.zeros((h, w))
    for (start, end), level in zip(segments, colors):
        image_pose = cv2.line(image_pose, start, end, int(level), thickness)

    keyp_features = np.array([[point[0] / w, point[1] / h] for point in keyp]).ravel()

    return image_pose, keyp_features

In [None]:
# Build the conditional dataset: for every detection, crop the person, normalise the
# crop to target_width x target_height, and store the image, its stick-figure label
# image and its keypoint feature vector.
small_images = 0  # number of detections skipped because the crop was too short
target_width, target_height = 512, 512
min_crop_height = 400  # crops with fewer rows than this are skipped
# mean_keypoints = np.loadtxt('mean_keypoints.txt', dtype=np.int32) # from EDA.ipynb
all_features = []
idx = 0  # running index of saved samples; used in the output file names

# imageio does not create missing directories, so make sure the target exists.
Path('stylegan2/youtube_512_conditional').mkdir(parents=True, exist_ok=True)

for i in tqdm.tqdm(range(len(poses_df))):
    row = poses_df.iloc[i]
    name = row.fname.split('/')[-1]

    try:
        img = plt.imread('../sport_data/second_iteration/' + name)
    except Exception as err:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still
        # stop the loop, and report why the file could not be read.
        print('Error with opening', name, repr(err))
        continue

    img, keypoints = extract_from_image_with_points(row, img)

    if img.shape[0] < min_crop_height:
        small_images += 1
        continue

    pad_image, pad_keyp = resize_pad(img, keypoints, target_width, target_height)
    pos_image, features = make_pose_image(pad_image, pad_keyp)

    all_features.append(features)
    imageio.imsave(f'stylegan2/youtube_512_conditional/img_{idx}.jpg', pad_image)
    imageio.imsave(f'stylegan2/youtube_512_conditional/label_{idx}.jpg', pos_image.astype(np.uint8))
    idx += 1

# Row k of the saved array belongs to img_k.jpg / label_k.jpg.
all_features = np.vstack(all_features)
np.save('stylegan2/youtube_512_conditional.npy', all_features)

In [None]:
!stylegan2/youtube_512_conditional/ | wc

In [None]:
# Show the most recently processed crop (pad_image is left over from the loop above).
plt.imshow(pad_image)

In [None]:
# Recompute the label image and feature vector for the crop left over from the loop.
pos_image, features = make_pose_image(pad_image, pad_keyp)

In [None]:
# Plot the flat feature vector (alternating x / width and y / height values).
plt.plot(features)

In [None]:
# Show the stick-figure label image, then switch the colormap to grayscale.
plt.imshow(pos_image)
plt.gray()

---

In [None]:
import sys
sys.path.insert(0, '../sport/stylegan2/')

In [None]:
from dataset_tool import TFRecordExporter

In [None]:
# Write the saved crops and their feature vectors with stylegan2's TFRecordExporter.
tfrecord_dir = '../sport/stylegan2/datasets/youtube_keypoints/'
count = len(all_features)

with TFRecordExporter(tfrecord_dir, count) as tfr:
    # `order` is reused by the inspection cells below, so it must stay a global.
    order = tfr.choose_shuffled_order()
    for position in tqdm.tqdm(range(order.size)):
        fname = f'stylegan2/youtube_512_conditional/img_{order[position]}.jpg'
        hwc_image = np.array(plt.imread(fname))
        # Move the channel axis to the front: (rows, cols, ch) -> (ch, rows, cols).
        chw_image = np.transpose(hwc_image, (2, 0, 1))
        tfr.add_image(np.array(chw_image))
    # Labels are passed in the same shuffled order as the images.
    tfr.add_labels(all_features[order])

In [None]:
# Load the image stored at position 5 of the shuffled export order.
idx = 5
fname = f'stylegan2/youtube_512_conditional/img_{order[idx]}.jpg'
img = plt.imread(fname)

In [None]:
# Show the selected sample before any keypoints are drawn on it.
plt.imshow(img)

In [None]:
# Draw the sample's keypoints on the image. The feature vector stores alternating
# x / width and y / height values, so each pair is scaled back to pixels.
# Index the feature row once: all_features[order][idx] copied the whole array on
# every access, while all_features[order[idx]] selects the same row directly.
sample_features = all_features[order[idx]]
for i in range(len(sample_features) // 2):
    p = (int(sample_features[i * 2] * target_width),
         int(sample_features[i * 2 + 1] * target_height))
    img = cv2.circle(img, p, 4, (255, 0, 0), 2)

In [None]:
# Show the sample again, now with the keypoint circles drawn on it.
plt.imshow(img)

In [None]:
# Plot the feature vector that belongs to the selected sample.
plt.plot(all_features[order][idx])

## Extract a single person from a single video

In [None]:
# Keep only the detections with box number 0 whose file name contains 'vid_10',
# ordered by file name.
is_vid_10 = poses_df.fname.apply(lambda x: 'vid_10' in x)
is_first_box = poses_df.box_num == 0
one_person_df = poses_df[is_vid_10 & is_first_box].sort_values('fname')

In [None]:
# Flip through the first crops of the selected person as a quick in-notebook
# animation. The count is capped by the table length so a short table no longer
# raises IndexError, and each row is looked up once instead of twice.
preview_count = min(50, len(one_person_df))
for i in range(preview_count):
    row = one_person_df.iloc[i]
    frame = plt.imread(dataset_path / row.fname.split('/')[-1])
    img = extract_from_image(row, frame)
    clear_output(True)
    display(Image.fromarray(img).resize((128, 256)))
    time.sleep(0.01)

In [None]:
# Export every crop of the selected person together with its stick-figure label
# image, and collect the normalised-size keypoints for the mean pose computed below.
target_width, target_height = 512, 512
idx = 0  # running index of saved samples; used in the output file names
pad_keyp_all = []

# imageio does not create missing directories, so make sure the target exists.
Path('stylegan2/youtube_512_one_person').mkdir(parents=True, exist_ok=True)

for i in tqdm.tqdm(range(len(one_person_df))):
    row = one_person_df.iloc[i]
    name = row.fname.split('/')[-1]

    try:
        img = plt.imread('../sport_data/second_iteration/' + name)
    except Exception as err:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still
        # stop the loop, and report why the file could not be read.
        print('Error with opening', name, repr(err))
        continue

    img, keypoints = extract_from_image_with_points(row, img)

    # An empty crop (e.g. knee line at or above the box top) cannot be resized:
    # image_resize divides by the crop height.
    if img.shape[0] == 0 or img.shape[1] == 0:
        print('Empty crop for', name)
        continue

    pad_image, pad_keyp = resize_pad(img, keypoints, target_width, target_height)
    pos_image, features = make_pose_image(pad_image, pad_keyp)
    pad_keyp_all.append(pad_keyp)

    imageio.imsave(f'stylegan2/youtube_512_one_person/img_{idx}.jpg', pad_image)
    imageio.imsave(f'stylegan2/youtube_512_one_person/label_{idx}.jpg', pos_image.astype(np.uint8))
    idx += 1

In [None]:
# Average every keypoint over all exported frames and render the resulting pose.
pad_keyp_all = np.array(pad_keyp_all)
mean_keyp = list(map(tuple, pad_keyp_all.mean(axis=0).astype(int)))

# Entries 8 and 9 are wrist_left / wrist_right (see the tuple order returned by
# extract_from_image_with_points); both get 100 pixels added to their y coordinate.
wrist_y_offset = 100
for wrist_idx in (8, 9):
    wrist_x, wrist_y = mean_keyp[wrist_idx]
    mean_keyp[wrist_idx] = (wrist_x, wrist_y + wrist_y_offset)

# pad_image is only used for its size here.
pos_image, features = make_pose_image(pad_image, mean_keyp)

In [None]:
# Show the label image rendered from the mean keypoints.
plt.imshow(pos_image)

In [None]:
# Store the mean-pose label image as 8-bit.
mean_pose_label = pos_image.astype(np.uint8)
imageio.imsave('stylegan2/mean_pose_label.jpg', mean_pose_label)