In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina'
from tools import _init_paths
from pathlib import Path
import torch
import torchvision.transforms as transforms

from PIL import Image, ImageOps
import numpy as np
import pylab
pylab.rcParams['figure.figsize'] = 10.0, 8.0

import dataset
import models
from config import cfg as conf
from config import update_config
from utils.utils import model_key_helper
from core.inference import get_final_preds_match

import sys
import pandas as pd
from tqdm import tqdm



In [2]:
class Visualizer:
    """Draws a 17-keypoint pose skeleton on top of an image with matplotlib."""

    # Skeleton edges as pairs of 1-based keypoint indices; plot_results
    # shifts them to 0-based before indexing into the predictions.
    SKEL = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], 
            [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], 
            [2, 4], [3, 5], [4, 6], [5, 7]]

    def plot_results(self, img, pred, vis, scale):
        """Show ``img`` with the visible skeleton edges and keypoints overlaid.

        Args:
            img: Image accepted by ``plt.imshow``.
            pred: Per-keypoint coordinates; ``pred[k][0]`` is x and ``pred[k][1]``
                is y. Assumed to be a numpy array, since boolean-mask indexing
                is applied to it.
            vis: Per-keypoint visibility; a keypoint is drawn only when its
                value is greater than 0. Assumed to be a numpy array.
            scale: Multiplier applied to the body line width and marker radii.

        Edges that join a head keypoint (0-4) to a body keypoint (5-16) are
        never drawn, and neither are body edges without an assigned colour.
        """
        plt.figure()
        plt.imshow(img)
        ax = plt.gca()
        
        # Colour groups, keyed by (smaller, larger) 0-based keypoint index.
        GREEN = [(5,6),(6,8),(8,10)]
        YELLOW = [(5,7),(7,9)]
        BLUE = [(5,11),(11,13),(13,15)]
        PINK = [(6,12),(12,14),(14,16)]

        vis_head = pred[:5][vis[:5] > 0]
        vis_body = pred[5:][vis[5:] > 0]

        for i, j in self.SKEL:
            ki = min(i, j) - 1
            kj = max(i, j) - 1
            # 0-4 head
            # 5,7,9,11,13,15 left
            # 6,8,10,12,14,16 right

            i, j = i - 1, j - 1
            src = pred[i]
            dst = pred[j]
            # An edge needs both of its endpoints to be visible.
            if vis[i] <= 0 or vis[j] <= 0:
                continue
            # face detail
            # NOTE(review): this width is fixed at 100 and ignores `scale`,
            # unlike the body edges below - confirm that is intended.
            if ki < 5 and kj < 5:
                plt.plot([src[0], dst[0]],[src[1], dst[1]], linewidth=100, color=[250/255, 32/255, 98/255, 1], 
                        solid_capstyle='round', zorder=1)
            if ki >=5 and kj >= 5:
                pair = ki, kj
                trans = 1
                if pair in GREEN:
                    color = (38 / 255, 252 / 255, 145 / 255, trans)
                elif pair in YELLOW:
                    color = [250 / 255, 244 / 255, 60 / 255, trans]
                elif pair in BLUE:
                    color = [104 / 255, 252 / 255, 252 / 255, trans]
                elif pair in PINK:
                    color = [255 / 255, 148 / 255, 212 / 255, trans]
                else:
                    continue
                plt.plot([src[0], dst[0]],[src[1], dst[1]], linewidth=6.0 * scale, color=color, 
                         solid_capstyle='round', zorder=1)

        # Keypoint markers are drawn once, after the edges. They do not depend
        # on the edge being processed, so adding them per edge only stacked
        # identical circles on the axes. Head markers use half the body radius.
        for pnt in vis_head:
            circle = plt.Circle((pnt[0], pnt[1]), 1.5 * scale * 1.2, fill=False, color='black', zorder=10)
            ax.add_artist(circle)
        for pnt in vis_body:
            circle = plt.Circle((pnt[0], pnt[1]), 3.0 * scale * 1.2, fill=False, color='black', zorder=10)
            ax.add_artist(circle)

        plt.axis('off')
        plt.show()


class Args:
    """Plain attribute holder that plays the role of parsed command-line arguments."""

    # Experiment configuration file.
    cfg = 'experiments/coco/transformer/w32_384x288_adamw_lr1e-4.yaml'
    # Checkpoint holding the pretrained weights.
    pretrained = 'lib/models/pytorch/pose_coco/pose_transformer_hrnet_w32_384x288.pth'
    # No extra option overrides.
    opts = []
    # Directory settings are left unset.
    modelDir = logDir = dataDir = None

In [3]:
def inference(config, flip_pairs, input, size, target=None, target_weight=None, meta=None, transform=None):
    """Run the module-level ``model`` on one image and decode its keypoints.

    Args:
        config: Configuration object forwarded to ``get_final_preds_match``.
        flip_pairs: Unused; kept so existing call sites keep working.
        input: Single image; it is passed through ``transform`` and given a
            leading batch dimension before being fed to the model.
        size: ``(width, height)`` of the image. Its midpoint is used as the
            "center" handed to the decoder, in place of ``meta['center']``.
        target, target_weight, meta: Unused; kept for call-site compatibility.
        transform: Callable that turns ``input`` into a model-ready tensor.
            Although it defaults to ``None``, it must be supplied.

    Returns:
        A 4-tuple ``(preds_raw[0], vis, 1.0, preds_raw_)`` where ``preds_raw``
        and ``preds_raw_`` are the third and fourth values returned by
        ``get_final_preds_match``, and ``vis`` is a length-17 array of ones
        with indices 1-4 (face detail) zeroed.
    """
    # Stand-ins for meta['center'] / meta['scale']: the image midpoint and a
    # fixed (2, 2) scale.
    center = np.array([size[0]/2,size[1]/2], dtype=np.float32)
    scale = np.array([2.,2.], dtype=np.float32)

    # Pure inference: no gradients are needed, so skip building the autograd
    # graph instead of allocating and holding one for every image.
    with torch.no_grad():
        outputs = model(transform(input)[None, ...])
        preds, _, preds_raw, preds_raw_ = get_final_preds_match(config, outputs, center, scale)

    vis = np.ones((17,))
    # ignore face detail     
    vis[1:5] = 0.
    
    return preds_raw[0], vis, 1., preds_raw_

In [4]:
args = Args()
update_config(conf, args)
model = models.pose_transformer.get_pose_net(conf, is_train=False)
# NOTE: torch.load unpickles the checkpoint; only load files from a trusted source.
model.load_state_dict(model_key_helper(torch.load(args.pretrained, map_location='cpu')))
# Put the network in evaluation mode so that layers such as dropout and batch
# normalisation behave deterministically. torch modules start in training mode
# unless the factory switches them, and eval() is harmless if it already did.
model.eval()

# Preprocessing applied to every image before it is fed to the model.
normalize = transforms.Compose([
    transforms.Resize((384, 288)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Single place to change when the data lives somewhere else.
DATASET_ROOT = "/home/lin/Desktop/dlp_final/DATASET"

# use original TRAIN dataset as testing dataset, TEST dataset as training dataset
train_dataset = dataset.yoga(
    root = f"{DATASET_ROOT}/TEST", mode = "train", size=(384, 288)
)
valid_dataset = dataset.yoga(
    root = f"{DATASET_ROOT}/TRAIN", mode = "test", size=(384, 288)
)


#valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=4)
print(f"Training Loaded {len(train_dataset)} images")
print(f"Testing Loaded {len(valid_dataset)} images")

> Found 470 images...
> Found 1081 images...
Training Loaded 470 images
Testing Loaded 1081 images


In [5]:
# VIS_INDICES = [300, 100]
# Keypoint index -> keypoint name for the 17 body keypoints, in index order.
keypoint_dict = dict(enumerate([
    "nose",
    "left_eye", "right_eye",
    "left_ear", "right_ear",
    "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow",
    "left_wrist", "right_wrist",
    "left_hip", "right_hip",
    "left_knee", "right_knee",
    "left_ankle", "right_ankle",
]))

In [6]:
def normalize_min_max(df):
    """Add min-max normalised copies of the coordinate columns to ``df``.

    The columns ``'x-coord norm'`` and ``'y-coord norm'`` are written into
    ``df`` itself (the frame is modified in place) and the same frame is
    returned. Each is ``(value - min) / (max - min)`` of its source column.

    A column whose values are all equal has zero range; it is mapped to 0.0
    rather than to the NaN that dividing 0 by 0 would produce.

    Args:
        df: DataFrame with numeric ``'x-coord'`` and ``'y-coord'`` columns.

    Returns:
        The same ``df`` object, with the two normalised columns added.
    """
    for source in ('x-coord', 'y-coord'):
        values = df[source]
        lowest = values.min()
        span = values.max() - lowest
        shifted = values - lowest
        if span == 0:
            # Degenerate range: every value equals the minimum.
            df[source + ' norm'] = shifted.astype(float)
        else:
            df[source + ' norm'] = shifted / span
    return df

In [7]:
def save_keypoint(dataset, exclude_keypoint=("left_eye", "right_eye", "left_ear", "right_ear")):
    """Run pose inference on every sample of ``dataset`` and save the keypoints.

    For each sample a text file is written next to the image: the image path
    with its extension replaced by ``.txt``. The file holds one ``x y`` row per
    kept keypoint, taken from the fourth value returned by ``inference``
    (described by the original author as the direct model output in [0, 1]).

    Args:
        dataset: Indexable dataset whose items unpack to
            ``(input, label, w, h, path)``.
        exclude_keypoint: Names from ``keypoint_dict`` to leave out of the
            saved file. The default drops the eyes and ears, keeping 13
            keypoints; add ``"nose"`` to keep 12.
    """
    # Rows to keep. The prediction rows are assumed to follow the order of
    # keypoint_dict, so this selection is the same for every image.
    excluded = set(exclude_keypoint)
    keep_rows = [row for row, name in enumerate(keypoint_dict.values()) if name not in excluded]

    for image_idx in tqdm(range(len(dataset))):

        # get data
        input, _label, w, h, path = dataset[image_idx]

        # Only the fourth return value is saved; the others feed the optional
        # visualisation: Visualizer().plot_results(input, preds_raw, vis, scale)
        # flip_pairs comes from the dataset actually being processed, not
        # from a global one.
        _preds_raw, _vis, _scale, preds_raw_ = inference(
            conf, getattr(dataset, 'flip_pairs', None), input, size=(w, h),
            target=None, target_weight=None, meta=None, transform=normalize)

        # (kept keypoints, 2) array holding the x and y columns.
        coords = np.asarray(preds_raw_)[keep_rows, :2]

        # Replace only the final extension, so dots elsewhere in the path
        # (e.g. "./img.jpg" or "v1.2/img.jpg") do not truncate the file name.
        np.savetxt(str(Path(path).with_suffix('.txt')), coords)


In [8]:
save_keypoint(train_dataset)

100%|██████████████████████████████████████████████████████████████████████████████████| 470/470 [01:28<00:00,  5.32it/s]


In [9]:
save_keypoint(valid_dataset)

100%|████████████████████████████████████████████████████████████████████████████████| 1081/1081 [03:29<00:00,  5.16it/s]
