# MPII dataset -- Preparation

## 1. Download the dataset
### 1.1 Download and extract the annotation files
### 1.2 Selected the images of interest and the corresponding labels

## 2. Image preparation
...

In [1]:
import sys
sys.path.append("..")

In [2]:

import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import shutil
import json
import cv2
import os

from bodypose.dataset import save_keypoints
from bodypose.dataset import create_TFRcords

from config import MPII_KEYPOINT_DICT


## 1. Download the dataset

- Download the images from
- Download the annlotations in json format from https://www.kaggle.com/datasets/harshpatel66/mpii-human-pose

## 2. Prepare the dataset
### 2.1 Inspect the annotation file

In [3]:
with open("../dataset/MPII/annotations/mpii_annotations.json", "r") as f:
    annotations = json.load(f)

print(f"The file contains {len(annotations)} annotations.")

The file contains 25204 annotations.


In [4]:
for sample in annotations:
    if sample['numOtherPeople']==2:
        break

annotations[0]

{'dataset': 'MPI',
 'isValidation': 0.0,
 'img_paths': '015601864.jpg',
 'img_width': 1280.0,
 'img_height': 720.0,
 'objpos': [594.0, 257.0],
 'joint_self': [[620.0, 394.0, 1.0],
  [616.0, 269.0, 1.0],
  [573.0, 185.0, 1.0],
  [647.0, 188.0, 0.0],
  [661.0, 221.0, 1.0],
  [656.0, 231.0, 1.0],
  [610.0, 187.0, 0.0],
  [647.0, 176.0, 1.0],
  [637.02, 189.818, 1.0],
  [695.98, 108.182, 1.0],
  [606.0, 217.0, 1.0],
  [553.0, 161.0, 1.0],
  [601.0, 167.0, 1.0],
  [692.0, 185.0, 1.0],
  [693.0, 240.0, 1.0],
  [688.0, 313.0, 1.0]],
 'scale_provided': 3.021,
 'joint_others': [[895.0, 293.0, 1.0],
  [910.0, 279.0, 1.0],
  [945.0, 223.0, 0.0],
  [1012.0, 218.0, 1.0],
  [961.0, 315.0, 1.0],
  [960.0, 403.0, 1.0],
  [979.0, 221.0, 0.0],
  [906.0, 190.0, 0.0],
  [912.491, 190.659, 1.0],
  [830.509, 182.341, 1.0],
  [871.0, 304.0, 1.0],
  [883.0, 229.0, 1.0],
  [888.0, 174.0, 0.0],
  [924.0, 206.0, 1.0],
  [1013.0, 203.0, 1.0],
  [955.0, 263.0, 1.0]],
 'scale_provided_other': 2.472,
 'objpos_other'

In [7]:
def extract_coords(labels):
    (H, W) = (labels['img_height'], labels['img_width'])
    
    c_kpts = np.array(labels['joint_self'])
    c_kpts[:, :2] /= (W, H)

    c_centers = [c_kpts[c_kpts[:, -1]==1].mean(axis=0)[:2]]
    #if labels["numOtherPeople"] == 1:
    #        coords = np.array(labels["joint_others"])
    #        c_centers.append(coords[coords[:, -1]==1].mean(axis=0)[:2] / (W, H))
    #elif labels["numOtherPeople"] > 1:
    #    for joints in labels["joint_others"]:
    #        coords = np.array(joints)
    #        c_centers.append(coords[coords[:, -1]==1].mean(axis=0)[:2] / (W, H))

    c_centers = np.array(c_centers)

    return c_kpts, c_centers



def create_labels(dirPath, annotations, dstDir):

    if not os.path.exists(dstDir):
        os.mkdir(dstDir)

    trainDir = os.sep.join([dstDir, 'train'])
    if not os.path.exists(trainDir):
        os.mkdir(trainDir)

    valDir = os.sep.join([dstDir, 'valid'])
    if not os.path.exists(valDir):
        os.mkdir(valDir)
        

    imgPaths = list(paths.list_images(dirPath))
    print(f'[INFO] Found {len(annotations)} annotations...')
    print('[INFO] Creating labels...')
    
    counter = 0
    names_dict = {}

    for i, sample in enumerate(annotations):

        print(f'\r[INFO] Processing image {i+1}/{len(imgPaths)}...', end="")
        imgName = sample["img_paths"]
        imgPath = os.sep.join([dirPath, imgName])

        if not imgName in names_dict.keys():
            names_dict[imgName] = 0
        else:
            names_dict[imgName] += 1
            imgName = imgName.replace(".jpg", f"_{names_dict[imgName]}.jpg")

        if not os.path.exists(imgPath):
            counter+=1
            continue
        
        c_keypts, c_centers = extract_coords(sample)
        
        dstPath = os.sep.join([valDir, imgName]) if sample['isValidation'] else os.sep.join([trainDir, imgName])
        kptsTxtPath = dstPath.replace(".jpg", "_kpts.txt")
        cntrsTxtPath = dstPath.replace(".jpg", "_cntrs.txt")

        isLabelSaved = save_keypoints(c_keypts, kptsTxtPath)
        isCenterSaved = save_keypoints(c_centers, cntrsTxtPath)
        shutil.copyfile(imgPath, dstPath) 

        if not isLabelSaved:
            print(f"[ERROR] Could not save label: {kptsTxtPath}.")
            break

        if not isCenterSaved:
            print(f"[ERROR] Could not save label: {cntrsTxtPath}.")
            break
    print()
    print(f'[INFO] Skipped {counter} annotations.')

In [8]:
create_labels(
    dirPath = "../dataset/MPII/raw_images/", 
    annotations = annotations, 
    dstDir = "../dataset/MPII/images/"
    )

[INFO] Found 25204 annotations...
[INFO] Creating labels...
[INFO] Processing image 25204/24984...
[INFO] Skipped 0 annotations.


##  3. Create TFRecords

In [9]:
outDir = "../dataset/tfrecords/mpii/validation/"
if not os.path.isdir(outDir):
    os.makedirs(outDir)
else:
    !rm -r $(outDit)

imgPaths = list(paths.list_images("../dataset/MPII/images/valid/"))
create_TFRcords(imgPaths = imgPaths, 
                outDir = outDir + "tfrec_val.tfrecords", 
                target_size = (416, 416),
                ext = ".jpg",
                n_splits = 10)

[INFO] Found 2958 files.
 Processing file 6/318.../10. 

2022-11-04 14:52:15.348506: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


 Processing file 318/318...
 Processing file 306/306...0. 
 Processing file 279/279...0. 
 Processing file 288/288...0. 
 Processing file 275/275...0. 
 Processing file 284/284...0. 
 Processing file 289/289...0. 
 Processing file 327/327...0. 
 Processing file 281/281...0. 
 Processing file 311/311...10. 


In [10]:
outDir = "../dataset/tfrecords/mpii/train/"
if not os.path.isdir(outDir):
    os.makedirs(outDir)
else:
    !rm -r $(outDit)

imgPaths = list(paths.list_images("../dataset/MPII/images/train/"))
create_TFRcords(imgPaths = imgPaths, 
                outDir = outDir + "tfrec_train.tfrecords", 
                target_size = (416, 416),
                ext = ".jpg",
                n_splits = 10)

[INFO] Found 22246 files.
 Processing file 2253/2253... 
 Processing file 2251/2251... 
 Processing file 2199/2199... 
 Processing file 2202/2202... 
 Processing file 2237/2237... 
 Processing file 2267/2267... 
 Processing file 2210/2210... 
 Processing file 2179/2179... 
 Processing file 2235/2235... 
 Processing file 2213/2213.... 
