# Preparing training dataset for BlazePose

## example format

```JSON
[
    {
        "image": "001.png",
        "points": [[280, 540], [315, 468], [356, 354], [354, 243], [471, 331], [514, 440], [546, 540]],
        "visibility": [1, 1, 1, 1, 0, 0, 1]
    },
    {
        "image": "002.png",
        "points": [[269, 529], [289, 465], [305, 410], [310, 309], [455, 358], [542, 429], [560, 542]],
        "visibility": [1, 0, 0, 1, 1, 1, 1]
    },
    ...
]
```

In [1]:
import os
import sys
import shutil
import copy
from pathlib import Path
import json
import cv2

import scipy.io
import numpy as np
from tqdm import tqdm
from adjustText import adjust_text
from matplotlib import pyplot as plt
from matplotlib.patches import Circle

In [2]:
# Project root is the parent of the directory the notebook is executed from.
root = Path(os.getcwd()).parent
# sys.path entries must be strings (non-string entries are ignored during import),
# so the Path is converted; the membership check keeps re-runs of this cell idempotent.
if str(root) not in sys.path:
    sys.path.append(str(root))

dataset_name = ['mpii']
dataset_path = [os.path.join(root, 'data', name) for name in dataset_name]

# Format of filenames = [[mpii_img_1, mpii_img_2, ... (mpii_img_k)]]
# Each '<dataset>_filenames.txt' is split on whitespace; `with` guarantees the handle
# is closed even if reading fails.
filenames = []
for path, name in zip(dataset_path, dataset_name):
    with open(os.path.join(path, '{}_filenames.txt'.format(name))) as f:
        filenames.append(f.read().split())

In [3]:
# Activity names that mark an MPII image as swimming-related.
activities = set((
    'swimming, backstroke',
    'swimming, breaststroke, recreational',
    'swimming, butterfly, general',
    'swimming, general',
    'swimming, sidestroke, general',
))

In [4]:
# Copied images are collected under <current working directory>/swim_dataset/images;
# the folder (and any missing parent) is created if it does not exist yet.
out_dir = os.path.join(Path.cwd(), 'swim_dataset', 'images')
os.makedirs(out_dir, exist_ok=True)

In [5]:
# Lookup table: MPII joint id -> joint name
mpii_idx_to_jnt = dict([
    (0, 'rankl'), (1, 'rknee'), (2, 'rhip'),
    (5, 'lankl'), (4, 'lknee'), (3, 'lhip'),
    (6, 'pelvis'), (7, 'thorax'), (8, 'upper_neck'),
    (11, 'relb'), (10, 'rwri'), (9, 'head'),
    (12, 'rsho'), (13, 'lsho'), (14, 'lelb'), (15, 'lwri'),
])

# Ground-truth template: one (initially empty) list per joint name, keyed in joint-id
# order 0..15. It is filled in as the ground truth is read.
mpii_template = {mpii_idx_to_jnt[joint_id]: [] for joint_id in range(16)}

# Read 'joints.mat' from the first dataset folder and keep element [0, 0] of its
# 'RELEASE' entry; struct_as_record=False makes struct fields reachable via __dict__.
matlab_mpii = scipy.io.loadmat(
    os.path.join(dataset_path[0], 'joints.mat'),
    struct_as_record=False,
)['RELEASE'][0, 0]

# Number of entries in 'annolist'; used as the image count.
num_images = matlab_mpii.__dict__['annolist'][0].shape[0]
# NOTE(review): annotation_mpii is bound to the same integer here, which looks
# accidental -- the extraction loop rebinds it per image. Confirm before removing.
annotation_mpii = num_images

In [6]:
# Output images are named with a zero-padded running number, e.g. "001.jpg".
output_format = "{:03d}.jpg"
# Running counters; out_idx is advanced by the extraction loop.
out_idx = 0
test_img_num = 0

# (Re)initialise the annotation file with an empty JSON list so that records
# can be appended to it one at a time.
data = []
json_file_path = os.path.join(Path.cwd(), 'swim_dataset', "all_data.json")
with open(json_file_path, 'w') as file:
    file.write(json.dumps(data, indent=4))

In [7]:
def write_json(file_path, image_info, write_name):
    '''
    Append the annotation record of one image to the JSON list stored in file_path.

    For each of the joints 'rhip', 'lhip', 'rsho', 'lsho' (in this order) the first
    ground-truth entry [x, y, visibility] found in image_info['mpii']['img_gt'][0]
    is used: x and y are converted with int() and stored under "points", the
    visibility value is converted with int() and stored under "visibility".

    :param file_path: (string) JSON file containing a list of records. A missing file
                      is treated as an empty list and is created by this call.
    :param image_info: (dict) must provide image_info['mpii']['img_gt'][0] (dict mapping
                       joint name -> list of array-like [x, y, visibility]) and
                       image_info['mpii']['img_name'][0] (original file name).
    :param write_name: (string) file name stored under the "image" key.
    :raises IndexError: if one of the four joints has no ground-truth entry. The record
                        is built before any file access, so the file stays untouched.
    '''

    ground_truth = image_info['mpii']['img_gt'][0]

    points = []
    visibilities = []
    for item in ('rhip', 'lhip', 'rsho', 'lsho'):
        joint = ground_truth[item][0]
        # Plain Python ints: numpy scalars (e.g. float16) are not JSON serializable
        points.append([int(x) for x in joint[:2].flatten()])
        visibilities.append(int(joint[2]))

    # Dictionary representing the data to append
    data = {
        "image": write_name,
        "points": points,
        "visibility": visibilities,
        "original_img_name": image_info['mpii']['img_name'][0]
    }

    # Load the existing JSON file; start a new list if it has not been created yet
    try:
        with open(file_path, 'r') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        existing_data = []

    # Append the new data to the existing data
    existing_data.append(data)

    # Write the updated data to the JSON file
    with open(file_path, 'w') as file:
        json.dump(existing_data, file, indent=4)

In [8]:
# Start information extraction loop.
# An image only receives a running number, a JSON record and a copy in out_dir after
# every check has passed, so out_dir never contains images without a JSON record and
# the numbering has no gaps.
for img_idx in tqdm(range(num_images)):

    # Select swimming-related images
    activity = matlab_mpii.__dict__['act'][img_idx][0].__dict__['act_name']
    if activity.shape[0] == 0 or activity[0] not in activities:
        continue

    annotation_mpii = matlab_mpii.__dict__['annolist'][0, img_idx]
    person_id = matlab_mpii.__dict__['single_person'][img_idx][0].flatten()

    # Load the individual image. Skip the entry if the file for this name is not available.
    img_name = annotation_mpii.__dict__['image'][0, 0].__dict__['name'][0]
    source = os.path.join(dataset_path[0], 'images', img_name)
    try:
        image = plt.imread(source)
    except FileNotFoundError:
        print('Could not load filename: {}'.format(img_name))
        continue

    # Avoid modifying the template and create a copy
    gt_per_image = copy.deepcopy(mpii_template)

    # Flag is set to true if at least one person exists in the image with joint annotations.
    # If Flag == True, then the image and GT are written out, else the image is skipped
    annotated_person_flag = False

    # Iterate over persons (person ids are shifted by one to index 'annorect')
    for person in (person_id - 1):
        try:
            annopoints_img_mpii = annotation_mpii.__dict__['annorect'][0, person].__dict__['annopoints'][0, 0]
            num_joints = annopoints_img_mpii.__dict__['point'][0].shape[0]

            # Iterate over present joints
            for i in range(num_joints):
                x = annopoints_img_mpii.__dict__['point'][0, i].__dict__['x'].flatten()[0]
                y = annopoints_img_mpii.__dict__['point'][0, i].__dict__['y'].flatten()[0]
                id_ = annopoints_img_mpii.__dict__['point'][0, i].__dict__['id'][0][0]
                vis = annopoints_img_mpii.__dict__['point'][0, i].__dict__['is_visible'].flatten()

                # No entry corresponding to visible: treat the joint as visible
                if vis.size == 0:
                    vis = 1
                else:
                    vis = vis.item()

                gt_per_joint = np.array([x, y, vis]).astype(np.float16)
                gt_per_image[mpii_idx_to_jnt[id_]].append(gt_per_joint)

            annotated_person_flag = True
        except KeyError:
            # Person 'x' could not have annotated joints, hence move to person 'y'
            continue

    if not annotated_person_flag:
        continue

    # Bundle image, name and ground truth in the structure write_json expects
    img_dict = {'mpii': {'img': [image], 'img_name': [img_name], 'img_pred': [], 'img_gt': [gt_per_image]}}

    # Tentatively use the next running number; it is committed only if the record is written
    img_json_name = output_format.format(out_idx + 1)
    try:
        write_json(json_file_path, img_dict, img_json_name)
    except IndexError:
        # A joint needed by write_json has no annotation: skip without consuming a number
        continue
    out_idx += 1

    # Copy the image to swim_dataset/images/ under its new name, e.g. "001.jpg"
    output = os.path.join(out_dir, img_json_name)
    shutil.copy(source, output)

100%|███████████████████████████████████| 24987/24987 [00:05<00:00, 4173.95it/s]


In [9]:
json_file = os.path.join(Path.cwd(), 'swim_dataset', 'all_data.json')

# Parse the annotation file back into Python objects
with open(json_file, 'r') as file:
    data = json.loads(file.read())

# Number of top-level entries in the parsed JSON data
count = len(data)

# Report the result
print("Number of items:", count)

Number of items: 147


In [10]:
# TODO

def visualize_json(json_input):
    '''
    Plot the visible key points of every record on its image and save the figures.

    For each record the image <cwd>/swim_dataset/images/<record["image"]> is shown and
    every point whose visibility equals 1 is drawn as a blue 'v' marker. The figure is
    saved to <cwd>/results/viz_gt/ under the image's base name with a ".png" extension.

    :param json_input: (string) path to a JSON file holding a list of records with the
                       keys "image", "points" and "visibility".
    '''

    # Read the JSON file
    with open(json_input, 'r') as file:
        image_infos = json.load(file)

    img_dump = os.path.join(os.getcwd(), 'results', 'viz_gt')
    os.makedirs(img_dump, exist_ok=True)
    image_dir = os.path.join(Path(os.getcwd()), 'swim_dataset', 'images')

    for image_info in tqdm(image_infos):

        img_name = image_info['image']
        img = plt.imread(os.path.join(image_dir, img_name))

        try:
            plt.imshow(img)

            # Pair each point with its visibility flag instead of assuming exactly four
            for point, v in zip(image_info["points"], image_info["visibility"]):
                if v == 1:
                    plt.plot(point[0], point[1], marker='v', color="blue")

            # Save the plotted image; splitext copes with extensions of any length
            save_path = os.path.join(img_dump, os.path.splitext(img_name)[0] + ".png")
            plt.savefig(save_path)
        finally:
            # Always close the current figure to avoid memory issues, even on errors
            plt.close()

In [11]:
# Save a key-point overlay figure for every record in the JSON file at json_file.
visualize_json(json_file)

100%|█████████████████████████████████████████| 147/147 [00:55<00:00,  2.64it/s]
