# Generating dataset for training and validation

## Selecting images related to swimming

In [1]:
import os
import sys
import copy
from pathlib import Path

import scipy.io
# import numpy as np
# from tqdm import tqdm
# from adjustText import adjust_text
# from matplotlib import pyplot as plt
# from matplotlib.patches import Circle

In [2]:
# Project root: the directory the notebook kernel is running in.
root = Path.cwd()

# sys.path entries must be plain strings -- the import machinery ignores any
# other type, so appending the Path object itself would have no effect.
# The membership check keeps the cell idempotent when it is re-run.
if str(root) not in sys.path:
    sys.path.append(str(root))

# One directory per dataset, located under <root>/data/<name>.
dataset_name = ['mpii']
dataset_path = [os.path.join(root, 'data', name) for name in dataset_name]

In [3]:
# Read the MPII annotation file. With struct_as_record=False scipy returns
# MATLAB structs as objects, so fields are reachable as attributes.
matlab_mpii = scipy.io.loadmat(
    os.path.join(dataset_path[0], 'joints.mat'),
    struct_as_record=False,
)['RELEASE'][0, 0]

# NOTE(review): `annotation_mpii` ends up holding the number of images (an
# int), not the annotation list itself -- confirm this is intended.
annotation_mpii = matlab_mpii.annolist[0].shape[0]
num_images = annotation_mpii

In [4]:
# Show every field stored on the RELEASE struct.
vars(matlab_mpii)

{'_fieldnames': ['annolist',
  'img_train',
  'version',
  'single_person',
  'act',
  'video_list'],
 'annolist': array([[<scipy.io.matlab._mio5_params.mat_struct object at 0x7fac12618190>,
         <scipy.io.matlab._mio5_params.mat_struct object at 0x7fac1262b880>,
         <scipy.io.matlab._mio5_params.mat_struct object at 0x7fac130e03a0>,
         ...,
         <scipy.io.matlab._mio5_params.mat_struct object at 0x7fabe484ff10>,
         <scipy.io.matlab._mio5_params.mat_struct object at 0x7fabe486a8b0>,
         <scipy.io.matlab._mio5_params.mat_struct object at 0x7fabe486ad00>]],
       dtype=object),
 'img_train': array([[0, 0, 0, ..., 1, 0, 0]], dtype=uint8),
 'version': array(['12'], dtype='<U2'),
 'single_person': array([[array([[1]], dtype=uint8)],
        [array([], shape=(0, 1), dtype=uint8)],
        [array([[1]], dtype=uint8)],
        ...,
        [array([[5]], dtype=uint8)],
        [array([], shape=(0, 1), dtype=uint8)],
        [array([], shape=(0, 1), dtype=uint8)]],

### Putting the [official docs](http://human-pose.mpi-inf.mpg.de/#download) here for reference...

#### Annotation description 
Annotations are stored in a MATLAB structure `RELEASE` with the following fields:

- `.annolist(imgidx)` - annotations for image `imgidx`
  - `.image.name` - image filename
  - `.annorect(ridx)` - body annotations for a person `ridx`
		  - `.x1, .y1, .x2, .y2` - coordinates of the head rectangle
		  - `.scale` - person scale w.r.t. 200 px height
		  - `.objpos` - rough human position in the image
		  - `.annopoints.point` - person-centric body joint annotations
		    - `.x, .y` - coordinates of a joint
		    - `id` - joint id 
[//]: # "(0 - r ankle, 1 - r knee, 2 - r hip, 3 - l hip, 4 - l knee, 5 - l ankle, 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top, 10 - r wrist, 11 - r elbow, 12 - r shoulder, 13 - l shoulder, 14 - l elbow, 15 - l wrist)"
		    - `is_visible` - joint visibility
  - `.vidx` - video index in `video_list`
  - `.frame_sec` - image position in video, in seconds
 
- `img_train(imgidx)` - training/testing image assignment 
- `single_person(imgidx)` - contains rectangle id `ridx` of *sufficiently separated* individuals
- `act(imgidx)` - activity/category label for image `imgidx`
  - `act_name` - activity name
  - `cat_name` - category name
  - `act_id` - activity id
- `video_list(videoidx)` - specifies the video id as provided by YouTube. To watch a video on YouTube, go to `https://www.youtube.com/watch?v=video_list(videoidx)`


In [59]:
# Spot-check entry 1223: when its activity label mentions a ball, show the
# label together with the file name of the corresponding image.
activity = matlab_mpii.act[1223][0].act_name[0]
if any(word in activity for word in ('ball', 'Ball')):
    print(activity)
    print(matlab_mpii.annolist[0][1223].image[0, 0].name[0])

therapeutic exercise ball, Fitball exercise
094361592.jpg


In [75]:
# Collect the file names of all images whose activity label mentions swimming.
swimming_images = []
act_entries = matlab_mpii.act
image_entries = matlab_mpii.annolist[0]
for i in range(num_images):
    activity = act_entries[i][0].act_name
    # Entries with an empty `act_name` carry no label and are skipped.
    if activity.shape[0] != 0 and ('swimming' in activity[0] or 'Swimming' in activity[0]):
        print(activity)
        img_name = image_entries[i].image[0, 0].name[0]
        swimming_images.append(img_name)

print(len(swimming_images))

['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, sidestroke, general']
['swimming, sidestroke, general']
['swimming, sidestroke, general']
['swimming, sidestroke, general']
['swimming, sidestroke, general']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, synchronized']
['swimming, general']
['swimming, backstroke']
['swimming, backstroke']
['swimming, backstroke']
['swimming, general']
['swimming, backstroke']
['swimming, backstroke']
['swimming, backstroke']
['swimming, backstroke']
['swimming, backstroke']
['swimming, general']
['swimming, general']
['swimming, general']
['swimming, general']
['swimming, general']
['swimming, gener

In [79]:
def write_array_to_file(array, filename):
    """Write each element of ``array`` to ``filename``, one per line.

    Every element is converted with ``str()`` and followed by a newline.
    The file is opened in text write mode, so existing content is replaced.

    Args:
        array: Iterable of items to write.
        filename: Path of the output file.
    """
    with open(filename, 'w') as out:
        out.writelines(f"{entry!s}\n" for entry in array)

In [82]:
# Save the selected image names, one per line.
file_name = 'data/mpii/swimming_images.txt'
write_array_to_file(array=swimming_images, filename=file_name)

In [84]:
# Preview the first 20 lines of the generated image list.
! head -20 data/mpii/swimming_images.txt 

003438852.jpg
052006802.jpg
054671028.jpg
061172075.jpg
041960006.jpg
024773099.jpg
001439349.jpg
011607264.jpg
093211828.jpg
056513318.jpg
069665082.jpg
092249938.jpg
070723863.jpg
051883806.jpg
005476164.jpg
033519724.jpg
080285345.jpg
000463202.jpg
064553436.jpg
015577326.jpg


## Annotating data