Environments:
- ```dlmines``` on GPU-equipped machines
- ```tp_mines``` on base

Launch Jupyter from ```tp_mines```

- Copy from bucket:

```gsutil cp -r gs://tp_mines/* ~/tp_mines/```

- Copy to bucket:

First authenticate with ```gsutil config -b``` and follow the procedure it prompts for, then run:

```gsutil cp -r ~/tp_mines/* gs://tp_mines/```

In [10]:
# Dataset directory layout (every path ends with a trailing slash).

# Outputs of the training/validation split.
training_dir = "pose_data/training/"
validation_dir = "pose_data/validation/"

# Full-resolution source images and their downsampled copies.
originals_dir = "pose_data/original/"
downsampled_dir = "pose_data/downsampled/"

# The split is drawn from the downsampled images.
full_dir = downsampled_dir

## Imports

In [7]:
import os
from IPython.display import clear_output
from PIL import Image
from shutil import copyfile
import numpy as np

from tpbasics import img_numpy, show_img_file

## Downsampling

In [8]:
# Integer divisor applied to each side of the source images.
downsampling_factor = 8

# Size of the original images; the downsampled size derived from it is the
# 2-tuple handed to Image.resize in the downsampling loop.
original_dim = (3968, 2976)
downsampled_dim = (
    original_dim[0] // downsampling_factor,
    original_dim[1] // downsampling_factor,
)

In [9]:
# Resize every image of every pose directory under originals_dir to
# downsampled_dim and save it, under the same file name, in the matching
# pose directory under downsampled_dir.
for pose_dir in os.listdir(originals_dir):

    # The empty last component keeps the trailing separator on both paths.
    originals_pose_path = os.path.join(originals_dir, pose_dir, "")
    downsampled_pose_path = os.path.join(downsampled_dir, pose_dir, "")

    # os.listdir also returns plain files (e.g. OS metadata files); only
    # directories can contain pose images.
    if not os.path.isdir(originals_pose_path):
        continue

    img_names = os.listdir(originals_pose_path)

    # exist_ok replaces the separate exists() check and its check-then-create race.
    os.makedirs(downsampled_pose_path, exist_ok=True)

    for i, img_name in enumerate(img_names):
        # Replace the previous progress line instead of accumulating output.
        clear_output()
        print("{2} -    {0}/{1}".format(i + 1, len(img_names), pose_dir))

        # The context manager releases the source file handle as soon as the
        # resized copy exists, instead of leaking one handle per image.
        with Image.open(os.path.join(originals_pose_path, img_name)) as im:
            # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
            # same filter under its current name.
            rim = im.resize(downsampled_dim, Image.LANCZOS)
        rim.save(os.path.join(downsampled_pose_path, img_name))

Position_4 -    232/232


## Training-validation split

In [11]:
# Settings for the training/validation split.
train_fraction = 0.8  # share of each pose directory's images used for training
seed = 0              # passed to np.random.seed before the split is drawn

In [12]:
# Randomly split the images of every pose directory under full_dir into
# training_dir and validation_dir (same pose sub-directory names), copying
# a train_fraction share of each pose to training and the rest to validation.
#
# NOTE(review): files left over from an earlier run are never removed. After
# changing seed or train_fraction, empty training_dir and validation_dir
# first, otherwise the same image can end up in both sets.
np.random.seed(seed)

# os.listdir returns entries in arbitrary order. Sorting both the pose
# directories and the image names makes the seeded draw select the same
# files on every machine and filesystem.
for pose_dir in sorted(os.listdir(full_dir)):

    # The empty last component keeps the trailing separator on each path.
    full_pose_path = os.path.join(full_dir, pose_dir, "")
    training_pose_path = os.path.join(training_dir, pose_dir, "")
    validation_pose_path = os.path.join(validation_dir, pose_dir, "")

    # Skip plain files sitting next to the pose directories.
    if not os.path.isdir(full_pose_path):
        continue

    os.makedirs(training_pose_path, exist_ok=True)
    os.makedirs(validation_pose_path, exist_ok=True)

    img_names = sorted(os.listdir(full_pose_path))
    n_images = len(img_names)

    # Nothing to split; also avoids calling np.random.choice on an empty range.
    if not img_names:
        continue

    # Draw, without replacement, the indices of the images used for training.
    idx_training = set(
        np.random.choice(n_images, size=round(train_fraction * n_images), replace=False)
    )

    # Copy each image to the training or the validation directory.
    for i, img_name in enumerate(img_names):
        dest_pose_path = training_pose_path if i in idx_training else validation_pose_path
        copyfile(
            os.path.join(full_pose_path, img_name),
            os.path.join(dest_pose_path, img_name),
        )