# Fit and configure scalers

In [None]:
# Get needed auxiliary files for colab
!git clone https://github.com/philippmatthes/diplom
%cd /content/diplom/src
!mkdir shl-dataset
!wget -nc -O shl-dataset/challenge-2019-train_torso.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_torso.zip
!wget -nc -O shl-dataset/challenge-2019-train_bag.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_bag.zip
!wget -nc -O shl-dataset/challenge-2019-train_hips.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_hips.zip
!wget -nc -O shl-dataset/challenge-2020-train_hand.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2020/challenge-2020-train_hand.zip

In [None]:
# Switch to src dir and select tensorflow
# The later cells use paths relative to this directory (shl-dataset/, models/);
# presumably it also makes the `tools` package importable — verify.
%cd /content/diplom/src
# NOTE(review): Colab-specific magic; none of the cells visible in this notebook
# import tensorflow — confirm this line is still needed.
%tensorflow_version 2.x

In [1]:
# Create our scalers
from sklearn.preprocessing import PowerTransformer

from tools.dataset import shl_dataset_X_attributes

# One independent, not-yet-fitted PowerTransformer per dataset attribute,
# keyed by the attribute name.
scalers = {
    attribute: PowerTransformer()
    for attribute in shl_dataset_X_attributes
}
scalers

{'acc_x': PowerTransformer(),
 'acc_y': PowerTransformer(),
 'acc_z': PowerTransformer(),
 'mag_x': PowerTransformer(),
 'mag_y': PowerTransformer(),
 'mag_z': PowerTransformer(),
 'gyr_x': PowerTransformer(),
 'gyr_y': PowerTransformer(),
 'gyr_z': PowerTransformer(),
 'gra_x': PowerTransformer(),
 'gra_y': PowerTransformer(),
 'gra_z': PowerTransformer(),
 'lacc_x': PowerTransformer(),
 'lacc_y': PowerTransformer(),
 'lacc_z': PowerTransformer(),
 'ori_x': PowerTransformer(),
 'ori_y': PowerTransformer(),
 'ori_z': PowerTransformer(),
 'ori_w': PowerTransformer()}

In [2]:
from pathlib import Path

# Locations of the zipped SHL training sets (torso, bag, hips, hand).
# NOTE: despite the name, every entry points at a .zip file, not a directory.
DATASET_DIRS = [
    Path(zip_file)
    for zip_file in (
        'shl-dataset/challenge-2019-train_torso.zip',
        'shl-dataset/challenge-2019-train_bag.zip',
        'shl-dataset/challenge-2019-train_hips.zip',
        'shl-dataset/challenge-2020-train_hand.zip',
    )
]

In [3]:
# Load the datasets
from tqdm import tqdm

from tools.dataset import load_zipped_shl_dataset

# Accumulator for the combined dataset; stays None until the first zip is loaded.
dataset = None

for dataset_dir in DATASET_DIRS:
    # Read one zipped dataset; tqdm is handed over so the loader can report progress
    partial_dataset = load_zipped_shl_dataset(dataset_dir, tqdm=tqdm)
    if dataset is not None:
        # Every later part is appended in place to the first one
        dataset.concat_inplace(partial_dataset)
        continue
    # The first loaded part becomes the accumulator itself
    dataset = partial_dataset

Extracting shl-dataset\challenge-2019-train_torso.zip: 100%|██████████| 22/22 [04:02<00:00, 11.01s/it]
Loading dataset subfiles: 100%|██████████| 20/20 [05:27<00:00, 16.39s/it]
Extracting shl-dataset\challenge-2019-train_bag.zip: 100%|██████████| 22/22 [04:07<00:00, 11.23s/it]
Loading dataset subfiles: 100%|██████████| 20/20 [05:40<00:00, 17.01s/it]
Extracting shl-dataset\challenge-2019-train_hips.zip:  18%|█▊        | 4/22 [01:17<05:47, 19.30s/it]


KeyboardInterrupt: 

In [None]:
import json
from pathlib import Path

import joblib

export_dir = 'models/'

# Create the export directory up front: neither open(..., 'w') nor joblib.dump
# creates missing parent directories, so a fresh checkout would fail otherwise.
Path(export_dir).mkdir(parents=True, exist_ok=True)

for attribute, scaler in tqdm(scalers.items(), desc='Fitting scalers'):
    # Fit the transformer of this attribute on the matching dataset field.
    # NOTE(review): fit() expects a 2D (n_samples, n_features) array — confirm
    # that the dataset attributes have that layout.
    scaler.fit(getattr(dataset, attribute))

    # Platform independent export
    transformer_params = {
        'lambdas': list(scaler.lambdas_),
        'method': scaler.method,
        'standardize': scaler.standardize,
    }
    # With standardize=True (the PowerTransformer default) the power transform
    # is followed by zero-mean / unit-variance scaling. The lambdas alone are
    # then not enough to reproduce the transform outside of Python, so the
    # fitted mean and scale are exported as well. The extra keys are additive;
    # readers that only use 'lambdas' are unaffected.
    # NOTE(review): `_scaler` is a private scikit-learn attribute — re-check on
    # scikit-learn upgrades.
    fitted_standardizer = getattr(scaler, '_scaler', None)
    if scaler.standardize and fitted_standardizer is not None:
        transformer_params['mean'] = list(fitted_standardizer.mean_)
        transformer_params['scale'] = list(fitted_standardizer.scale_)
    with open(export_dir + f'{attribute}.scaler.json', 'w') as f:
        f.write(json.dumps(transformer_params))
    # Python export
    joblib.dump(scaler, export_dir + f'{attribute}.scaler.joblib')

Fitting acc scaler...
Fitting mag scaler...
Fitting gyr scaler...
