# Fit and configure scalers

In [None]:
# Get needed auxiliary files for colab
!git clone https://github.com/philippmatthes/diplom
%cd /content/diplom/src
!mkdir shl-dataset
!wget -nc -O shl-dataset/challenge-2019-train_torso.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_torso.zip
!wget -nc -O shl-dataset/challenge-2019-train_bag.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_bag.zip
!wget -nc -O shl-dataset/challenge-2019-train_hips.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_hips.zip
!wget -nc -O shl-dataset/challenge-2020-train_hand.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2020/challenge-2020-train_hand.zip

In [None]:
# Switch to src dir and select tensorflow
# The working directory is set again here so that this cell can be run on its
# own; later cells use paths relative to it (e.g. shl-dataset/...).
%cd /content/diplom/src
# NOTE(review): %tensorflow_version is a Colab-specific magic and is presumably
# unavailable in a plain Jupyter kernel - confirm before running elsewhere.
%tensorflow_version 2.x

In [1]:
from sklearn.preprocessing import PowerTransformer

# One independent, default-configured transformer per sensor magnitude
# (accelerometer, magnetometer, gyroscope); each is fitted separately below.
acc_scaler, mag_scaler, gyr_scaler = (PowerTransformer() for _ in range(3))

In [2]:
from pathlib import Path

# Zipped SHL training archives (one per sensor placement), all located in the
# local shl-dataset/ directory relative to the current working directory.
DATASET_DIRS = [
    Path('shl-dataset') / archive_name
    for archive_name in (
        'challenge-2019-train_torso.zip',
        'challenge-2019-train_bag.zip',
        'challenge-2019-train_hips.zip',
        'challenge-2020-train_hand.zip',
    )
]

In [3]:
# Load the dataset
import numpy as np

from tqdm import tqdm

from tools.dataset import load_zipped_shl_dataset

# Per-sensor magnitude arrays, one list entry per dataset archive. The parts
# are collected first and concatenated once, instead of re-allocating the
# growing array on every loop iteration.
acc_mag_parts = []
mag_mag_parts = []
gyr_mag_parts = []

for dataset_dir in DATASET_DIRS:
    # Load dataset from zip file into temporary directory
    dataset = load_zipped_shl_dataset(dataset_dir, tqdm=tqdm)
    acc_mag_parts.append(dataset.acc_mag)
    mag_mag_parts.append(dataset.mag_mag)
    gyr_mag_parts.append(dataset.gyr_mag)
    # Drop the reference to the dataset object; only the three magnitude
    # arrays collected above are needed from here on.
    del dataset

if not acc_mag_parts:
    # Fail early with a clear message instead of passing None downstream
    raise ValueError('DATASET_DIRS is empty, there is nothing to fit the scalers on')

# Stack the parts of all datasets along the first axis. Each parts list is
# released right after its concatenation to keep peak memory low.
acc_mag_conc = np.concatenate(acc_mag_parts, axis=0)
del acc_mag_parts
mag_mag_conc = np.concatenate(mag_mag_parts, axis=0)
del mag_mag_parts
gyr_mag_conc = np.concatenate(gyr_mag_parts, axis=0)
del gyr_mag_parts

Extracting shl-dataset\challenge-2019-train_torso.zip: 100%|██████████| 22/22 [03:06<00:00,  8.48s/it]


Acc_x Import Done
Acc_y Import Done
Acc_z Import Done
Acc_mag Import Done
Mag_x Import Done
Mag_y Import Done
Mag_z Import Done
Mag_mag Import Done
Gyr_x Import Done
Gyr_y Import Done
Gyr_z Import Done
Gyr_mag Import Done
Labels Import Done


Extracting shl-dataset\challenge-2019-train_bag.zip: 100%|██████████| 22/22 [03:36<00:00,  9.85s/it]


Acc_x Import Done
Acc_y Import Done
Acc_z Import Done
Acc_mag Import Done
Mag_x Import Done
Mag_y Import Done
Mag_z Import Done
Mag_mag Import Done
Gyr_x Import Done
Gyr_y Import Done
Gyr_z Import Done
Gyr_mag Import Done
Labels Import Done


Extracting shl-dataset\challenge-2019-train_hips.zip: 100%|██████████| 22/22 [03:37<00:00,  9.88s/it]


Acc_x Import Done
Acc_y Import Done
Acc_z Import Done
Acc_mag Import Done
Mag_x Import Done
Mag_y Import Done
Mag_z Import Done
Mag_mag Import Done
Gyr_x Import Done
Gyr_y Import Done
Gyr_z Import Done
Gyr_mag Import Done
Labels Import Done


Extracting shl-dataset\challenge-2020-train_hand.zip: 100%|██████████| 23/23 [03:47<00:00,  9.87s/it]


Acc_x Import Done
Acc_y Import Done
Acc_z Import Done
Acc_mag Import Done
Mag_x Import Done
Mag_y Import Done
Mag_z Import Done
Mag_mag Import Done
Gyr_x Import Done
Gyr_y Import Done
Gyr_z Import Done
Gyr_mag Import Done
Labels Import Done


In [4]:
# Check that we don't have NaNs in our dataset
assert not any(
    np.isnan(magnitudes).any()
    for magnitudes in (acc_mag_conc, mag_mag_conc, gyr_mag_conc)
)

# Fit each scaler on its concatenated sensor magnitudes and keep the
# transformed data (the export of the fitted scalers is a separate cell)
print('Fitting acc scaler...')
acc_mag_scaled = acc_scaler.fit_transform(acc_mag_conc)

print('Fitting mag scaler...')
mag_mag_scaled = mag_scaler.fit_transform(mag_mag_conc)

print('Fitting gyr scaler...')
gyr_mag_scaled = gyr_scaler.fit_transform(gyr_mag_conc)

Fitting acc scaler...
Fitting mag scaler...
Fitting gyr scaler...


In [5]:
# Export transformers
#
# Every fitted transformer is written twice:
#   <stem>.json   - plain parameters, readable without Python/scikit-learn
#   <stem>.joblib - the fitted scikit-learn object itself

import json
import joblib

from pathlib import Path

for transformer, export_stem in [
    (acc_scaler, 'models/acc-scaler'),
    (mag_scaler, 'models/mag-scaler'),
    (gyr_scaler, 'models/gyr-scaler'),
]:
    # Make sure the output directory exists before writing into it
    Path(export_stem).parent.mkdir(parents=True, exist_ok=True)

    # Platform independent export
    # .tolist() converts NumPy scalars to plain Python floats for JSON
    transformer_params = {
        'lambdas': transformer.lambdas_.tolist(),
    }
    # With standardize=True (the PowerTransformer default) the power transform
    # is followed by a zero-mean / unit-variance scaling. Its fitted mean and
    # scale are required to reproduce the transform outside of scikit-learn,
    # so they are exported next to the lambdas.
    # NOTE(review): `_scaler` is a private scikit-learn attribute; the lookup
    # is guarded so the export still works should it be absent.
    fitted_standardizer = getattr(transformer, '_scaler', None)
    if transformer.standardize and fitted_standardizer is not None:
        transformer_params['mean'] = fitted_standardizer.mean_.tolist()
        transformer_params['scale'] = fitted_standardizer.scale_.tolist()
    with open(export_stem + '.json', 'w') as f:
        json.dump(transformer_params, f)

    # Python export
    joblib.dump(transformer, export_stem + '.joblib')