<a href="https://colab.research.google.com/github/PhilippMatthes/diplom/blob/master/src/shl-deep-learning-timeseries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using a deep CNN to directly classify SHL timeseries data

In [1]:
# Get needed auxiliary files for colab
!git clone https://github.com/philippmatthes/diplom
%cd /content/diplom/src
!mkdir shl-dataset
!wget -nc -O shl-dataset/challenge-2019-train_torso.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_torso.zip
!wget -nc -O shl-dataset/challenge-2019-train_bag.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_bag.zip
!wget -nc -O shl-dataset/challenge-2019-train_hips.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2019/challenge-2019-train_hips.zip
!wget -nc -O shl-dataset/challenge-2020-train_hand.zip http://www.shl-dataset.org/wp-content/uploads/SHLChallenge2020/challenge-2020-train_hand.zip

fatal: destination path 'diplom' already exists and is not an empty directory.
/content/diplom/src
mkdir: cannot create directory ‘shl-dataset’: File exists
File ‘shl-dataset/challenge-2019-train_torso.zip’ already there; not retrieving.
File ‘shl-dataset/challenge-2019-train_bag.zip’ already there; not retrieving.
File ‘shl-dataset/challenge-2019-train_hips.zip’ already there; not retrieving.
File ‘shl-dataset/challenge-2020-train_hand.zip’ already there; not retrieving.


In [2]:
# Switch to src dir and select tensorflow.
# The relative dataset/model paths and the local `tools` / `architectures`
# imports used by the following cells are resolved against this directory.
%cd /content/diplom/src
# Colab magic requesting a TensorFlow 2.x runtime
%tensorflow_version 2.x

/content/diplom/src


In [3]:
# Zip archives (torso, bag, hips and hand) that the model is trained on.
# The order of this list is the order in which the archives are loaded.

from pathlib import Path

DATASET_DIRS = list(map(Path, (
    'shl-dataset/challenge-2019-train_torso.zip',
    'shl-dataset/challenge-2019-train_bag.zip',
    'shl-dataset/challenge-2019-train_hips.zip',
    'shl-dataset/challenge-2020-train_hand.zip',
)))

In [4]:
# `keras` is needed further down for the training callbacks
from tensorflow import keras

# Check that we can use our GPU, to not wait forever during training.
# The returned device list is the cell's output; it should contain a GPU entry.
# NOTE(review): `tensorflow.python.*` is an internal TensorFlow namespace —
# consider the public `tf.config.list_physical_devices('GPU')` instead.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3792038049927744333, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 16183459840
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 13823030941224833537
 physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"]

In [5]:
import numpy as np

from tqdm import tqdm

from tools.dataset import load_zipped_shl_dataset

# Accumulator for all archives: the first loaded archive becomes the
# accumulator itself, every later archive is merged into it in place.
dataset = None

for dataset_dir in DATASET_DIRS:
    # Read one zipped archive (progress is reported through tqdm)
    partial_dataset = load_zipped_shl_dataset(dataset_dir, tqdm=tqdm)
    if dataset is not None:
        dataset.concat_inplace(partial_dataset)
        # The part has been merged, drop the extra reference right away
        del partial_dataset
        continue
    dataset = partial_dataset

Extracting shl-dataset/challenge-2019-train_torso.zip:  32%|███▏      | 7/22 [00:52<02:07,  8.50s/it]

KeyboardInterrupt: ignored

In [None]:
import joblib

from collections import OrderedDict

from tools.dataset import shl_dataset_X_attributes

# One persisted scaler per input attribute, keyed by attribute name and kept
# in the order given by `shl_dataset_X_attributes`.
scalers = OrderedDict(
    (a, joblib.load(f'models/shl-scalers/{a}.scaler.joblib'))
    for a in shl_dataset_X_attributes
)

In [None]:
# Prepare training data:
# scale every attribute of the dataset with its own scaler, then stack the
# scaled tracks along a new third axis to form the model input.

scaled_attr_tracks = []
for a, scaler in tqdm(scalers.items()):
    # Look up the attribute on the dataset and scale it in one go, so no
    # extra name keeps the unscaled track alive.
    scaled_attr_tracks.append(scaler.transform(getattr(dataset, a)))

X = np.stack(scaled_attr_tracks, axis=2)

X.shape

In [None]:
# Training targets: column 0 of the label matrix, cast to integers.
# `np.int` was only an alias of the builtin `int`; the alias was deprecated in
# NumPy 1.20 and removed in NumPy 1.24, so the builtin is used directly.
y = dataset.labels[:, 0].astype(int)

y.shape

In [None]:
# Drop the raw dataset to release memory; the cells below only work with X and y
del dataset

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights for unbiased training: 'balanced' weighting over the
# label values that actually occur in y.
label_values = np.unique(y)
balanced_weights = compute_class_weight('balanced', classes=label_values, y=y)

# Keras adaption: mapping from label value to its weight
class_weights = {
    label: weight for label, weight in zip(label_values, balanced_weights)
}

# Fill in NULL class for tf 2.x
class_weights[0] = 0
class_weights

In [None]:
# Build and compile the classifier

from tensorflow.keras import layers

from architectures.resnet import make_resnet
from tools.dataset import shl_dataset_label_order

# Location the checkpoint callback writes the best model to during training
MODEL_DIR = Path('models/shl-resnet-all-attributes')

# Shape of a single sample: X without its leading sample axis
sample_shape = [X.shape[1], X.shape[2]]
# One output per entry of the label order
n_output_classes = len(shl_dataset_label_order)

model = make_resnet(input_shape=sample_shape, output_classes=n_output_classes)

# Integer labels are used as targets, hence the sparse loss variant
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Print an overview of the compiled model
model.summary()

In [None]:
# Train model
from sklearn.model_selection import train_test_split

# Fixed seed so that the train/validation partition is reproducible
SPLIT_SEED = 42

# Keras' `validation_split` takes the *last* fraction of the samples, before
# any shuffling. The data was assembled archive by archive (presumably
# appended in DATASET_DIRS order), so the tail would consist of samples from
# the last archive only and the validation loss would not be representative.
# Draw a stratified random hold-out set instead.
# NOTE: this materializes shuffled copies of X and y; if memory is tight,
# release no longer needed intermediates before running this cell.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, stratify=y, random_state=SPLIT_SEED
)

callbacks = [
    # Per-epoch metrics, overwritten on every run
    keras.callbacks.CSVLogger('train.log', append=False),
    # Keep only the model with the best validation loss so far
    keras.callbacks.ModelCheckpoint(
        str(MODEL_DIR), save_best_only=True, monitor='val_loss', verbose=1
    ),
    # Scale the learning rate by 0.25 after 25 epochs without improvement,
    # but never below 0.0001
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.25, patience=25, min_lr=0.0001, verbose=1
    ),
    # Give up after 50 epochs without improvement
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=50, verbose=1),
]
model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
    verbose=1,
    class_weight=class_weights,
    shuffle=True
)