In [20]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [121]:
# Evaluating CNN+RNN models on the dataset

# Imports
import csv
import gc
import os
from collections import Counter
from datetime import datetime
from statistics import mode

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from wandb.keras import WandbCallback

import wandb
from wp8.options.train_options import TrainOptions
from wp8.pre_processing.generators import TimeSeriesGenerator as TSG
from wp8.pre_processing.utils import safe_mkdir
from wp8.utils.cnn_rnn_utils import get_timeseries_labels_encoded, load_and_split

In [122]:
class dotdict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    ``d.key`` is equivalent to ``d.get("key")``: a missing ordinary key yields
    ``None`` instead of raising. ``d.key = value`` stores ``d["key"] = value``
    and ``del d.key`` removes ``d["key"]`` (raising ``KeyError`` if absent).

    Missing *dunder* names (``__deepcopy__``, ``__getstate__``, ...) raise
    ``AttributeError`` as on any normal object. Returning ``None`` for them
    makes ``hasattr`` report protocol hooks that do not exist, which can break
    ``copy`` and ``pickle``.
    """

    def __getattr__(self, name):
        # Only reached when regular attribute lookup has already failed.
        is_dunder = name.startswith("__") and name.endswith("__")
        if is_dunder and name not in self:
            # Let Python's protocol probes see "not implemented" rather than None.
            raise AttributeError(name)
        return self.get(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

# Quick check of attribute-style access on a dotdict.
nested_dict = {'val': 'nested works too'}
mydict = dotdict({'val': 'it works'})
mydict.val  # -> 'it works'

# A dotdict stored as a value can be reached by chaining dots.
mydict.nested = dotdict(nested_dict)
mydict.nested.val  # -> 'nested works too'

'nested works too'

In [123]:
# Run options are hard-coded here instead of being parsed from the command line:
# opt = TrainOptions().parse()
opt = dotdict(
    # Network
    lstm1_units=256,
    lstm2_units=128,
    dropout=0.3,
    # Optimisation
    epochs=50,
    batch_size=60,
    learning_rate=1e-5,
    # Train / validation split
    train_actors=[1, 2, 3],
    val_actors=[4],
    train_cams=[1, 2],
    val_cams=[1],
    split_ratio=None,
    drop_offair=False,
    undersample=False,
    # Sequence windowing
    seq_len=20,
    stride=10,
)

# Disabled guard against using the same actor in both splits:
# if set(opt.train_actors) & set(opt.val_actors):
#     raise Exception("Can't use the same actors both in train and validation splits")


In [124]:
# WANDB project initialization
# Values fixed for this notebook rather than taken from `opt`.
fixed_config = {
    "model": "LSTM",
    "architecture": "LSTM",
    "num_features": 2048,
    "loss_function": "sparse_categorical_crossentropy",
}

# Options that are logged under the same name they carry on `opt`.
logged_options = (
    "epochs",
    "seq_len",
    "batch_size",
    "stride",
    "train_actors",
    "val_actors",
    "train_cams",
    "val_cams",
    "dropout",
    "lstm1_units",
    "lstm2_units",
    "learning_rate",
    "split_ratio",
    "drop_offair",
    "undersample",
)

run = wandb.init(
    project="Fall detection CNN + RNN",
    config={**fixed_config, **{name: getattr(opt, name) for name in logged_options}},
)

# Later cells read their settings from the run's config object.
cfg = wandb.config

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [125]:
# Load the train and validation splits selected by the run options.
(X_train, y_train, X_val, y_val, cams_train, cams_val) = load_and_split(
    opt.train_actors,
    opt.val_actors,
    opt.train_cams,
    opt.val_cams,
    opt.split_ratio,
    opt.drop_offair,
    opt.undersample,
)

# Report the size of what was loaded.
print(
    f"\nX_train shape: {X_train.shape}, len y_train: {len(y_train)}, X_val shape: {X_val.shape}, len y_val: {len(y_val)}\n"
)

[STATUS] Load Train Set


  0%|          | 0/44 [00:00<?, ?it/s]

In [None]:
# Encode the labels per time series; also returns the fitted encoder and the
# class weights that are later passed to model.fit.
(
    y_train_series,
    y_val_series,
    enc,
    class_weights,
) = get_timeseries_labels_encoded(y_train, y_val, cfg)


Class weights for train series: {'crouched_still': 19.192307692307693, 'fall_crouch': 3.9167974882260594, 'fall_frontal': 4.361888111888112, 'fall_lateral': 2.741758241758242, 'lie_down_from_sitting': 0.6953734671125975, 'lie_down_on_the_floor': 2.020242914979757, 'lie_still': 0.1894600956792467, 'sit_down_from_standing': 1.6545092838196287, 'sit_still': 6.397435897435898, 'sit_up_from_lying': 0.783359497645212, 'stand_still': 2.9079254079254078, 'stand_up_from_floor': 0.5515030946065429, 'stand_up_from_sit': 1.4430306535569692}


In [None]:
# Number of entries in class_weights.
len(class_weights)

13

In [None]:
# Shape of the encoded training labels, then the number of distinct label rows.
print(y_train_series.shape)
len(np.unique(y_train_series, axis=0))

(2495, 13)


13

In [None]:
# Settings shared by the training and validation generators.
shared_gen_kwargs = dict(
    num_features=cfg.num_features,
    batch_size=cfg.batch_size,
    stride=cfg.stride,
    seq_len=cfg.seq_len,
    labels_encoder=enc,
)

# Only the data, labels and camera ids differ between the two splits.
train_gen = TSG(X=X_train, y=y_train, cams=cams_train, **shared_gen_kwargs)
val_gen = TSG(X=X_val, y=y_val, cams=cams_val, **shared_gen_kwargs)

In [None]:

# One output unit per distinct label row found in the encoded training labels.
num_classes = np.unique(y_train_series, axis=0).shape[0]

# Two stacked LSTMs with dropout, followed by a softmax classifier.
# Only the first layer declares the input shape. The second LSTM infers its
# input from the first one's output, so it no longer carries a (redundant and
# misleading) input_shape argument.
model = Sequential(
    [
        LSTM(
            units=cfg.lstm1_units,
            input_shape=(cfg.seq_len, cfg.num_features),
            return_sequences=True,  # pass the full sequence on to the next LSTM
        ),
        Dropout(cfg.dropout),
        LSTM(units=cfg.lstm2_units),
        Dropout(cfg.dropout),
        Dense(num_classes, activation="softmax"),
    ]
)

# NOTE(review): the loss is read from cfg.loss_function, which this notebook
# sets to "sparse_categorical_crossentropy", while the metrics list uses the
# non-sparse "categorical_crossentropy". The two expect different label
# formats - confirm which one matches what the generators yield.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=cfg.learning_rate),
    loss=cfg.loss_function,
    metrics=["accuracy", "categorical_crossentropy"],
)
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_28 (LSTM)              (None, 20, 256)           2360320   
                                                                 
 dropout_28 (Dropout)        (None, 20, 256)           0         
                                                                 
 lstm_29 (LSTM)              (None, 128)               197120    
                                                                 
 dropout_29 (Dropout)        (None, 128)               0         
                                                                 
 dense_14 (Dense)            (None, 13)                1677      
                                                                 
Total params: 2,559,117
Trainable params: 2,559,117
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Callbacks
dir_path = f"model_checkpoints/{cfg.model}"
safe_mkdir(dir_path)

# Timestamp that makes each run's checkpoint name unique. It must be safe to
# use inside a file name: the previous "%d/%m/%Y_%H:%M:%S" pattern contained
# "/" (which split the checkpoint path into unintended nested directories) and
# ":" (not allowed in Windows file names). Year-first order also sorts by date.
now = datetime.now()
dt_string = now.strftime("%Y-%m-%d_%H-%M-%S")

# Save only the weights, and only when validation accuracy improves.
# initial_value_threshold is the starting "best" value, so nothing is written
# until val_accuracy exceeds 0.8.
model_checkpoint = ModelCheckpoint(
    filepath=f"{dir_path}/{cfg.model}_{dt_string}",
    monitor="val_accuracy",
    save_best_only=True,
    save_weights_only=True,
    initial_value_threshold=0.8,
    verbose=1,
)

callbacks = [WandbCallback(), model_checkpoint]

In [None]:
# Train Model
# NOTE(review): Keras expects class_weight to be keyed by integer class index;
# confirm get_timeseries_labels_encoded returns it in that form.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=cfg.epochs,
    callbacks=callbacks,
    class_weight=class_weights,
)

# Presumably switches the validation generator to evaluation mode for the
# prediction step that follows - TODO confirm in TimeSeriesGenerator.
val_gen.evaluate = True

In [None]:
# Evaluate Model
# Despite the name, these are the softmax outputs of the final Dense layer.
val_logits = model.predict(x=val_gen, verbose=1)

In [None]:
# free up memory
# Drop the notebook-level references to the raw arrays, then run a collection.
# NOTE(review): train_gen / val_gen were built from these arrays and may still
# reference them, in which case the memory is not released yet - confirm.
del X_train, y_train, X_val, y_val

gc.collect()


In [None]:
# Log metrics to wandb
# Index of the highest-scoring class for each validation prediction.
y_pred_val_classes = np.argmax(val_logits, axis=1).tolist()

# Disabled wandb.sklearn plots. They need a `classes` list, which is not
# defined in the visible part of this notebook.
# wandb.sklearn.plot_roc(y_val_series, val_logits, classes)
# wandb.sklearn.plot_class_proportions(y_train_series, y_val_series, classes)
# wandb.sklearn.plot_precision_recall(y_val_series, val_logits, classes)
# wandb.sklearn.plot_confusion_matrix(y_val_series, y_pred_val_classes, classes)

# Close the run and flush pending data. wandb.finish() is the supported call;
# wandb.join() is only a deprecated alias for it.
wandb.finish()