In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
# Evaluating CNN+RNN models on the dataset

# Imports
import csv
import gc
import os, random
from collections import Counter
from datetime import datetime
from statistics import mode

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from wandb.keras import WandbCallback

import wandb
from wp8.options.train_options import TrainOptions
# from wp8.pre_processing.generators import TimeSeriesGenerator as TSG
from wp8.pre_processing.utils import safe_mkdir
from wp8.utils.cnn_rnn_utils import get_timeseries_labels_encoded, load_and_split
from wp8.utils.dataset import TimeSeriesGenerator as TSG

In [None]:
# Seed every RNG the pipeline touches with a fixed integer.
# The previous seeds were derived from hash("<some string>"); Python salts
# str hashes per interpreter process (unless PYTHONHASHSEED is set), so those
# seeds changed on every kernel restart and gave no run-to-run reproducibility.
SEED = 42

# Request deterministic cuDNN kernels from TensorFlow.
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
class dotdict(dict):
    """Dictionary whose keys can also be read, written and deleted as attributes.

    Reading an attribute that is not a key returns None instead of raising
    AttributeError; deleting one that is not a key raises KeyError.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; fall back to the
        # mapping and yield None for unknown keys.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment is stored as a dictionary item.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Attribute deletion removes the dictionary item (KeyError if absent).
        dict.__delitem__(self, name)

# Quick check of dotdict: attribute reads resolve to dictionary keys.
nested_dict = {'val': 'nested works too'}
mydict = dotdict({'val': 'it works'})
mydict.val  # -> 'it works'

# A dotdict stored as a value allows chained dot access.
mydict.nested = dotdict(nested_dict)
mydict.nested.val  # -> 'nested works too'

'nested works too'

In [3]:
# Notebook stand-in for the command-line options (`TrainOptions().parse()`):
# the same settings, hard-coded so the notebook can run without CLI arguments.
opt = dotdict(
    # model
    lstm1_units=8,
    lstm2_units=32,
    dense_units=32,
    dropout=0.7,
    # optimisation
    epochs=1000,
    # data selection
    train_actors=[1, 2, 3],
    val_actors=[4],
    train_cams=[1],
    val_cams=[1],
    # windowing / loading
    seq_len=20,
    split_ratio=None,
    drop_offair=False,
    undersample=False,
    batch_size=120,
    stride=10,
    learning_rate=1e-4,
    micro_classes=False,
)

# if set(opt.train_actors) & set(opt.val_actors):
#     raise Exception("Can't use the same actors both in train and validation splits")


In [4]:
# Load the per-frame features, labels and camera ids for the train and
# validation splits selected in `opt`.
(
    X_train,
    y_train,
    X_val,
    y_val,
    cams_train,
    cams_val,
) = load_and_split(
    opt.train_actors,
    opt.val_actors,
    opt.train_cams,
    opt.val_cams,
    opt.split_ratio,
    opt.drop_offair,
    opt.undersample,
    opt.micro_classes,
)
print(f"\nX_train shape: {X_train.shape}, len y_train: {len(y_train)}, X_val shape: {X_val.shape}, len y_val: {len(y_val)}\n")

[STATUS] Load Train Set


Loading features: outputs/dataset/features/Actor_3_Walk_Stick_Full_PH.npz: 100%|██████████| 44/44 [00:43<00:00,  1.01it/s]
Loading csv datasets: 100%|██████████| 44/44 [00:02<00:00, 21.51it/s]


[STATUS] Load Val Set


Loading features: outputs/dataset/features/Actor_4_Chair_Full_PH.npz: 100%|██████████| 6/6 [00:07<00:00,  1.18s/it]   
Loading csv datasets: 100%|██████████| 6/6 [00:00<00:00, 46.94it/s]



X_train shape: (144840, 2048), len y_train: 144840, X_val shape: (24060, 2048), len y_val: 24060



In [9]:
# Turn the per-frame arrays into fixed-length sequences for the LSTM.
# NOTE(review): the saved output of this cell ends with
# "ValueError: If using all scalar values, you must pass an index", so the
# names assigned here were never produced in the recorded run. The error is
# raised inside project code that is not visible here — fix before relying
# on the cells below.
series_gen = TSG(opt)
(
    X_train_series,
    y_train_series,
    class_weights,
    classes,
) = series_gen.get_train_series(X_train, y_train, cams_train)
X_val_series, y_val_series = series_gen.get_val_series(X_val, y_val, cams_val)
print(f"\nX_train_series shape: {X_train_series.shape}, len y_train_series: {len(y_train_series)}, X_val_series shape: {X_val_series.shape}, len y_val_series: {len(y_val_series)}\n")

100%|██████████| 13277/13277 [00:00<00:00, 183853.57it/s]


ValueError: If using all scalar values, you must pass an index

In [None]:
# WANDB project initialization
# Start a W&B run and record every hyper-parameter of this experiment so the
# run can be compared and reproduced from the dashboard.
run = wandb.init(
    project="Fall detection CNN + RNN",
    config={
        "model": "LSTM",
        "epochs": opt.epochs,
        "seq_len": opt.seq_len,
        "num_features": 2048,
        "batch_size": opt.batch_size,
        "stride": opt.stride,
        "loss_function": "sparse_categorical_crossentropy",
        "architecture": "LSTM",
        "train_actors": opt.train_actors,
        "val_actors": opt.val_actors,
        "train_cams": opt.train_cams,
        "val_cams": opt.val_cams,
        # Was `opt.classes`: `opt` has no such key and dotdict returns None
        # for missing keys, so the logged value was always None.
        "micro_classes": opt.micro_classes,
        "dropout": opt.dropout,
        "lstm1_units": opt.lstm1_units,
        "lstm2_units": opt.lstm2_units,
        "dense_units": opt.dense_units,
        "learning_rate": opt.learning_rate,
        "split_ratio": opt.split_ratio,
        "drop_offair": opt.drop_offair,
        "undersample": opt.undersample,
    },
)

# The cells below read hyper-parameters from the run config.
cfg = wandb.config

In [None]:
# Encode the labels for training and recover the encoder, class weights and
# class names.
# NOTE(review): this rebinds y_train_series, y_val_series, class_weights and
# classes, which the windowing cell (series_gen.get_train_series /
# get_val_series) also assigns — confirm which source is intended and that
# the labels still line up with X_train_series / X_val_series.
y_train_series, y_val_series, enc, class_weights, classes = get_timeseries_labels_encoded(y_train, y_val, cfg)

In [None]:
# Windowing settings shared by the train and validation generators.
# NOTE(review): TSG is constructed here with keyword arguments, while the
# windowing cell calls `TSG(opt)` with a single options object — confirm which
# signature the imported class actually supports.
shared_gen_kwargs = dict(
    num_features=cfg.num_features,
    batch_size=cfg.batch_size,
    stride=cfg.stride,
    seq_len=cfg.seq_len,
    labels_encoder=enc,
)

train_gen = TSG(X=X_train, y=y_train, cams=cams_train, **shared_gen_kwargs)
val_gen = TSG(X=X_val, y=y_val, cams=cams_val, **shared_gen_kwargs)

In [None]:
# One softmax output unit per distinct label found in the training targets.
num_classes = np.unique(y_train_series, axis=0).shape[0]

# Single-LSTM classifier over (seq_len, num_features) sequences.
model = Sequential(
    [
        LSTM(units=cfg.lstm1_units, input_shape=(cfg.seq_len, cfg.num_features)),
        Dropout(cfg.dropout),
        # Deeper variant, currently disabled:
        # LSTM(units=cfg.lstm2_units, input_shape=(cfg.seq_len, cfg.num_features)),
        # Dropout(cfg.dropout),
        # Dense(units=cfg.dense_units, activation="relu"),
        # Dropout(cfg.dropout),
        Dense(units=num_classes, activation="softmax"),
    ]
)

adam = tf.keras.optimizers.Adam(learning_rate=cfg.learning_rate)
model.compile(optimizer=adam, loss=cfg.loss_function, metrics=["accuracy"])
model.summary()


In [None]:

# Callbacks
# Checkpoints go under a per-model directory and are stamped with the time
# this cell was run.
dir_path = f"model_checkpoints/{cfg.model}"
safe_mkdir(dir_path)
now = datetime.now()
dt_string = now.strftime("%d-%m-%Y_%H:%M:%S")

# Keep only the best weights, and only once val_accuracy beats 0.8.
# NOTE(review): model_checkpoint is created but is not part of `callbacks`
# below, so nothing is checkpointed — confirm whether that is intentional.
model_checkpoint = ModelCheckpoint(
    filepath=f"{dir_path}/{cfg.model}_{dt_string}",
    monitor="val_accuracy",
    initial_value_threshold=0.8,
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Cut the learning rate by 10x when val_loss stalls, down to 1e-6.
# NOTE(review): patience here (30) is larger than the early-stopping patience
# (20) — check that the LR reduction can actually trigger before training stops.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.1,
    patience=30,
    min_delta=1e-5,
    cooldown=1,
    min_lr=1e-6,
    verbose=1,
)

# Stop training once val_loss has not improved for 20 epochs.
early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=20, verbose=1)

callbacks = [WandbCallback(), reduce_lr, early_stop]

In [None]:
# Train Model
# validation_data is required: EarlyStopping and ReduceLROnPlateau monitor
# "val_loss", which Keras only computes when validation data is supplied.
# Without it both callbacks are inert and training always runs cfg.epochs.
# batch_size is passed explicitly so training uses the value logged in the
# W&B config instead of the Keras default of 32.
# NOTE(review): assumes X_val_series / y_val_series are aligned the same way
# as X_train_series / y_train_series — confirm.
history = model.fit(
    X_train_series,
    y_train_series,
    validation_data=(X_val_series, y_val_series),
    batch_size=cfg.batch_size,
    epochs=cfg.epochs,
    callbacks=callbacks,
    class_weight=class_weights,
)

# Flag the validation generator for the prediction cell that follows.
# NOTE(review): presumably switches the generator to inference mode — confirm
# against the TSG implementation.
val_gen.evaluate = True

In [None]:
# Evaluate Model
# Run the trained model over the validation generator. Despite the name,
# val_logits holds the outputs of the model's final softmax layer (one score
# per class), not raw logits.
val_logits = model.predict(val_gen, verbose=1)

In [None]:
# Drop the notebook's references to the raw per-frame arrays and labels, then
# run a garbage-collection pass. Re-running this cell on its own raises
# NameError because the names are already gone.
del X_train, y_train, X_val, y_val

gc.collect()

In [None]:
# Log metrics to wandb
# Predicted class = index of the highest score in each prediction row.
y_pred_val_classes = np.argmax(val_logits, axis=1).tolist()

# Plain-text per-class precision / recall / F1 summary.
cr = classification_report(y_val_series, y_pred_val_classes, target_names=classes)

wandb.log({
  "Classification Report": cr,
})

# Diagnostic plots; the ROC and precision-recall plots take the per-class
# scores, the confusion matrix takes the hard predictions.
wandb.sklearn.plot_roc(y_val_series, val_logits, classes)
wandb.sklearn.plot_class_proportions(y_train_series, y_val_series, classes)
wandb.sklearn.plot_precision_recall(y_val_series, val_logits, classes)
wandb.sklearn.plot_confusion_matrix(y_val_series, y_pred_val_classes, classes)

# Close the run. wandb.join() is a deprecated alias of wandb.finish().
wandb.finish()

In [None]:
# Show the classification report text built in the metrics cell; print() is
# used so the report's line breaks render as a table.
print(cr)