# Evaluating models on the dataset

## Imports

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from wp8.pre_processing.utils import listdir_nohidden_sorted as lsdir
from wp8.pre_processing.utils import safe_mkdir
from tqdm.notebook import tqdm
from wp8.pre_processing.generators import TimeSeriesGenerator as TSG
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback
from sklearn.metrics import classification_report
from statistics import mode
from datetime import datetime
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import Accuracy, SparseCategoricalAccuracy, Recall, Precision

### Set random seeds

In [2]:
np.random.seed(2)
tf.random.set_seed(2)


In [3]:
wandb.login()


[34m[1mwandb[0m: Currently logged in as: [33mandreaapi[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# %env WANDB_API_KEY=$a22c5c63cb14ecd62db2141ec9ca69d588a6483e


## Load dataset and features

In [5]:
features_path = "../outputs/dataset/features/"
dataset_path = "../outputs/dataset/dataset/"

# load features
all_features = []
all_features_paths = lsdir(features_path)
for _, feature_file in enumerate(tqdm(all_features_paths)):
    with np.load(feature_file) as features:
        all_features.append(features["arr_0"])

all_features = np.concatenate(all_features, axis=0)


  0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
dfs = []
for _, filename in enumerate(tqdm(lsdir(dataset_path))):
    df = pd.read_csv(filename, index_col=0)
    dfs.append(df)

dataset = pd.concat(dfs, ignore_index=True)


  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
print(dataset.shape, all_features.shape)


(30240, 4) (30240, 2048)


In [8]:
dataset.head(-10)


Unnamed: 0,micro_labels,macro_labels,ar_labels,frame_name
0,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0000
1,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0001
2,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0002
3,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0003
4,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0004
...,...,...,...,...
30225,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4305
30226,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4306
30227,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4307
30228,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4308


In [9]:
names = dataset["frame_name"]
cams = []
for name in names:
    cams.append(int(name[-6]))

dataset["cams"] = pd.Series(cams)

dataset.head()


Unnamed: 0,micro_labels,macro_labels,ar_labels,frame_name,cams
0,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0000,1
1,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0001,1
2,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0002,1
3,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0003,1
4,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0004,1


In [11]:
# count samples per label, get labels names, encode labels to integers
dataset["micro_labels"].value_counts()
micro_labels_names = dataset["micro_labels"].unique().tolist()

le = preprocessing.LabelEncoder()
encoded_labels = le.fit_transform(dataset["micro_labels"])
n_labels = len(np.unique(encoded_labels))
print("n_labels: ", n_labels)


n_labels:  12


In [12]:
len(micro_labels_names)
micro_labels_names

['lie_still',
 'sit_up_from_lying',
 'stand_up_from_sit',
 'stand_still',
 'sit_down_from_standing',
 'lie_down_from_sitting',
 'fall_frontal',
 'lie_down_on_the_floor',
 'stand_up_from_floor',
 'fall_lateral',
 'sit_still',
 'fall_crouch']

### WANDB project initialization

In [13]:
run = wandb.init(
    project="Fall detection CNN + RNN",
    config={
        "model": "LSTM",
        "epochs": 5,
        "sequence_length": 20,
        "num_features": 2048,
        "batch_size": 40,
        "sliding_window_stride": 10,
        "loss_function": "sparse_categorical_crossentropy",
        "architecture": "LSTM",
        "dataset": "Actor_1_Bed",
        "dropout": 0.8,
        "lstm1_units": 32,
        "learning_rate": 0.01,
        "split_ratio": 0.7
    },
)

config = wandb.config


## Train Test split

In [14]:
split = int(dataset.shape[0] * config.split_ratio)
X_train = np.array(dataset["features"][0:split].tolist())
X_test = np.array(dataset["features"][split:].tolist())

y_train = encoded_labels[0:split]
y_test = encoded_labels[split:]

cams_train = dataset["cams"][0:split]
cams_test = dataset["cams"][split:]

print(f"X_train shape :{X_train.shape}, y_train shape: {y_train.shape}, X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape :(21168, 2048), y_train shape: (21168,), X_test shape: (9072, 2048), y_test shape: (9072,)


In [15]:
print(f'Last train frame: {dataset["frame_name"][split]}\nFirst test frame: {dataset["frame_name"][split+1]}')


Last train frame: actor_1_bed_cam_5_3888
First test frame: actor_1_bed_cam_5_3889


## Model

In [16]:
train_gen = TSG(
    X=X_train,
    y=y_train,
    num_features=config.num_features,
    cams=cams_train.tolist(),
    batch_size=config.batch_size,
    stride=config.sliding_window_stride,
    seq_len=config.sequence_length,
)
test_gen = TSG(
    X=X_test,
    y=y_test,
    cams=cams_test.tolist(),
    num_features=config.num_features,
    batch_size=config.batch_size,
    stride=config.sliding_window_stride,
    seq_len=config.sequence_length,
)

model = Sequential()
model.add(LSTM(units=config.lstm1_units, input_shape=(20, config.num_features)))
model.add(Dropout(config.dropout))
model.add(Dense(n_labels, activation="softmax"))
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
    loss=config.loss_function,
    metrics=["accuracy","sparse_categorical_accuracy"],
)
model.summary()


Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                266368    
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 12)                396       
                                                                 
Total params: 266,764
Trainable params: 266,764
Non-trainable params: 0
_________________________________________________________________


2022-05-15 17:15:14.616719: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-15 17:15:14.616849: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


### Callbacks

In [17]:
dir_path = f"experiments/model_checkpoint/{config.model}_{config.dataset}"
safe_mkdir(dir_path)
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y_%H:%M:%S")
model_checkpoint = ModelCheckpoint(
    filepath=f"{dir_path}/{config.model}_{dt_string}",
    monitor="val_accuracy",
    save_best_only=True,
    save_weights_only=True,
)
callbacks = [WandbCallback(), model_checkpoint]




## Train

In [18]:
history = model.fit(train_gen, validation_data=test_gen, epochs=config.epochs, callbacks=callbacks)
test_gen.evaluate = True


Epoch 1/5


2022-05-15 17:15:18.650777: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-05-15 17:15:19.062505: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-15 17:15:19.191267: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


  4/529 [..............................] - ETA: 8s - loss: 5.3251 - acc: 0.1667 - sparse_categorical_accuracy: 0.1667  

2022-05-15 17:15:19.358343: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-05-15 17:15:26.440739: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-15 17:15:26.489767: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Evaluate

In [19]:
test_logits = model.predict_generator(test_gen, verbose=1)


  test_logits = model.predict_generator(test_gen, verbose=1)
2022-05-15 17:16:02.242771: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-15 17:16:02.281972: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [20]:
print(
    f"test_gen.n_windows: {test_gen.n_windows}\n\ntest_gen series_labels length: {len(test_gen.series_labels)}\n\nCorrect number of labels: {test_gen.n_windows * (y_test.shape[0] // test_gen.batch_size)}\n\nlogits shape: {test_logits.shape}"
)


test_gen.n_windows: 3

test_gen series_labels length: 3393

Correct number of labels: 678

logits shape: (678, 12)


In [21]:
def to_series_labels(timestep_labels: list, n_batches: int, n_windows: int, seq_len: int, stride: int) -> list:
    series_labels = []
    for w in range(n_windows * n_batches):
        s = w * stride
        labels_seq = timestep_labels[s : s + seq_len]
        series_labels.append(mode(labels_seq))
    return series_labels


### Log metrics to wandb

In [22]:
y_pred_test_classes = np.argmax(test_logits, axis=1).tolist()
y_train_series = to_series_labels(
    y_train,
    train_gen.n_batches,
    train_gen.n_windows,
    train_gen.seq_len,
    train_gen.stride,
)
y_test_series = to_series_labels(y_test, test_gen.n_batches, test_gen.n_windows, test_gen.seq_len, test_gen.stride)
wandb.sklearn.plot_roc(y_test_series, test_logits, micro_labels_names)
wandb.sklearn.plot_class_proportions(y_train_series, y_test_series, micro_labels_names)
wandb.sklearn.plot_precision_recall(y_test_series, test_logits, micro_labels_names)
wandb.sklearn.plot_confusion_matrix(y_test_series, y_pred_test_classes, micro_labels_names)




In [23]:
wandb.join()

VBox(children=(Label(value='3.143 MB of 3.149 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.998097…

0,1
acc,▁▁▅▄█
epoch,▁▃▅▆█
loss,█▁▂▂▁
sparse_categorical_accuracy,▁▁▅▄█
val_acc,█▂▂▁▂
val_loss,▇▆▃█▁
val_sparse_categorical_accuracy,█▂▂▁▂

0,1
acc,0.40517
best_epoch,4.0
best_val_loss,2.16952
epoch,4.0
loss,2.06734
sparse_categorical_accuracy,0.40517
val_acc,0.10619
val_loss,2.16952
val_sparse_categorical_accuracy,0.10619
