# Evaluating models on the dataset

## Imports

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from wp8.pre_processing.utils import listdir_nohidden_sorted as lsdir
from tqdm.notebook import tqdm
from wp8.pre_processing.generators import TimeSeriesGenerator as TSG
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import wandb
from wandb.keras import WandbCallback
from sklearn.metrics import classification_report
from statistics import mode

### Set random seeds

In [2]:
np.random.seed(2)
tf.random.set_seed(2)

In [3]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mandreaapi[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# %env WANDB_API_KEY=$a22c5c63cb14ecd62db2141ec9ca69d588a6483e

## Load dataset and features

In [5]:
features_path = "../outputs/dataset/features/"
dataset_path = "../outputs/dataset/dataset/"

#load features
all_features = []
all_features_paths = lsdir(features_path)[0:1]
for _, feature_file in enumerate(tqdm(all_features_paths)):
  with np.load(feature_file) as features:
      all_features.append(features["arr_0"])
      
all_features=np.concatenate(all_features, axis=0)

  0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
dfs = []
for _,filename in enumerate(tqdm(lsdir(dataset_path)[0:1])):
  df = pd.read_csv(filename, index_col=0)
  dfs.append(df)

dataset = pd.concat(dfs, ignore_index=True)

  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
print(dataset.shape, all_features.shape)

(30240, 4) (30240, 2048)


In [8]:
dataset.head(-10)

Unnamed: 0,micro_labels,macro_labels,ar_labels,frame_name
0,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0000
1,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0001
2,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0002
3,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0003
4,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0004
...,...,...,...,...
30225,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4305
30226,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4306
30227,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4307
30228,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_7_4308


In [9]:
names = dataset["frame_name"]
cams = []
for name in names:
  cams.append(int(name[-6]))

dataset["cams"] = pd.Series(cams)

dataset.head()

Unnamed: 0,micro_labels,macro_labels,ar_labels,frame_name,cams
0,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0000,1
1,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0001,1
2,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0002,1
3,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0003,1
4,lie_still,lying_down,actor_repositioning,actor_1_bed_cam_1_0004,1


In [10]:
dataset["features"] = pd.Series(all_features.tolist())

In [11]:
#count samples per label
dataset["micro_labels"].value_counts()

lie_still                 12705
stand_up_from_floor        4172
lie_down_from_sitting      3199
sit_up_from_lying          2681
stand_up_from_sit          1680
sit_down_from_standing     1449
fall_lateral               1043
lie_down_on_the_floor       959
fall_frontal                742
fall_crouch                 651
sit_still                   532
stand_still                 427
Name: micro_labels, dtype: int64

In [12]:
le = preprocessing.LabelEncoder()
encoded_labels = le.fit_transform(dataset["micro_labels"])
n_labels = len(np.unique(encoded_labels))
print("n_labels: ", n_labels)

n_labels:  12


In [13]:
run = wandb.init(project = "WP8",
                 config = {
                   "epochs": 2,
                    "sequence_length": 20,
                    "num_features": 2048,
                    "batch_size": 40,
                    "sliding_window_stride": 10,
                   "loss_function": "sparse_categorical_crossentropy",
                   "architecture": "LSTM",
                   "dataset": "single_file",
                   "train_test_split": 0.7,
                   "dropout":0.8,
                   "lstm1_units": 32,
                   "learning_rate": 0.01
                 })
config = wandb.config

## Train Test split

In [14]:
split_ratio = int(dataset.shape[0] * config.train_test_split)
X_train = np.array(dataset["features"][0:split_ratio].tolist())
X_test = np.array(dataset["features"][split_ratio:].tolist())

y_train = encoded_labels[0:split_ratio]
y_test = encoded_labels[split_ratio:]

cams_train = dataset["cams"][0:split_ratio]
cams_test = dataset["cams"][split_ratio:]

print(f"X_train shape :{X_train.shape}, y_train shape: {y_train.shape}, X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape :(21168, 2048), y_train shape: (21168,), X_test shape: (9072, 2048), y_test shape: (9072,)


In [15]:
print(dataset["frame_name"][split_ratio], dataset["frame_name"][split_ratio+1])

actor_1_bed_cam_5_3888 actor_1_bed_cam_5_3889


## Train

In [16]:
train_gen = TSG(X=X_train, y = y_train, num_features=config.num_features, cams=cams_train.tolist(), batch_size = config.batch_size, stride=config.sliding_window_stride, seq_len = config.sequence_length)

test_gen = TSG(X=X_test, y = y_test, cams=cams_test.tolist(), num_features=config.num_features, batch_size = config.batch_size, stride=config.sliding_window_stride, seq_len = config.sequence_length)

model = Sequential()
model.add(LSTM(units=config.lstm1_units, input_shape=(20, config.num_features)))
model.add(Dropout(config.dropout))
model.add(Dense(n_labels, activation = "softmax"))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate), loss=config.loss_function, metrics=["accuracy", "sparse_categorical_accuracy"]) 
model.summary()

Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                266368    
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 12)                396       
                                                                 
Total params: 266,764
Trainable params: 266,764
Non-trainable params: 0
_________________________________________________________________


2022-05-14 20:29:04.650610: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-14 20:29:04.650785: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [17]:
history = model.fit(train_gen, validation_data=test_gen, epochs=config.epochs, callbacks=[WandbCallback()], max_queue_size=1)
test_gen.evaluate = True



Epoch 1/2


2022-05-14 20:29:08.474461: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-05-14 20:29:08.916524: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-14 20:29:09.044768: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


  2/529 [..............................] - ETA: 31s - loss: 3.8377 - accuracy: 0.1667 - sparse_categorical_accuracy: 0.1667 

2022-05-14 20:29:09.260186: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-05-14 20:29:17.464081: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-14 20:29:17.519080: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2


In [None]:
history.history.keys()

In [18]:
print(test_gen.get_item_calls)

453


In [20]:
test_gen.n_windows

3

In [19]:
453*test_gen.n_windows

1359

In [26]:
# def to_series_labels(timestep_labels, batch_size, seq_len, stride):
#   s = 0
#   series_labels=[]
#   while s + seq_len <=len(timestep_labels):
#     labels_seq = timestep_labels[s:s+seq_len]
#     series_labels.append(mode(labels_seq))
#     s+=stride
#   return series_labels

def to_series_labels(timestep_labels,n_batches, n_windows, seq_len, stride):
  series_labels = []
  for w in range(n_windows*n_batch):
    s = w * stride
    labels_seq = timestep_labels[s:s+seq_len]
    series_labels.append(mode(labels_seq))
  return series_labels


series_labels = to_series_labels(y_test,config.batch_size, test_gen.n_windows, config.sequence_length, config.sliding_window_stride)
print(len(series_labels))
    

120


In [None]:
test_gen.n_series_labels

In [None]:
gen_y_test_series = test_gen.series_labels
len(gen_y_test_series)

In [None]:
print(test_gen.ys_count)

In [21]:
n_labels = test_gen.n_windows * (y_test.shape[0] // test_gen.batch_size)
n_labels

678

In [22]:
y_pred = model.predict_generator(test_gen, verbose=1)

  y_pred = model.predict_generator(test_gen, verbose=1)
2022-05-14 20:32:16.628946: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-14 20:32:16.669270: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [23]:
y_pred.shape

(678, 12)

In [None]:
pd.Series(series_labels).value_counts()

In [None]:
y_pred_labels = np.argmax(y_pred, axis=1)
y_pred_labels[0]

In [None]:
pd.Series(y_pred_labels).value_counts()

In [None]:
print(classification_report(y_pred_labels, series_labels))

In [None]:
wandb.run.finish()