In [93]:
import tensorflow as tf
from tensorflow.keras import models, layers, regularizers
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder


import sys
sys.path.append("..")
from utils.preprocess import *

import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Bidirectional
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut

In [94]:
exported_path = '../exported_models/'

In [95]:
input_directory = "../assets/Respeck"

data_list = []
label_list = []
total_rows = 0

all_occu = {}



# Define window size and overlap
window_size = 50
overlap_size = 25  # This is often set to 50% of the window size

all_occu = {}
data_windows = []



scaler = StandardScaler()

for folder_name in os.listdir(input_directory):
    folder_path = os.path.join(input_directory, folder_name)
    if folder_name == ".DS_Store":
        continue
    for file in os.listdir(folder_path):
        if file.endswith(".csv") and "unprocessed" not in file and 'normal' in file:
            
            filename = os.path.join(folder_path, file)
            activity, activity_sub, _ = extract_activity_and_status(filename)
            if "sitting" in activity or "standing" in activity:
                label = 'sitting/standing' + "_" + activity_sub
            else:
                label = activity + "_" + activity_sub
            
            
            if label not in all_occu:
                all_occu[label] = {'count': 1, 'directories': [filename]}
            else:
                all_occu[label]['count'] += 1
                all_occu[label]['directories'].append(filename)
            
            df = pd.read_csv(filename, usecols=[2, 3, 4])

            # Determine the number of rows to take from this file
            rows_to_take = min(700 - total_rows, len(df))
            
            # Update the df to only contain the necessary rows and update our counter
            df = df.head(rows_to_take)

            # Apply sliding window technique
            for start in range(0, len(df), window_size - overlap_size):
                end = start + window_size
                window = df.iloc[start:end].values  # Convert the window to a NumPy array
                if len(window) < 50:
                    continue

                # Normalize the window data
                # if not data_windows:  # If this is the first window, fit the scaler
                #     scaler.fit(window)
                # window_normalized = scaler.transform(window)  # Transform the data
                
                # # Add the normalized window and label to the lists
                data_windows.append(window)
                label_list.append(label)
# print(data_windows)

In [74]:
print(len(data_windows[0]))
print(len(data_windows))

wrong_list = []

for data in data_windows:
    if len(data) != 50:
        wrong_list.append(data)

print(len(wrong_list))

50
14904
0


In [96]:
unique_label_list = list(set(label_list))

In [97]:
unique_label_list

['normal walking_normal',
 'lying down on left_normal',
 'lying down on stomach_normal',
 'miscellaneous movements_normal',
 'sitting/standing_normal',
 'ascending stairs_normal',
 'lying down right_normal',
 'lying down back_normal',
 'running_normal',
 'shuffle walking_normal',
 'descending stairs_normal']

In [98]:
def create_model_offline(input_shape, num_classes):
    model = models.Sequential([
        # CNN layers for spatial feature extraction
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.BatchNormalization(),

        # Flatten the output of the CNN to feed into the LSTM
        layers.Flatten(),

        # Add LSTM layers, making sure the first one returns sequences
        layers.Reshape((-1, 256)), # Reshape the flattened output to be sequence-like if necessary
        layers.LSTM(256, return_sequences=True, kernel_regularizer=regularizers.l2(0.001)),
        layers.LSTM(256, kernel_regularizer=regularizers.l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(0.5),

        # Dense layers for final classification
        layers.Dense(64, activation='relu'),  # First fully connected layer
        layers.Dense(32, activation='relu'),  # Second fully connected layer
        layers.Dense(num_classes, activation='softmax'),  # Output layer
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model



In [99]:
X = np.array([df for df in data_windows])
X = np.stack(X)
y = LabelEncoder().fit_transform(label_list)

# Convert X to float32 and y to categorical (for softmax)
X = X.astype('float32')
y_categorical = to_categorical(y)

In [100]:
overall_dict = {}

In [101]:
for n in label_list:
    if n not in overall_dict:
        overall_dict[n] = 1
    elif n in overall_dict:
        overall_dict[n] += 1
        

In [102]:
overall_dict

{'shuffle walking_normal': 1242,
 'lying down on stomach_normal': 1242,
 'ascending stairs_normal': 1242,
 'sitting/standing_normal': 2484,
 'running_normal': 1242,
 'lying down right_normal': 1242,
 'descending stairs_normal': 1242,
 'miscellaneous movements_normal': 1242,
 'normal walking_normal': 1242,
 'lying down on left_normal': 1242,
 'lying down back_normal': 1242}

In [103]:
# # Define the Leave-One-Out cross-validator
# loo = LeaveOneOut()

# # Lists to store scores for each fold
# accuracy_scores = []

# # Iterate over each train-test split
# for train_index, test_index in loo.split(X):
#     # Split the data into training and test set for the current fold
#     X_train_fold, X_test_fold = X[train_index], X[test_index]
#     y_train_fold, y_test_fold = y_categorical[train_index], y_categorical[test_index]

#     # Define input_shape and num_classes based on the training data
#     input_shape = X_train_fold.shape[1:]  # (timesteps, features)
#     num_classes = y_train_fold.shape[1]   # number of categories

#     # Create a new model instance (define create_model function as per your model architecture)
#     model = create_model_offline(input_shape, num_classes)

#     # Train the model
#     model.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=0)

#     # Evaluate the model on the test set
#     loss, accuracy = model.evaluate(X_test_fold, y_test_fold, verbose=0)
#     accuracy_scores.append(accuracy)

# # Calculate the average accuracy over all LOO folds
# average_accuracy = np.mean(accuracy_scores)
# print(f'LOO Cross-Validation Accuracy: {average_accuracy:.4f}')

In [104]:
# # Evaluate the model on the test data
# scores = model.evaluate(X_test_fold, y_test_fold, verbose=0)
# accuracy_scores.append(scores[1])  # Assuming that scores[1] is accuracy if you followed the standard compile

# # The final accuracy is the mean of all fold accuracies
# average_accuracy = np.mean(accuracy_scores)
# print(f'LOO Cross-Validation Accuracy: {average_accuracy:.4f}')


In [119]:
def create_model_online(input_shape, num_classes):
    model = models.Sequential([
        # CNN layers for spatial feature extraction
        layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
        layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
        layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.BatchNormalization(),
        
        layers.Flatten(),

        #  # GRU layers
        # layers.GRU(128, return_sequences=True, 
        #            kernel_regularizer=regularizers.l2(0.001)),  # L2 Regularization
        # layers.GRU(64, kernel_regularizer=regularizers.l2(0.001)),  # L2 Regularization
        # layers.BatchNormalization(),
        # layers.Dropout(0.5),  # Dropout

        # Dense layers for final classification
        layers.Dense(64, activation='relu'),  # First fully connected layer
        layers.Dense(32, activation='relu'),  # Second fully connected layer
        layers.Dense(num_classes, activation='softmax'),  # Output layer
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [107]:
np.array(label_list).shape

(14904,)

In [108]:
import numpy as np

# Assuming `x` is your dataset with the shape (14904, 50, 3)
# And `label_list` is a list or numpy array with the length 14904

# Convert label_list to a numpy array if it isn't already
label_list = np.array(label_list)

# Initialize dictionaries to hold the min and max flags for each label
min_flags_by_label = {}
max_flags_by_label = {}

# Loop over each unique label
for label in np.unique(label_list)[:1]:
    # Find the indices where the label occurs
    indices = np.where(label_list == label)[0]
    
    # Select the data points corresponding to the current label
    data_points = X[indices]
    print(data_points[0])
    
    # Reshape data points to a 2D array where each row is a time point and columns are x, y, z
    reshaped_points = data_points.reshape(-1, 3)
    

    # Find the indices of the min and max values across the x, y, z axis
    min_indices = np.argmin(reshaped_points, axis=1)
    max_indices = np.argmax(reshaped_points, axis=1)
    print(min_indices, max_indices)

    # Convert these indices to a one-hot encoded format
    min_one_hot = np.eye(3)[min_indices]
    max_one_hot = np.eye(3)[max_indices]

    # Aggregate the one-hot encoded min and max indicators for the current label
    min_aggregated = np.sum(min_one_hot, axis=0)
    max_aggregated = np.sum(max_one_hot, axis=0)

    # Store the aggregated min and max flags in the dictionaries, keyed by label
    min_flags_by_label[label] = min_aggregated
    max_flags_by_label[label] = max_aggregated

# Now min_flags_by_label and max_flags_by_label dictionaries hold the min and max flags for each label.
# The flags indicate the count of how many times each axis (x, y, z) was the minimum or maximum.


[[ 0.20678711 -0.958313   -0.11602783]
 [-0.32202148 -0.89263916 -0.00396729]
 [-0.16381836 -1.0549927  -0.07769775]
 [-0.33276367 -0.88067627 -0.03253174]
 [-0.23950195 -0.7312622  -0.02789307]
 [-0.12988281 -0.68292236 -0.02496338]
 [-0.19824219 -0.53912354  0.05218506]
 [-0.26782227 -0.6939087   0.03582764]
 [-0.2084961  -0.88775635  0.01580811]
 [-0.20727539 -0.9451294   0.01776123]
 [-0.2824707  -1.0991821   0.15301514]
 [-0.37817383 -1.2046509   0.16131592]
 [-0.45996094 -1.2398071  -0.07647705]
 [-0.27734375 -1.3796997  -0.46221924]
 [-0.23120117 -1.4768677  -0.2600708 ]
 [-0.4345703  -0.89312744 -0.17633057]
 [-0.39770508 -0.5862427  -0.40264893]
 [-0.00219727 -0.9783325  -0.25299072]
 [-0.53027344 -1.171936   -0.13311768]
 [-0.41601562 -0.66851807 -0.17926025]
 [-0.1940918  -0.66607666 -0.09796143]
 [-0.15795898 -0.6555786  -0.09307861]
 [-0.32202148 -0.5964966  -0.11529541]
 [-0.22290039 -0.6929321  -0.13116455]
 [-0.3684082  -0.81744385 -0.10870361]
 [-0.50097656 -0.88775635

In [109]:
max_flags_by_label

{'ascending stairs_normal': array([37134.,  1979., 22987.])}

In [110]:
min_flags_by_label

{'ascending stairs_normal': array([ 2100., 58984.,  1016.])}

In [111]:
# Define input_shape and num_classes based on the full dataset
input_shape = X.shape[1:]  # (timesteps, features)
num_classes = y_categorical.shape[1]   # number of categories

In [112]:
test_list = [[0, 0, -1]] * 50
test_input_data = np.array(test_list).reshape(input_shape)


In [113]:
# Create a new model instance using the defined architecture
model_online = create_model_online(input_shape, num_classes)

# Split the dataset into training and validation sets if needed
# If you want to use the full dataset for training, you can skip this step
X_train, X_val, y_train, y_val = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

# Train the model
history = model_online.fit(
    X_train, y_train,
    epochs=10,
    batch_size=32,
    verbose=1,
    validation_data=(X_val, y_val)  # Omit this if not using a validation set
)

# Evaluate the model on the validation set if it was used
if 'X_val' in locals():
    val_loss, val_accuracy = model_online.evaluate(X_val, y_val, verbose=1)
    print(f'Validation Accuracy: {val_accuracy:.4f}')
    


# If you have a separate test set, evaluate on it
# test_loss, test_accuracy = model.evaluate([test_input_data], [], verbose=1)
# print(f'Test Accuracy: {test_accuracy:.4f}')

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Accuracy: 0.9198


In [114]:
result = model_online.predict(np.array([test_input_data]))




In [115]:
result

array([[6.6428309e-15, 1.5954474e-15, 9.8547364e-13, 4.3122473e-05,
        9.9995685e-01, 2.1046068e-10, 1.4687242e-08, 1.4004842e-14,
        2.4372910e-11, 1.7870778e-20, 9.4859840e-12]], dtype=float32)

In [116]:
unique_label_list[np.argmax(result)]

'sitting/standing_normal'

In [117]:
# converter = tf.lite.TFLiteConverter.from_keras_model(model)

# tflite_model_test = converter.convert()

# with open("model.tflite", "wb") as f:
#     f.write(tflite_model_test)

converter = tf.lite.TFLiteConverter.from_keras_model(model_online)

# Allow for TensorFlow ops that aren't natively supported in TFLite
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS
]

# # Disable the lowering of tensor list operations
converter._experimental_lower_tensor_list_ops = False

tflite_model_test = converter.convert()

with open(exported_path + "model_cnn.tflite", "wb") as f:
    f.write(tflite_model_test)


INFO:tensorflow:Assets written to: /var/folders/gg/10wpd3jj5v7dthydfjzl0gcm0000gn/T/tmpveuzjy4t/assets


INFO:tensorflow:Assets written to: /var/folders/gg/10wpd3jj5v7dthydfjzl0gcm0000gn/T/tmpveuzjy4t/assets
2023-11-08 12:40:18.080536: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-11-08 12:40:18.080548: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-11-08 12:40:18.080658: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/gg/10wpd3jj5v7dthydfjzl0gcm0000gn/T/tmpveuzjy4t
2023-11-08 12:40:18.081936: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2023-11-08 12:40:18.081941: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /var/folders/gg/10wpd3jj5v7dthydfjzl0gcm0000gn/T/tmpveuzjy4t
2023-11-08 12:40:18.086520: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-11-08 12:40:18.147559: I tensorflow/cc/saved_model/loader.cc:215] Running initialization