In [773]:
import os
import random
import numpy as np
import pandas as pd
from tensorflow import keras
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.metrics import f1_score, log_loss, accuracy_score

# from charset_normalizer import md__mypyc


# Dataset

### Load data

In [1150]:
window_size = 30
overlap_rate = 0.9

In [1203]:
# file_name = f"./data/processed_w{window_size}_o{str(overlap_rate).replace('0.', '')}_comp.csv"
# file_name = f"./data/processed_w{window_size}_o{str(overlap_rate).replace('0.', '')}_comp_npeak_allsubject.csv"
file_name = f"./data/processed_w{window_size}_o{str(overlap_rate).replace('0.', '')}_comp_npeak.csv"
df = pd.read_csv(file_name)
print(file_name)

./data/processed_w30_o9_comp_npeak.csv


In [1204]:
df.head()

Unnamed: 0,BVP_min,BVP_max,BVP_mean,BVP_std,BVP_n_peak,TEMP_min,TEMP_max,TEMP_mean,TEMP_std,labels,id
0,-163.48,141.1,-1.112052,42.52765,1.233333,31.99,32.05,32.019333,0.017404,1,S14
1,-117.07,126.23,0.054844,38.785298,1.233333,31.99,32.05,32.020417,0.016703,1,S14
2,-117.07,126.23,0.436318,40.096374,1.2,31.99,32.05,32.02075,0.016184,1,S14
3,-117.07,126.23,0.406578,41.129211,1.166667,31.99,32.05,32.021667,0.01655,1,S14
4,-210.57,170.49,-1.314943,48.216133,1.133333,31.99,32.05,32.021833,0.016732,1,S14


In [1205]:
df.columns

Index(['BVP_min', 'BVP_max', 'BVP_mean', 'BVP_std', 'BVP_n_peak', 'TEMP_min',
       'TEMP_max', 'TEMP_mean', 'TEMP_std', 'labels', 'id'],
      dtype='object')

### Scaling

In [1206]:
scaled = MinMaxScaler().fit_transform(df.drop(['labels', 'id'], axis=1).to_numpy())
# fit: Compute the minimum and maximum to be used for later scaling.
# transform: Scaling features of X according to feature_range.
# fit_transform: fit & transform at the same time 
# both input/output are numpy arrays, and thus, DataFrame needs to be converted to a NumPy array (by callig to_numpy()) 

# np.column_stack() is takes a sequence of 1-D or 2-D arrays as input and returns a 2-D array with those arrays stacked as columns.
df_scaled = pd.DataFrame(
  np.column_stack([scaled, df.loc[:, 'labels'].values, df.loc[:, 'id'].values]),
  columns=df.columns
)

df_scaled.head()

Unnamed: 0,BVP_min,BVP_max,BVP_mean,BVP_std,BVP_n_peak,TEMP_min,TEMP_max,TEMP_mean,TEMP_std,labels,id
0,0.850905,0.126835,0.355844,0.218744,0.2,0.639423,0.640777,0.641357,0.131661,1,S14
1,0.895131,0.112767,0.474092,0.198177,0.2,0.639423,0.640777,0.64162,0.123341,1,S14
2,0.895131,0.112767,0.512748,0.205382,0.175,0.639423,0.640777,0.6417,0.117183,1,S14
3,0.895131,0.112767,0.509734,0.211058,0.15,0.639423,0.640777,0.641922,0.121519,1,S14
4,0.806032,0.154642,0.335284,0.250006,0.125,0.639423,0.640777,0.641963,0.123689,1,S14


### Combine amusement and baseline conditions to make it non-stress class

In [1186]:
df_scaled['labels'] = df_scaled['labels'].replace([1,2,3], [0, 1, 0])

In [1187]:
df_scaled_selected  = df_scaled.loc[(df_scaled['id'] == "S14") | (df_scaled['id'] == "S15") | (df_scaled['id'] == "S16") | (df_scaled['id'] == "S17")]
# df_scaled_selected  = df_scaled

In [1188]:
len(df_scaled_selected.query('labels == 0'))  # the number of data in negative class

1574

In [1189]:
len(df_scaled_selected.query('labels == 1'))  # the number of data in positive class

919

In [1190]:
# df_scaled.drop(["BVP_weight_amp_avg", "BVP_weight_energy_avg", "BVP_power_entropy"], axis = 1, inplace=True)

# Modeling

### Building and training

In [1191]:
data = df_scaled_selected.to_numpy()

In [1192]:
SEED = 42
def set_seed(SEED):
    os.environ['PYTHONHASHSEED'] = str(SEED)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    random.seed(SEED)
    tf.random.set_seed(SEED)
    np.random.seed(SEED)

In [1193]:
drop_rate = 0.2
set_seed(SEED)

X = data[:, :-2].astype(float)
y = data[:, -2].astype(int)
groups = data[:, -1]

logo = LeaveOneGroupOut()
logo.get_n_splits(X, y, groups)
print(logo)

splitter = StratifiedKFold(
    n_splits=10, # the number of folds.
    shuffle=True,  # whether data are shuffled before splitting.
    random_state=SEED # a random seed. 
) 


models, METRICS = [], []
for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):
# for i, (train_index, test_index) in enumerate(splitter.split(X, y)):
    print(f"Fold {i}:")
    print(f"  Test group={set(groups[test_index])}")
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]
    print(f"  the number of training data={len(y_train)}")
    print(f"  the number of test data={len(y_test)}")

    
    '''------ Begin: Model specification ------'''
    model = keras.models.Sequential([
        # keras.layers.Dense(
        #     units = 32, 
        #     input_shape = (X.shape[1],),
        #     activation=keras.activations.relu,
        #     kernel_initializer=keras.initializers.HeNormal(seed=42),
        #     # kernel_regularizer='l2'
        # ),
        keras.layers.Dense(
            units = 8, 
            input_shape = (X.shape[1],),
            activation=keras.activations.relu,
            kernel_initializer=keras.initializers.HeNormal(seed=42),
            # kernel_regularizer='l2'
        ),
        keras.layers.Dropout(
            rate=drop_rate
        ), 
        keras.layers.Dense(
            units = 4, 
            activation=keras.activations.relu,
            kernel_initializer=keras.initializers.HeNormal(seed=42),
            # kernel_regularizer='l2'
        ),
        keras.layers.Dropout(
            rate=drop_rate
        ), 
        keras.layers.Dense(
        units=2, 
        activation=keras.activations.softmax,
        kernel_initializer=keras.initializers.GlorotNormal(seed=42),
        # kernel_regularizer='l2'
        )
    ])
    
    # print(model.summary())
    '''------ End: Model specification ------'''
    
    '''------ Begin: Model compiling ------'''
    model.compile(
        # loss=keras.losses.BinaryCrossentropy(from_logits=False),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        optimizer=keras.optimizers.Adam(),
        metrics=[
                keras.metrics.SparseCategoricalAccuracy(), 
                # keras.metrics.BinaryAccuracy(), 
        ]
    )
    '''------ End: Model compiling ------'''
    
    '''------ Begin: Model fitting ------'''
    model.fit(
        x=X_train,
        y=y_train,
        batch_size=16,
        epochs=40,
        validation_data=(X_test, y_test)
    )
    '''------ End: Model fitting ------'''
    y_prob_dnn = model.predict(X_test)
    y_pred_dnn = np.argmax(y_prob_dnn, axis=1)

    f1 = f1_score(y_true=y_test, y_pred=y_pred_dnn, average='macro')
    loss = log_loss(y_true=y_test, y_pred=y_prob_dnn)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_dnn)
    METRICS.append({
        'Loss': loss, # logistic loss
        'Accuracy': accuracy, # accuracy
        'F1': f1 # F1 score
    })
    models.append(model)
    print(METRICS[-1])

LeaveOneGroupOut()
Fold 0:
  Test group={'S14'}
  the number of training data=1874
  the number of test data=619
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
{'Loss': 0.04686496080347189, 'Accuracy': 0.9870759289176091, 'F1': 0.9859808850840242}
Fold 1:
  Test group={'S15'}
  the number of training data=1873
  the number of test data=620
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epo

In [1170]:
model.summary()

Model: "sequential_920"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3072 (Dense)           (None, 8)                 80        
_________________________________________________________________
dropout_1688 (Dropout)       (None, 8)                 0         
_________________________________________________________________
dense_3073 (Dense)           (None, 4)                 36        
_________________________________________________________________
dropout_1689 (Dropout)       (None, 4)                 0         
_________________________________________________________________
dense_3074 (Dense)           (None, 2)                 10        
Total params: 126
Trainable params: 126
Non-trainable params: 0
_________________________________________________________________


### Model evaluation

In [1194]:
acc_list, f1_list = [], []
for m in METRICS:
    acc_list.append(m['Accuracy'])
    f1_list.append(m['F1'])
print(f"Acc: {np.mean(acc_list):.4f}, F1: {np.mean(f1_list):.4f}")

Acc: 0.8865, F1: 0.8311


### Model selection

In [1195]:
idx_best = np.argmax(acc_list)
print(idx_best)

0


In [1196]:
print(f"Acc: {np.mean(acc_list[idx_best]):.4f}, F1: {np.mean(f1_list[idx_best]):.4f}")

Acc: 0.9871, F1: 0.9860


In [1197]:
best_model = models[idx_best]

In [1104]:
# best_model = models[3]

In [1207]:
# best_model.save('./models/best_model_compact.h5')
best_model = tf.keras.models.load_model('./models/best_model_compact.h5')

In [1208]:
test_stress = pd.read_csv('./data/processed/stress_raw_data.csv').iloc[:, 1:]
test_base = pd.read_csv('./data/processed/baseline_raw_data.csv').iloc[:, 1:]

test = pd.concat([test_stress, test_base], axis = 0)
# test.drop(["BVP_peak_f", "BVP_weight_amp_avg", "BVP_weight_energy_avg", "BVP_power_entropy"], axis = 1, inplace=True)
# test.drop(["BVP_n_peak"], axis = 1, inplace=True)

X_test = test.drop(['labels'], axis=1)
y_test = test["labels"]

scaler = MinMaxScaler()
scaler.fit(X_test)

print(scaler.scale_)
print(scaler.min_)

test_scaled = scaler.transform(X_test)
display(pd.DataFrame(data=test))

df_test_scaled = pd.DataFrame(
  test_scaled,
  columns=X_test.columns
)

y_prob_dnn = best_model.predict(df_test_scaled)
y_pred_dnn = np.argmax(y_prob_dnn, axis=1)

f1 = f1_score(y_true=y_test, y_pred=y_pred_dnn, average='macro')
loss = log_loss(y_true=y_test, y_pred=y_prob_dnn)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_dnn)

print(f"Acc: {f1:.4f}, F1: {accuracy:.4f}")

[ 0.02631579  0.03703704  0.58133071  0.12671139  3.          0.44444444
  0.28169014  4.09854144 14.02933386]
[ -11.78947368  -20.59259259 -296.9337571    -2.09010287   -4.7
  -13.17777778   -9.8028169  -136.52555059   -9.23057748]


Unnamed: 0,BVP_min,BVP_max,BVP_mean,BVP_std,BVP_peak_f,TEMP_min,TEMP_max,TEMP_mean,TEMP_std,labels
0,485.0,561.0,512.440446,16.848150,1.833333,30.62,36.42,33.554754,0.690338,1.0
1,469.0,561.0,512.216929,16.986857,1.800000,31.58,36.42,33.549579,0.685278,1.0
2,469.0,561.0,512.220476,17.036637,1.800000,31.58,36.09,33.528728,0.688757,1.0
3,469.0,561.0,512.209326,16.850773,1.800000,30.29,36.09,33.515205,0.696816,1.0
4,469.0,561.0,512.319818,16.937081,1.800000,30.29,38.03,33.512985,0.695611,1.0
...,...,...,...,...,...,...,...,...,...,...
38,476.0,573.0,511.658055,22.729420,1.800000,30.94,34.80,33.396084,0.701112,0.0
39,454.0,577.0,510.782851,23.189235,1.800000,30.94,36.09,33.400507,0.697915,0.0
40,454.0,577.0,511.164300,23.393213,1.800000,30.94,36.09,33.400735,0.699104,0.0
41,454.0,577.0,511.197262,23.943366,1.800000,30.94,36.09,33.398144,0.696720,0.0


Acc: 0.8111, F1: 0.8462


In [1209]:
print(y_test, y_pred_dnn)

0     1.0
1     1.0
2     1.0
3     1.0
4     1.0
     ... 
38    0.0
39    0.0
40    0.0
41    0.0
42    0.0
Name: labels, Length: 130, dtype: float64 [1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1]


# Generate a TensorFlow Lite Model

### Generate Models without Quantization

In [1264]:
# Define paths to model files
import os
MODELS_DIR = 'models/'
os.path.join(MODELS_DIR)
MODEL_TF = MODELS_DIR + 'model.pb'
MODEL_NO_QUANT_TFLITE = MODELS_DIR + 'model_no_quant.tflite'
MODEL_TFLITE = MODELS_DIR + 'model.tflite'
MODEL_TFLITE_MICRO = MODELS_DIR + 'model.cc'

In [1237]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.target_spec.supported_types = [tf.float16]
tflite_no_quant_model = converter.convert()
# model_no_quant_tflite = converter.convert()

# # Save the model to disk
open(MODEL_NO_QUANT_TFLITE, "wb").write(tflite_no_quant_model)

INFO:tensorflow:Assets written to: /tmp/tmpjviqr8m3/assets


INFO:tensorflow:Assets written to: /tmp/tmpjviqr8m3/assets
2023-06-07 08:08:55.535580: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-07 08:08:55.535659: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2023-06-07 08:08:55.535704: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2023-06-07 08:08:55.535972: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-07 08:08:55.536061: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-07 08:08:55.536139: I t

2400

In [1247]:
interpreter = tf.lite.Interpreter(model_path = MODEL_NO_QUANT_TFLITE)
input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

interpreter.allocate_tensors()
print(interpreter.get_input_details())

[{'name': 'dense_2841_input', 'index': 0, 'shape': array([1, 9], dtype=int32), 'shape_signature': array([-1,  9], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


### Model size

In [1220]:
import sys, os
def format_size(variable):
    size_in_bytes = sys.getsizeof(variable)
    units = ["B", "KB", "MB", "GB", "TB"]
    unit_index = 0
    while size_in_bytes >= 1024 and unit_index < len(units) - 1:
        size_in_bytes /= 1024.0
        unit_index += 1
    return "{:.2f} {}".format(size_in_bytes, units[unit_index])

print("Model is {}".format(format_size(model_no_quant_tflite)))
# print("Quantized Model is {}".format(format_size(model_tflite)))

Model is 2.38 KB


### Deploy to a Microcontroller

In [1243]:
!xxd -i {MODEL_NO_QUANT_TFLITE} > {MODEL_TFLITE_MICRO}

In [1248]:
REPLACE_TEXT = MODEL_NO_QUANT_TFLITE.replace('/', '_').replace('.', '_')

In [1249]:
!sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}

In [1265]:
!cat {MODEL_NO_QUANT_TFLITE}

  	  	    �  �  �  �  (          <          
   
      
      min_runtime_version R���      1.5.0              MLIR Converted.      
         @   D   H   X   
   �  x  �  d  �  l  �  �  �  �   �             
      H  d  �   
      main    
          	   
         .���  �?   
      	   ����         ����   
      0                  Identity       ����   ����R���     
      ����   	              @���   
      h   X            B   sequential_843/dense_2843/MatMul;sequential_843/dense_2843/BiasAdd     ����   L���      
        
         
���                     ���   	      �   x            a   sequential_843/dense_2842/MatMul;sequential_843/dense_2842/Relu;sequential_843/dense_2842/BiasAdd      ����   ,���      
        $      
                                
  
   
  
   	      	   ���        �   x           a   sequential_843/dense_2841/MatMul;sequential_843/