# IOT DATA SCIENCE MINI-PROJECT


The following notebook is based on the IoT Data Science homework assignments.

In [1]:
import tensorflow as tf

# tf.test.gpu_device_name() returns an empty string when no GPU is available,
# so report that case explicitly instead of printing a bare "GPU found: ".
gpu_device = tf.test.gpu_device_name()
if gpu_device:
    print(f'GPU found: {gpu_device}')
else:
    print('No GPU found; TensorFlow will run on the CPU.')

GPU found: /device:GPU:0


## Install Libraries

In [2]:
# Install the third-party packages imported by this notebook into the kernel's environment.
# NOTE(review): versions are unpinned; the recorded run resolved tensorflow-addons 0.20.0.
%pip install numpy pandas  tensorflow-addons plotly scikit-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-addons
  Downloading tensorflow_addons-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (591 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m591.0/591.0 kB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.20.0 typeguard-2.13.3


In [3]:
import tensorflow as tf
import numpy as np

from tensorflow import keras

import tensorflow_addons as tfa

from urllib.request import urlopen
import pickle


# Seed TensorFlow and NumPy with one shared value so their global random
# generators start from a fixed state.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



#### Dataset


In [4]:
# Google Drive file ids of the three recordings, one per class label.
ID_DATA_0 = '14bZBIhLLQH3NdAhEkFmcxi7WoeUuni_y'  # label 0: away
ID_DATA_1 = '1MS4CPEietH0Aw6i-4ZUBGeKbC2pwrNMY'  # label 1: stressed
ID_DATA_2 = '1bjXvt5Jvk1LBfK7Jee3d5M9cDsdnrD__'  # label 2: using

#### Validation of Collected Data

In [5]:
from urllib.request import urlopen
import numpy as np
import pandas as pd


COLS = ['timestamp', 'accX','accY','accZ','gyrX', 'gyrY', 'gyrZ']
HZ = 119  # expected sampling rate, in Hz
DURATION = 10 * 60 * 1000  # minimum expected recording length: 10 minutes, in milliseconds

# Validate every recording before it is used: column set, duration, sampling rate.
for l, i in zip([0,1,2], [ID_DATA_0, ID_DATA_1, ID_DATA_2]):
    # Close the HTTP response as soon as the CSV has been parsed.
    with urlopen(f'https://drive.google.com/uc?export=download&id={i}') as response:
        d = pd.read_csv(response)

    # Check whether the file has exactly the expected columns (order does not matter).
    is_column_correct = set(d.columns) == set(COLS)
    assert is_column_correct, f'A file, {l}.csv, does not have columns that we expect. Actual columns are: {d.columns}'

    # Check whether the duration of the data collection is at least DURATION (10 minutes).
    # Timestamps are in milliseconds, so the difference is directly comparable.
    duration = d['timestamp'].max() - d['timestamp'].min()
    is_duration_correct = duration >= DURATION
    assert is_duration_correct, f'A file, {l}.csv, does not have the duration that we expect. Actual duration is: {duration / (1000 * 60):.2f} minutes.'

    # Check whether the sampling rate is close to 119 Hz (within +/- 10 Hz).
    # The mean gap between consecutive timestamps (ms) gives the average period.
    hz = 1000.0 / d['timestamp'].diff().mean()
    is_hz_correct = np.isclose(hz, HZ, atol = 10)
    assert is_hz_correct, f'A file, {l}.csv, does not have the sampling rate that we expect. Actual sampling rate is: {hz:.2f} Hz'

    print(f'A file, {l}.csv, is valid!')

A file, 0.csv, is valid!
A file, 1.csv, is valid!
A file, 2.csv, is valid!


#### Load and Resample Dataset


In [6]:
from urllib.request import urlopen
import numpy as np
import pandas as pd


def load_data(x):
    """Download one recording and resample it onto a uniform 8.4 ms grid.

    Parameters
    ----------
    x : str
        Google Drive file id of the CSV recording.

    Returns
    -------
    numpy.ndarray
        Values of every column except ``timestamp`` (which becomes the index),
        resampled every 8.4 ms and linearly interpolated, one row per step.
    """
    # Close the HTTP response as soon as the CSV has been parsed.
    with urlopen(f'https://drive.google.com/uc?export=download&id={x}') as response:
        raw = pd.read_csv(response)

    return (
        raw
        # pd.to_timedelta is used instead of pd.TimedeltaIndex(..., unit='ms'),
        # whose `unit` argument is deprecated in recent pandas releases.
        .assign(timestamp=lambda df: pd.to_timedelta(df['timestamp'], unit='ms'))
        .set_index('timestamp')
        .resample('8.4ms')
        .interpolate('linear')
        .values
    )


DATA_0 = load_data(ID_DATA_0)
DATA_1 = load_data(ID_DATA_1)
DATA_2 = load_data(ID_DATA_2)

In [7]:

# NOTE(review): disabled experiment kept as a bare string literal, so nothing in
# it runs; because the string is the cell's last expression, its repr is echoed
# as the cell output. If it is ever re-enabled, note that the loop builds every
# frame from DATA_0 instead of the loop variable `d`.
"""
#replacing accX accY with a magnitude collumn
new_data=[]

for d in [DATA_0,DATA_1,DATA_2]:
  df = pd.DataFrame(DATA_0, columns=['accX', 'accY', 'accZ', 'gyrX', 'gyrY', 'gyrZ'])
  df['maccXY'] = np.sqrt(df['accX']**2 + df['accY']**2)

  #df = df.drop(['accX', 'accY'], axis=1)

  new_data.append(df.values)

df
"""

" \n#replacing accX accY with a magnitude collumn \nnew_data=[]\n\nfor d in [DATA_0,DATA_1,DATA_2]:\n  df = pd.DataFrame(DATA_0, columns=['accX', 'accY', 'accZ', 'gyrX', 'gyrY', 'gyrZ'])\n  df['maccXY'] = np.sqrt(df['accX']**2 + df['accY']**2)\n\n  #df = df.drop(['accX', 'accY'], axis=1)\n\n  new_data.append(df.values)\n\ndf\n"

#### Split Data

Here, we will use the data collected during the first five minutes as training data and the data collected during the following one minute as test data. Each one-second window (119 samples) becomes one example.

In [8]:
import numpy as np

HZ = 119  # samples per one-second window
TRAIN_SECONDS = 60 * 5  # first five minutes of each recording -> training windows
TEST_SECONDS = 60  # the following minute -> test windows

X_train, y_train = [], []
X_test, y_test = [], []

for l, d in zip([0, 1, 2], [DATA_0, DATA_1, DATA_2]):
    # Every window must be full length; a recording that is too short would
    # otherwise produce truncated or empty slices and a ragged array below.
    required_rows = (TRAIN_SECONDS + TEST_SECONDS) * HZ
    if d.shape[0] < required_rows:
        raise ValueError(
            f'Recording {l} has {d.shape[0]} samples, but {required_rows} are needed '
            f'for {TRAIN_SECONDS + TEST_SECONDS} one-second windows.'
        )

    # Cut the recording into consecutive, non-overlapping one-second windows.
    X_train.extend(d[i * HZ:(i + 1) * HZ, :] for i in range(TRAIN_SECONDS))
    X_test.extend(
        d[i * HZ:(i + 1) * HZ, :]
        for i in range(TRAIN_SECONDS, TRAIN_SECONDS + TEST_SECONDS)
    )

    # One label per window, in the same order as the windows appended above.
    y_train.append(np.repeat(l, TRAIN_SECONDS))
    y_test.append(np.repeat(l, TEST_SECONDS))

# Stack the windows into (n_windows, HZ, n_channels) arrays and flatten the labels.
X_train, X_test = np.asarray(X_train), np.asarray(X_test)
y_train, y_test = np.concatenate(y_train, axis=0), np.concatenate(y_test, axis=0)

print(f'X_train: {X_train.shape}')
print(f'X_test: {X_test.shape}')

X_train: (900, 119, 6)
X_test: (180, 119, 6)


In [9]:
from itertools import product
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Pick three training windows and draw their six sensor channels, one subplot
# per window, titled with the window's class name.
labels = ["away", "stressed", "using"]
indexes_to_plot = [12, 430, 600]
labels_to_plot = [labels[y_train[sample_idx]] for sample_idx in indexes_to_plot]

print(labels_to_plot)

fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    shared_yaxes=False,
    row_titles=labels_to_plot,
)

data_format = ['accX', 'accY', 'accZ', 'gyrX', 'gyrY', 'gyrZ']
colors = ['blue', 'red', 'green', 'orange', 'purple', 'yellow']

show_legend = False

for row, sample_idx in enumerate(indexes_to_plot, start=1):
    window = X_train[sample_idx]
    sample_axis = np.arange(window.shape[0])

    # Legend entries are only emitted from the third subplot onwards, so each
    # channel name appears once instead of once per subplot.
    show_legend = show_legend or row == 3

    for channel, (channel_name, channel_color) in enumerate(zip(data_format, colors)):
        trace = go.Scatter(
            x=sample_axis,
            y=window[:, channel],
            mode='lines+markers',
            showlegend=show_legend,
            name=channel_name,
            line=dict(color=channel_color),
        )
        fig.add_trace(trace, row=row, col=1)

fig.update_layout(width=1000, height=1000)

fig.show()


['away', 'stressed', 'using']


#### Model Training


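While convolutional neural networks are effective, they could not be deployed correctly to the Arduino Nano 33 BLE. The CNN below is therefore kept for reference only; the dense model defined in the next cell replaces it.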

In [10]:
def _he_normal():
    """Return a fresh He-normal initializer with the fixed seed used by the hidden layers."""
    return keras.initializers.HeNormal(seed=42)


def _conv_relu(filters, kernel_size):
    """Build a same-padded Conv1D layer with ReLU activation and He-normal weights."""
    return keras.layers.Conv1D(
        filters=filters,
        kernel_size=kernel_size,
        padding="same",
        activation=keras.activations.relu,
        kernel_initializer=_he_normal(),
    )


def _dense_relu(units):
    """Build a Dense layer with ReLU activation and He-normal weights."""
    return keras.layers.Dense(
        units=units,
        activation=keras.activations.relu,
        kernel_initializer=_he_normal(),
    )


# Layers are listed in execution order; the helpers are called in that same
# order so the layers are created exactly as in a flat definition.
cnn_layers = [
    # Input: one window of 119 samples x 6 sensor channels.
    keras.layers.BatchNormalization(input_shape=(119, 6)),

    # Convolutional stage 1.
    _conv_relu(filters=32, kernel_size=14),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.BatchNormalization(),

    # Convolutional stage 2.
    _conv_relu(filters=64, kernel_size=8),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.BatchNormalization(),

    # Convolutional stage 3: two stacked convolutions before pooling.
    _conv_relu(filters=128, kernel_size=4),
    _conv_relu(filters=128, kernel_size=3),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.BatchNormalization(),

    # Classifier head: two regularised dense layers, then a 3-way softmax.
    keras.layers.Flatten(),
    _dense_relu(units=32),
    keras.layers.Dropout(rate=0.5),
    keras.layers.BatchNormalization(),
    _dense_relu(units=32),
    keras.layers.Dropout(rate=0.5),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(
        units=3,
        activation=keras.activations.softmax,
        kernel_initializer=keras.initializers.GlorotUniform(seed=42),
    ),
]

model = keras.models.Sequential(cnn_layers)

Dense neural networks, on the other hand, can be deployed to the microcontroller without problems.

In [11]:
from tensorflow import keras


# Fully connected classifier: each 119 x 6 window is normalised, flattened and
# passed through three ReLU dense layers before a 3-way softmax.
model = keras.models.Sequential([
    keras.layers.BatchNormalization(input_shape = (119, 6)),
    keras.layers.Flatten(),
    keras.layers.Dense(
        units = 200,
        activation=keras.activations.relu,
        kernel_initializer=keras.initializers.HeNormal(seed=42),
    ),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(
        units = 64,
        activation=keras.activations.relu,
        kernel_initializer=keras.initializers.HeNormal(seed=42),
    ),
    keras.layers.Dropout(
        rate=0.5
    ),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(
        units = 32,
        activation=keras.activations.relu,
        kernel_initializer=keras.initializers.HeNormal(seed=42),
    ),
    keras.layers.Dropout(
        rate=0.5
    ),
    keras.layers.BatchNormalization(),
    # Output layer: one probability per class (away / stressed / using).
    keras.layers.Dense(
        units = 3,
        activation=keras.activations.softmax,
        kernel_initializer=keras.initializers.GlorotUniform(seed=42),
    )
])
# summary() prints the table itself and returns None, so it is called directly
# rather than wrapped in print(), which would emit an extra "None" line.
model.summary()



# Labels are integer class ids, hence the sparse loss and metric.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(),
    metrics=[
            keras.metrics.SparseCategoricalAccuracy(),
    ]
)


# Keep the History object so the per-epoch loss/accuracy can be inspected later.
# NOTE(review): the test split doubles as validation data here, so the accuracy
# reported on X_test afterwards is not measured on data unseen during training runs.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30 ,
    validation_data=(X_test, y_test)
)



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_6 (Batc  (None, 119, 6)           24        
 hNormalization)                                                 
                                                                 
 flatten_1 (Flatten)         (None, 714)               0         
                                                                 
 dense_3 (Dense)             (None, 200)               143000    
                                                                 
 batch_normalization_7 (Batc  (None, 200)              800       
 hNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 64)                12864     
                                                                 
 dropout_2 (Dropout)         (None, 64)               

<keras.callbacks.History at 0x7f27c1ba3a00>

#### Evaluation

In [12]:
from sklearn.metrics import accuracy_score

# Turn the per-class probabilities into hard label predictions by taking the
# most probable class of each test window.
class_probabilities = model.predict(X_test)
y_pred = class_probabilities.argmax(axis=1)

# Share of test windows whose predicted label equals the true label.
acc = accuracy_score(y_test, y_pred)

print(f'Accuracy: {acc * 100:.2f} %.')

Accuracy: 90.56 %.


In [13]:
from sklearn.metrics import confusion_matrix
import plotly.express as px
from sklearn import metrics


def plot_confusion_matrix(y_true, y_pred, classes, normalize=False):
    """Plot the confusion matrix of a classifier as an annotated heatmap.

    Parameters
    ----------
    y_true : array-like of int
        True labels, encoded as positions in ``classes`` (0 .. len(classes) - 1).
    y_pred : array-like of int
        Predicted labels, using the same encoding as ``y_true``.
    classes : sequence of str
        Display names; ``classes[k]`` names label ``k`` on both axes.
    normalize : bool, default False
        If True, each row is divided by its total, so a cell shows the
        fraction of that true class assigned to each predicted class.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    class_ids = list(range(len(classes)))

    # Fix the label set so the matrix is always len(classes) x len(classes),
    # even when a class never occurs in y_true or y_pred; otherwise the matrix
    # would shrink and the tick labels below would no longer line up.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class without any true samples has an all-zero row; dividing by 1
        # keeps that row at zero instead of producing NaN.
        cm = cm.astype('float') / np.maximum(row_totals, 1)

    fig = px.imshow(cm, text_auto=True, aspect='auto', color_continuous_scale='blues')
    fig.update_layout(height=400, width=400, title='Confusion matrix of the classifier', coloraxis_showscale=False)
    # Tick positions follow the number of classes rather than a fixed [0, 1, 2].
    fig.update_xaxes(title='Predicted', tickvals=class_ids, ticktext=classes, tickmode='array')
    fig.update_yaxes(title='True', tickvals=class_ids, ticktext=classes, tickmode='array')
    fig.show()


plot_confusion_matrix(y_test, y_pred, labels)

The confusion matrix shows that "using" is quite often mistaken for "away".

# Deployment

In [14]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
#converter.optimizations = [tf.lite.Optimize.DEFAULT]
#converter.target_spec.supported_types = [tf.float16]
# NOTE: despite its name, this holds the unquantized model, because the two
# optimization lines above are commented out.
tflite_quant_model = converter.convert()

# Save the model to disk. The context manager closes (and flushes) the file
# before its size is read below.
with open("gesture_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_quant_model)

import os
basic_model_size = os.path.getsize("gesture_model.tflite")
print("Model is %d bytes" % basic_model_size)

Model is 636560 bytes


In [15]:
!echo "const unsigned char model[] = {" > /content/model.h
!cat gesture_model.tflite | xxd -i      >> /content/model.h
!echo "};"                              >> /content/model.h

import os
model_h_size = os.path.getsize("model.h")
print(f"Header file, model.h, is {model_h_size:,} bytes.")
print("\nOpen the side panel (refresh if needed). Double click model.h to download the file.")

Header file, model.h, is 3,925,488 bytes.

Open the side panel (refresh if needed). Double click model.h to download the file.
