<a href="https://colab.research.google.com/github/JMaio/pdiot-har-project/blob/main/tensorflow/pdiot_training_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
print(f"TF version: {tf.__version__}")

%matplotlib notebook

TF version: 2.0.0


In [2]:
import os
from pathlib import Path
import re

In [3]:
data_root = Path('.')/'data'
!git clone https://github.com/specknet/pdiot-data.git $data_root

fatal: destination path 'data' already exists and is not an empty directory.


In [4]:
target_folder = data_root/'2020'
# Raw string: "\d" inside a plain literal is an invalid escape sequence.
student_pattern = r"s\d{7}"

# Maps each student id found in a directory path to the list of .csv files
# located in the directories whose path contains that id.
files = {}
for (dirpath, dirnames, filenames) in os.walk(target_folder):
  s_match = re.search(student_pattern, dirpath)
  # if regex matches, and files exist
  if s_match and filenames:
    student_dir = Path(dirpath)
    # only accept .csv files; the key is created even if none are found here
    files.setdefault(s_match.group(), []).extend(
        student_dir/f for f in filenames if f.endswith('.csv'))

print("File count:")
{k: len(v) for (k, v) in files.items()}

File count:


{'s1621503': 84,
 's1704037': 80,
 's1701688': 84,
 's1737472': 78,
 's1758009': 78,
 's1710228': 78,
 's1660711': 36,
 's1655560': 78,
 's2017768': 8,
 's1721039': 81,
 's1746788': 84,
 's1711507': 97,
 's1642301': 84,
 's1764751': 20,
 's1616573': 94}

In [5]:
# Dataset activity codes listed in TF class-index order: the position of a
# code in this tuple is the class index used for training.
_ACTIVITY_CODES_BY_TFCODE = (
  0,    # 0:  "Sitting"
  4,    # 1:  "Sitting bent forward"
  5,    # 2:  "Sitting bent backward"
  1,    # 3:  "Walking at normal speed"
  100,  # 4:  "Standing"
  2,    # 5:  "Lying down on back"
  7,    # 6:  "Lying down left"
  6,    # 7:  "Lying down right"
  8,    # 8:  "Lying down on stomach"
  9,    # 9:  "Movement"
  11,   # 10: "Running"
  12,   # 11: "Climbing stairs"
  13,   # 12: "Descending stairs"
  31,   # 13: "Desk work"
)

# activity code -> TF class index
ACTIVITY_CODE_TO_TFCODE_MAPPING = {
  code: tfcode for tfcode, code in enumerate(_ACTIVITY_CODES_BY_TFCODE)
}

# TF class index -> activity code (exact inverse of the mapping above)
ACTIVITY_TFCODE_TO_CODE_MAPPING = dict(enumerate(_ACTIVITY_CODES_BY_TFCODE))

In [6]:
from dataclasses import dataclass

# Number of leading "# key: value" metadata lines at the top of each recording
# file; read_custom_file parses that many lines and passes the same value to
# pandas as the header row index.
header_size = 5

@dataclass
class Header:
  """Metadata taken from the comment header of one recording file.

  read_custom_file passes the header values positionally, in file order, so
  the field order here must match the order of the header lines. Values
  arrive as strings; `act_code` is converted to `int` on construction.
  """
  sensor_pos: str
  sensor_side: str
  act_type: str
  # Always an int after __post_init__, whatever int()-convertible value was
  # passed in.
  act_code: int
  subject_id: str
  # https://stackoverflow.com/a/54863771/9184658
  def __post_init__(self):
    # Normalise the raw header text (e.g. '6') to a plain int so it can be
    # used directly as a key into the activity-code mappings.
    self.act_code = int(self.act_code)


@dataclass
class Activity:
  """One recording file: its parsed header plus the tabular data below it."""
  # metadata parsed from the file's leading comment lines
  header: Header
  # remainder of the file as loaded by pd.read_csv
  df: pd.DataFrame

def read_custom_file(filename):
  """Read one recording file into an `Activity`.

  The file is expected to start with `header_size` lines of the form
  "# <name>: <value>", followed by a regular CSV table (column row first).

  Args:
    filename: path of the recording file.

  Returns:
    Activity holding the parsed `Header` and the CSV body as a DataFrame.

  Raises:
    ValueError: if one of the first `header_size` lines is missing or does
      not have the "# <name>: <value>" shape.
  """
  values = []
  with open(filename) as f:
    for line_no in range(1, header_size + 1):
      # readline() returns '' at EOF, which is reported as malformed below
      line = f.readline().rstrip()
      try:
        # maxsplit=1 keeps values that themselves contain '# ' or ': ' intact
        field = line.split('# ', 1)[1]
        values.append(field.split(': ', 1)[1])
      except IndexError:
        raise ValueError(
            f"{filename}: malformed header line {line_no}: {line!r}") from None

  header = Header(*values)
  # The handle above is closed before pandas reopens the file; row index
  # `header_size` (0-based) is the CSV column row right after the metadata.
  df = pd.read_csv(filename, header=header_size)
  return Activity(header, df)

In [7]:
def plot_respeck_fig(df):
  """Plot the three accelerometer columns of `df` on a new 10x8 figure.

  `df` must provide 'accel_x', 'accel_y' and 'accel_z'. Each column is drawn
  as its own labelled line and a legend is added.

  Returns the `matplotlib.pyplot` module itself (not the figure).
  """
  plt.figure(figsize=(10, 8))
  for axis_name in ("accel_x", "accel_y", "accel_z"):
    plt.plot(df[axis_name], label=axis_name)
  plt.legend()
  return plt

In [8]:
# Load one recording as a sanity check of the header/CSV parsing.
# NOTE(review): index 0 is whichever file os.walk listed first for this
# student, so the recording shown may differ between machines.
act = read_custom_file(files["s1621503"][0])
act

Activity(header=Header(sensor_pos='Wrist', sensor_side='Right', act_type='Lying down right', act_code=6, subject_id='s1621503'), df=         timestamp  seq   accel_x   accel_y   accel_z
0    1601826499860    0  0.080566 -0.230530 -0.979065
1    1601826499938    1  0.081055 -0.230042 -0.978577
2    1601826500017    2  0.084961 -0.233215 -0.978333
3    1601826500096    3  0.083740 -0.231750 -0.979797
4    1601826500175    4  0.084473 -0.232483 -0.981750
..             ...  ...       ...       ...       ...
411  1601826532226  411  0.060547 -0.211487 -0.983704
412  1601826532305  412  0.060547 -0.207825 -0.989075
413  1601826532383  413  0.060303 -0.204407 -0.987366
414  1601826532462  414  0.061279 -0.208313 -0.986145
415  1601826532541  415  0.061523 -0.207092 -0.984924

[416 rows x 5 columns])

In [9]:
# Trailing ';' suppresses the returned pyplot module's repr in the cell output.
plot_respeck_fig(act.df);

<IPython.core.display.Javascript object>

<module 'matplotlib.pyplot' from '/home/c/.local/lib/python3.6/site-packages/matplotlib/pyplot.py'>

In [10]:
# Smoke-test data: the first listed recording of two different subjects,
# one used for training and one for testing.
train = read_custom_file(files["s1621503"][0])
test  = read_custom_file(files["s1616573"][0])

In [11]:
# Columns used as model input features: the three accelerometer axes.
keys = ["accel_x", "accel_y", "accel_z"]

In [12]:
def _first_sample(activity):
  """Return the first row's `keys` values as a one-sample batch of shape (1, len(keys))."""
  first_row = activity.df.iloc[0]
  return np.array([first_row.get(keys).values])

# A single sample per split; labels are the raw activity codes taken from each
# file's header (not yet translated to TF class indices).
train_data, train_code = _first_sample(train), train.header.act_code
test_data, test_code = _first_sample(test), test.header.act_code

In [19]:
# Inspect the single training sample and its raw activity code.
# NOTE(review): the saved execution count for this cell (19) is out of
# sequence and the saved output is a bare list rather than a (data, code)
# tuple, so it looks stale - re-run with Restart & Run All before sharing.
train_data, train_code

[array([ 0.08056641, -0.23052979, -0.97906494])]

In [14]:
# Inspect the single test sample and its raw activity code.
test_data, test_code

(array([[-0.7507324 ,  0.09686279,  0.6395874 ]]), 7)

In [15]:
# code from https://www.tensorflow.org/overview/ - "for beginners"
model = tf.keras.models.Sequential([
  # tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  # tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(14, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_data, np.array([ACTIVITY_CODE_TO_TFCODE_MAPPING[train_code]]), epochs=1)

Train on 1 samples


<tensorflow.python.keras.callbacks.History at 0x7f0f1353c6a0>

In [16]:
# Index of the highest-scoring output unit for the test sample. This is a TF
# class index, not a dataset activity code (see ACTIVITY_TFCODE_TO_CODE_MAPPING).
np.argmax(model.predict(test_data), axis=-1)

array([13])

In [17]:
# Score the single test sample against its TF class index; with the compile
# settings above the result is [loss, accuracy].
model.evaluate(test_data, np.array([ACTIVITY_CODE_TO_TFCODE_MAPPING[test_code]]))



[2.526549816131592, 0.0]

In [18]:
# https://www.tensorflow.org/lite/convert/index#convert_a_keras_model_
# (to generate a SavedModel) tf.saved_model.save(model, "saved_model_keras_dir")

# Build a converter from the in-memory Keras model and run the conversion.
tflite_model = tf.lite.TFLiteConverter.from_keras_model(model).convert()

# Write the converted model to 'model.tflite' in the working directory.
with open('model.tflite', 'wb') as tflite_file:
  tflite_file.write(tflite_model)