In [1]:
import json
import numpy as np
import pandas as pd
import plotly.express as px
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

---
### Load the dataset and create the train/test split
---

In [5]:
# Read the curated recordings from disk
with open('../data/curated-data.json') as f:
    data = json.load(f)

# After the transpose the posture names are columns; keep only those three
posture_names = ['back', 'left', 'right']
df = pd.DataFrame(data).T[posture_names]

# Each posture cell holds three readings (indices 0-2). Give every reading
# its own '<posture>_<k>' column, ordered back_0, left_0, right_0, back_1, ...
per_reading = pd.DataFrame({
    f'{pose}_{k}': df[pose].apply(lambda readings, k=k: readings[k])
    for k in range(3)
    for pose in posture_names
})

# Stack all columns into one long table: one row per individual reading
df = per_reading.melt(var_name='posture', value_name='reading')
# Strip the '_<k>' suffix so only the posture name remains as the label
df['posture'] = df['posture'].map(lambda column_name: column_name.split('_')[0])
df

Unnamed: 0,posture,reading
0,back,"[[23.64, 23.91, 23.95, 23.82, 24.64, 24.24, 25..."
1,back,"[[24.09, 24.33, 24.21, 24.42, 25.24, 24.97, 25..."
2,back,"[[24.92, 24.97, 25.39, 25.1, 26.22, 26.13, 26...."
3,back,"[[26.39, 25.85, 26.58, 26.84, 27.47, 27.24, 27..."
4,back,"[[25.03, 24.98, 24.68, 24.7, 25.71, 25.73, 25...."
...,...,...
184,right,"[[25.49, 25.76, 25.19, 24.9, 25.86, 25.6, 25.9..."
185,right,"[[25.46, 25.13, 25.56, 25.35, 26.02, 26.33, 26..."
186,right,"[[25.03, 25.41, 25.28, 24.75, 25.14, 25.01, 24..."
187,right,"[[25.27, 25.12, 25.09, 24.9, 25.37, 24.9, 25.2..."


In [6]:
# Split the data into training and testing sets.
# A fixed seed makes the split (and everything trained on it) reproducible
# across kernel restarts.
SEED = 42

X = df['reading']
posture_labels = df['posture']
# One-hot encode the labels
y = pd.get_dummies(posture_labels).values
# Stratify on the posture names so that both subsets keep the same class
# proportions; with a test set this small an unlucky draw could otherwise
# under-represent a posture.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
    stratify=posture_labels,
)

In [7]:
def _as_array(samples):
    """Turn every sample into an ndarray and stack them along a new leading axis."""
    return np.array([np.array(sample) for sample in samples])


# Readings get an extra trailing axis of size 1 before being wrapped as tensors
X_train = tf.convert_to_tensor(np.expand_dims(_as_array(X_train), -1))
X_test = tf.convert_to_tensor(np.expand_dims(_as_array(X_test), -1))
# Labels are stacked as they are and wrapped as tensors
y_train = tf.convert_to_tensor(_as_array(y_train))
y_test = tf.convert_to_tensor(_as_array(y_test))

---
### Training a generic CNN model
---

In [8]:
# Small CNN classifier assembled layer by layer.
model = keras.Sequential()

# Two convolution + max-pooling stages; the input shape is taken from the
# training tensor (everything except the leading sample axis).
model.add(
    layers.Conv2D(
        8,
        kernel_size=(3, 3),
        activation='relu',
        padding='same',
        input_shape=X_train.shape[1:],
    )
)
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(4, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps and classify with a small dense head;
# the final layer emits a softmax over the three classes.
model.add(layers.Flatten())
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(3, activation='softmax'))

In [9]:
# Adam optimizer with categorical cross-entropy (labels are one-hot encoded);
# accuracy is tracked as the reported metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 24, 32, 8)         80        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 16, 8)        0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 12, 16, 4)         292       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 8, 4)          0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 192)               0         
                                                                 
 dense (Dense)               (None, 32)                6

In [34]:
# Stop training once the validation loss has gone 10 epochs without improving
# by at least 0.001, and roll the model back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)
# Lower the learning rate after 5 epochs without validation-loss improvement.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5)

In [35]:
# Train for up to 100 epochs in mini-batches of 16.
# The last 20% of the training samples are held out as the validation set
# that drives early stopping and the learning-rate schedule. X_test / y_test
# are deliberately NOT used here: monitoring them would select the restored
# "best" weights on the test set and inflate the test metrics computed later.
hist = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


In [36]:
# Training vs. validation accuracy for every epoch that was run
accuracy_curves = px.line(
    hist.history,
    y=['accuracy', 'val_accuracy'],
    labels={'index': 'epoch', 'value': 'accuracy'},
)
accuracy_curves

In [37]:
# Class names in class-index order.
# NOTE(review): assumes this matches the column order of the one-hot
# encoding produced by pd.get_dummies — confirm.
class_names = ['back', 'left', 'right']

# Predicted and true class indices for the held-out test samples
preds = model.predict(X_test)
preds = np.argmax(preds, axis=1)
y_test_max = np.argmax(y_test, axis=1)

# Create a confusion matrix. num_classes is passed explicitly so the matrix
# is always 3x3; without it the size is inferred from the largest index
# present and the labelled DataFrame below would fail on a smaller matrix.
confusion_matrix = tf.math.confusion_matrix(
    labels=y_test_max,
    predictions=preds,
    num_classes=len(class_names),
).numpy()
# Create a dataframe from the confusion matrix (rows: actual, columns: predicted)
confusion_matrix = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)
# Plot the confusion matrix with labelled axes so it can be read on its own
px.imshow(
    confusion_matrix,
    color_continuous_scale='Blues',
    labels={'x': 'predicted', 'y': 'actual', 'color': 'count'},
)



In [11]:
# Write the model's current weights to disk under ./checkpoints
# NOTE(review): this cell's execution count (11) is lower than that of the
# training cells (34-37) — confirm the saved weights are the trained ones.
model.save_weights('./checkpoints/my_checkpoint')

In [12]:
# Class index -> posture name, used for the figure titles
values = {0: 'back', 1: 'left', 2: 'right'}

# Mask of the test samples whose predicted class differs from the true class.
# Relies on `preds` and `y_test_max` computed in the test-set evaluation cell.
is_wrong = y_test_max != preds
misclassified = X_test[is_wrong]
real_labels = y_test_max[is_wrong]
pred_labels = preds[is_wrong]

# One heat-map per misclassified sample, titled with predicted and true posture
for idx, (pred, real) in enumerate(zip(pred_labels, real_labels)):
    # Drop the trailing axis so the reading is plotted as a 2-D image
    fig = px.imshow(img=misclassified[idx, :, :, 0])
    fig.update_layout(
        title=f'Predicted: {values[pred]}, '
        f'Actual: {values[real]}'
    )
    fig.show()

NameError: name 'y_test_max' is not defined

In [14]:
# Restore the weights from the same path the earlier save_weights call wrote to
model.load_weights('./checkpoints/my_checkpoint')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2a4bed904d0>

---
### Predicting on the whole dataset
---

In [16]:
# Stack every reading in X (training and test samples alike), add the
# trailing axis of size 1, and wrap the result as a tensor
X_total = tf.convert_to_tensor(
    np.expand_dims(np.array([np.array(reading) for reading in X]), -1)
)
# One-hot labels for the same samples
y_total = tf.convert_to_tensor(y)

In [17]:
# Class probabilities for every sample in the dataset
y_pred = model.predict(X_total)
# Collapse probabilities / one-hot rows to class indices
y_pred_max = y_pred.argmax(axis=1)
y_total_max = np.argmax(y_total, axis=1)



In [18]:
# Class names in class-index order.
# NOTE(review): assumes this matches the column order of the one-hot
# encoding produced by pd.get_dummies — confirm.
class_names = ['back', 'left', 'right']

# Create a confusion matrix over the whole dataset. num_classes is passed
# explicitly so the matrix is always 3x3; without it the size is inferred
# from the largest index present and the labelled DataFrame below would fail
# on a smaller matrix.
confusion_matrix = tf.math.confusion_matrix(
    labels=y_total_max,
    predictions=y_pred_max,
    num_classes=len(class_names),
).numpy()
# Create a dataframe from the confusion matrix (rows: actual, columns: predicted)
confusion_matrix = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)
# Plot the confusion matrix with labelled axes so it can be read on its own
px.imshow(
    confusion_matrix,
    color_continuous_scale='Blues',
    labels={'x': 'predicted', 'y': 'actual', 'color': 'count'},
)

In [19]:
# Fraction of samples whose predicted class equals the true class.
# The evaluated set is the whole dataset, so it includes the samples the
# model was trained on.
print(f'Accuracy: {np.mean(y_pred_max == y_total_max)}')

Accuracy: 0.9312169312169312
