In [1]:
!pip install scikit-learn



In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [3]:
activity_labels = []
data = []

features_accelometer = ['accel_x','accel_y','accel_z']

data_path = 'clean_respeck_normal'

for filename in os.listdir(data_path):
    file_path = os.path.join(data_path, filename)
    # extract activity label from the filename
    activity = filename.split('_')[3]
    #create a df with the accelometer data
    df = pd.read_csv(file_path).loc[:, features_accelometer]
    
    #appends data and labels
    #print(filename)
    #print(df.values.shape)
    data.append(df.values)
    activity_labels.append(activity)
    
#convert to numpy arrays for the model
data_array = np.array(data)
labels_array = np.array(activity_labels)

In [4]:
#check if all elemets in the array are of the same shape
first_element_shape = data_array[0].shape

all(record.shape == first_element_shape for record in data_array)

True

In [5]:
# get the different shapes in the array

unique_shapes = set()

# Iterate through the elements and add their shapes to the set
for record in data_array:
    unique_shapes.add(record.shape)

# Print out the unique shapes
print("Different shapes in the array:")
for shape in unique_shapes:
    print(shape)

Different shapes in the array:
(125, 3)


## preparing data for the model
#### encoding labels
#### train test and validation split

In [6]:
# encode labels to numeric values
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels_array)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(data_array, y, 
                                                    test_size =0.2, random_state=0,
                                                   stratify=y)
# Check the shapes of the resulting arrays
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (715, 125, 3)
y_train shape: (715,)
X_test shape: (179, 125, 3)
y_test shape: (179,)


In [8]:
#create a validation set for the CNN
X_train_val, X_val, y_train_val, y_val = train_test_split(X_train, y_train, 
                                                          test_size = 0.2, random_state=0,
                                                         stratify=y_train)
print("X_train_val shape:", X_train_val.shape)
print("y_train_val shape:", y_train_val.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)

X_train_val shape: (572, 125, 3)
y_train_val shape: (572,)
X_val shape: (143, 125, 3)
y_val shape: (143,)


In [18]:
from tensorflow.keras import layers, models

In [11]:
#get the number of unique activity labels in the whole set training and testing
num_activities = len(set(activity_labels))
num_activities

12

In [41]:
def evaluate_model(trainX, trainy, testX, testy, epochs, batch_size):
    model = models.Sequential()

    #1D convolutional layer
    model.add(layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=(125,3)))
    model.add(layers.MaxPooling1D(pool_size=2))
    
    #flatten before the fully connected layers
    model.add(layers.Flatten())
    
    #add a fully connected layer with 128 units
    model.add(layers.Dense(128, activation='relu'))
    
    #add output layer
    model.add(layers.Dense(num_activities, activation='softmax'))
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.summary()
    
    history=model.fit(X_train_val, y_train_val, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val))
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    
    print(f'Test accuracy: {test_accuracy *100:.2f}%%')

In [42]:
evaluate_model(X_train, y_train, X_test, y_test, epochs=10, batch_size=32)

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_8 (Conv1D)           (None, 123, 32)           320       
                                                                 
 max_pooling1d_8 (MaxPooling  (None, 61, 32)           0         
 1D)                                                             
                                                                 
 flatten_8 (Flatten)         (None, 1952)              0         
                                                                 
 dense_16 (Dense)            (None, 128)               249984    
                                                                 
 dense_17 (Dense)            (None, 12)                1548      
                                                                 
Total params: 251,852
Trainable params: 251,852
Non-trainable params: 0
________________________________________________

In [12]:
from tensorflow import keras

In [56]:
model = keras.Sequential([
    keras.Input(shape=(125,3)),
    keras.layers.Conv1D(32, kernel_size=3, activation="relu"),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.Conv1D(64, kernel_size=3, activation="relu"),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(12, activation="softmax"),
])

# Compile model with loss, optimiser, and metrics
model.compile(loss='sparse_categorical_crossentropy',
              optimizer="adam",
              metrics=['accuracy'])

# Train the model for 20 epochs with batch size 128
batch_size = 128
epochs = 20
history = model.fit(X_train, y_train, batch_size=batch_size,
                    epochs=epochs, validation_data=(X_val, y_val))
test_loss, test_accuracy = model.evaluate(X_test, y_test)
    
print(f'Test accuracy: {test_accuracy *100:.2f}%%')

model.save('sequential_CNN.h5')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test accuracy: 84.92%%


In [60]:
import tensorflow as tf
model=tf.keras.models.load_model('sequential_CNN.h5')

#convert to TensorFLowLite
conv = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = conv.convert()

with open('sequential_CNN.tflite', 'wb') as f:
    f.write(tflite_model)



INFO:tensorflow:Assets written to: C:\Users\1juli\AppData\Local\Temp\tmpaegvf29s\assets


INFO:tensorflow:Assets written to: C:\Users\1juli\AppData\Local\Temp\tmpaegvf29s\assets


## Grid search: optimize mode with different parameters

code for the grid search adapted from https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

In [9]:
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier

In [25]:
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10,50,100]
optimizer = ['Adam', 'SGD', 'Adamax']
activation = ['relu', 'sigmoid']

param_grid={
    #'batch_size': batch_size,
           'optimizer': optimizer}
          # 'activation': activation}

In [30]:
# model with placeholders for parameters
def create_model(optimizer='adam', activation = 'relu'):
    model=keras.Sequential([
    keras.Input(shape=(125,3)),
    keras.layers.Conv1D(32, kernel_size=3, activation=activation),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.Conv1D(64, kernel_size=3, activation=activation),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(12, activation="softmax")
    ])
    model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
    
    return model

seed=0
tf.random.set_seed(seed)

model=KerasClassifier(build_fn=create_model, epochs=10, batch_size=32, verbose=0)
grid=GridSearchCV(estimator=model, param_grid=param_grid,cv=3)

#model.get_params().keys()

grid_results = grid.fit(X_train, y_train)
# Print the best parameters and results
print(f'Best parameters: {grid_results.best_params_}')
print(f'Best accuracy: {grid_results.best_score_ * 100:.2f}%')

# # summarize results
# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
# means = grid_result.cv_results_['mean_test_score']
# stds = grid_result.cv_results_['std_test_score']
# params = grid_result.cv_results_['params']
# for mean, stdev, param in zip(means, stds, params):
#     print("%f (%f) with: %r" % (mean, stdev, param)) 

  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"
  "``build_fn`` will be renamed to ``model`` in a future release,"


Best parameters: {'optimizer': 'Adamax'}
Best accuracy: 81.40%


# SVM

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

#support vector classifier

X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

linear_svm = SVC(kernel='linear')
linear_svm.fit(X_train_flat, y_train)
y_pred = linear_svm.predict(X_test_flat)
accuracy_score(y_test, y_pred)

0.664804469273743

In [35]:
poly_svm = SVC(kernel='poly')
poly_svm.fit(X_train_flat, y_train)
y_pred = poly_svm.predict(X_test_flat)
print('accuracy of polynomial svm', accuracy_score(y_test, y_pred))

sig_svm = SVC(kernel='sigmoid', gamma='scale')
sig_svm.fit(X_train_flat, y_train)
y_pred = sig_svm.predict(X_test_flat)
print('accuracy of sigmoid svm', accuracy_score(y_test, y_pred))


accuracy of polynomial svm 0.7150837988826816
accuracy of sigmoid svm 0.5195530726256983


# Decision Tree
with different criteria and depth

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RepeatedStratifiedKFold

criteria = ['gini', 'entropy', 'log_loss']
max_depth = [3, 4, 5, 6, 7, 8, 9]
model = DecisionTreeClassifier()
grid = dict(criterion = criteria, max_depth = max_depth)

grid_search = GridSearchCV(estimator = model, param_grid = grid, scoring='accuracy', verbose=1)
grid_result = grid_search.fit(X_train_flat, y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Fitting 5 folds for each of 21 candidates, totalling 105 fits


In [None]:
#2D model for using accelometer and gyroscope


import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

#define CNN
model = models.Sequential()

# Input layer: adjust to the shape of data
model.add(layers.InputLayer(input_shape=(window_size, num_sensor_features, 1)))

#conv layers
model.add(layers.Conv2D(32, (3,3), activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64, (3,3), activation='relu'))
model.add(layers.MaxPooling2D((2,2)))

#flatten output
model.add(layers.Flatten())

#fully connected layers 
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5)) #for regularisation
model.add(layers.Dense(num_classes, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#display model summary
model.summary()

history=model.fit(train_data, train_labels, epochs=num_epochs, batch_size=batch_size,
                 validation_data=(validation_data, validation_labels))

test_loss, test_accuracy = model.evaluate(test_data, test_labels)

print(f'Test Accuracy: {test_accuracy}')

#predictions for a given data with the size of the window
#predictions = model.predict(input_data)