In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models
import os
import pandas as pd

# Assuming there are N classes to predict
N_CLASSES = 42

model = models.Sequential([
    layers.Input(shape=(7,)),  # 7 features: accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z, ind
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(N_CLASSES, activation='softmax')  # Softmax for multi-class classification
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use 'categorical_crossentropy' if one-hot encoded
              metrics=['accuracy'])

# Model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                512       
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 42)                2730      
                                                                 
Total params: 7,402
Trainable params: 7,402
Non-trainable params: 0
_________________________________________________________________


In [3]:
def extract_activity_and_status(filename):
    # Extract the parts of the filename
    filename_without_extension = os.path.splitext(os.path.basename(filename))[0]
    details = filename_without_extension.split("_")
    
    # Extract the device name, ID, activity, and timestamp from the details
    device_name = details[0]
    user_id = details[1]
    activity = details[2]
    activity_sub = details[3]
    status = details[4]

    return activity, activity_sub, status

In [4]:
input_directory = "../assets/pdiot_submission/s2102187"

data_list = []
label_list = []

for file in os.listdir(input_directory):
    if file.endswith(".csv") and "unprocessed" not in file:
        filename = os.path.join(input_directory, file)
        label, _, _ = extract_activity_and_status(filename)

        df = pd.read_csv(filename, usecols=[1,2,3])
        df['label'] = label
        label_list.append(label)
        data_list.append(df)

# all_data = pd.concat(data_list, ignore_index=True)
print(data_list)

[      accel_x   accel_y   accel_z             label
0    0.148682 -1.047180 -0.022522  Ascending stairs
1    0.259766 -1.153625 -0.092590  Ascending stairs
2    0.283447 -1.151672 -0.052063  Ascending stairs
3    0.201660 -0.957581 -0.007629  Ascending stairs
4    0.179443 -0.877014  0.004089  Ascending stairs
..        ...       ...       ...               ...
755  0.518799 -0.840881 -0.547180  Ascending stairs
756  0.339111 -0.847717 -0.563538  Ascending stairs
757  0.451172 -0.854065 -0.412170  Ascending stairs
758  0.126465 -0.971985 -0.275452  Ascending stairs
759  0.198242 -0.860901 -0.418030  Ascending stairs

[760 rows x 4 columns],       accel_x   accel_y   accel_z              label
0   -0.030518 -0.773743 -0.074768  Descending stairs
1   -0.083252 -0.751770  0.013611  Descending stairs
2   -0.171875 -0.845032 -0.005920  Descending stairs
3   -0.116211 -0.729797 -0.059387  Descending stairs
4   -0.157959 -0.684875 -0.076965  Descending stairs
..        ...       ...       ..

In [5]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Normalizing sensor readings
scaler = StandardScaler()

sensor_columns = ['accel_x', 'accel_y', 'accel_z']

scaler.fit(data_list[0][sensor_columns])

for i in range(len(data_list)):
    data_list[i][sensor_columns] = scaler.transform(data_list[i][sensor_columns])


In [10]:
import numpy as np
from sklearn.model_selection import train_test_split

# Convert data and labels to numpy arrays
X = np.array([df.values for df in data_list])
for i in X:
    if i.shape[0] < 760:
        print(i, i.shape)

X = np.stack(X)
y = LabelEncoder().fit_transform(label_list)


# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(X_train.shape)

# Define LSTM model
model = models.Sequential([
    layers.LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])),
    layers.Dense(len(np.unique(y)), activation='relu')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2) 

[[-3.6695896651432673 4.718862474298133 9.074591626476668
  'Lying down back']
 [-3.8362230990181447 4.7271191980612475 9.036058899851604
  'Lying down back']
 [-4.053008269746434 4.625286429473778 8.296932317621009
  'Lying down back']
 ...
 [-3.811956029106524 4.458776064684136 9.38285322425466 'Lying down back']
 [-3.740772880257439 4.4629044660217625 8.82763236770361
  'Lying down back']
 [-3.765039883903973 4.842713161647412 8.913455226213602
  'Lying down back']] (719, 4)
[[-3.1632183127470856 5.0271130325880735 9.25674622980848
  'Lying down back']
 [-3.3282338845314476 5.186742782968625 9.167420696157937
  'Lying down back']
 [-3.329851680356878 5.0615159975384865 9.36183552337185
  'Lying down back']
 ...
 [-3.223076890818163 4.802805714188256 9.211207722457223
  'Lying down back']
 [-3.3023490187943976 4.828951955375554 9.25674622980848
  'Lying down back']
 [-3.3023490187943976 4.859226575354152 9.274261112069041
  'Lying down back']] (743, 4)
[[-2.2912241098274655 5.5252679

  """


ValueError: all input arrays must have the same shape