In [1]:
import pandas as pd
import numpy as np

In [3]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

### Defining the Data Directory and Signal Files

In [6]:
# Data directory
DATADIR = 'UCI_HAR_Dataset'

In [8]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

### Loading the Raw Signal Data for Training and Testing

In [74]:
import numpy as np
import os

# Define paths for the train and test sets
train_data_path = 'UCI_HAR_Dataset/train/Inertial Signals/'
test_data_path = 'UCI_HAR_Dataset/test/Inertial Signals/'

# List of signal files for training
train_signal_files = ['body_acc_x_train.txt', 'body_acc_y_train.txt', 'body_acc_z_train.txt',
                      'body_gyro_x_train.txt', 'body_gyro_y_train.txt', 'body_gyro_z_train.txt',
                      'total_acc_x_train.txt', 'total_acc_y_train.txt', 'total_acc_z_train.txt']

# List of signal files for testing
test_signal_files = ['body_acc_x_test.txt', 'body_acc_y_test.txt', 'body_acc_z_test.txt',
                     'body_gyro_x_test.txt', 'body_gyro_y_test.txt', 'body_gyro_z_test.txt',
                     'total_acc_x_test.txt', 'total_acc_y_test.txt', 'total_acc_z_test.txt']

# Function to load a single file
def load_signal_file(file_path):
    return np.loadtxt(file_path)

# Function to load the X data (train or test)
def load_X_data(data_path, signal_files):
    X_signals = []
    for signal_file in signal_files:
        file_path = os.path.join(data_path, signal_file)
        signal_data = load_signal_file(file_path)
        X_signals.append(signal_data)
    # Stack signals along the third dimension (samples, timesteps, signal_features)
    return np.stack(X_signals, axis=-1)

# Load X_train and X_test
X_train = load_X_data(train_data_path, train_signal_files)
X_test = load_X_data(test_data_path, test_signal_files)

# Load y_train and y_test labels
y_train = np.loadtxt('UCI_HAR_Dataset/train/y_train.txt')
y_test = np.loadtxt('UCI_HAR_Dataset/test/y_test.txt')

# Print shapes to verify
print("Shape of X_train:", X_train.shape)  # Expected (7352, 128, 9)
print("Shape of y_train:", y_train.shape)  # Expected (7352,)
print("Shape of X_test:", X_test.shape)    # Expected (2947, 128, 9)
print("Shape of y_test:", y_test.shape)    # Expected (2947,)


Shape of X_train: (7352, 128, 9)
Shape of y_train: (7352,)
Shape of X_test: (2947, 128, 9)
Shape of y_test: (2947,)


### Flattening the Signal Data for DataFrame Conversion

In [75]:
import pandas as pd

# Reshape the data to (7352 samples, 128 * 9 features)
X_train_flattened = X_train.reshape(X_train.shape[0], -1)

# Convert the reshaped data to a DataFrame
column_names = [f'signal_{i}_timestep_{j}' for i in range(9) for j in range(128)]
df = pd.DataFrame(X_train_flattened, columns=column_names)

# Display the first few rows of the DataFrame
print(df.head())


   signal_0_timestep_0  signal_0_timestep_1  signal_0_timestep_2  \
0             0.000181             0.010767             0.055561   
1             0.001094            -0.004688            -0.026860   
2             0.003531             0.004456            -0.005915   
3            -0.001772            -0.010185             0.001053   
4             0.000087            -0.003857            -0.013333   

   signal_0_timestep_3  signal_0_timestep_4  signal_0_timestep_5  \
0             0.030191             0.066014             0.022859   
1             0.017111             0.006123             0.012268   
2             0.026189            -0.000238             0.002159   
3            -0.037516            -0.012886            -0.000873   
4            -0.019429            -0.008612            -0.001574   

   signal_0_timestep_6  signal_0_timestep_7  signal_0_timestep_8  \
0             1.012817            -0.123217             0.102934   
1             1.018851            -0.123976   

In [76]:
df.head()

Unnamed: 0,signal_0_timestep_0,signal_0_timestep_1,signal_0_timestep_2,signal_0_timestep_3,signal_0_timestep_4,signal_0_timestep_5,signal_0_timestep_6,signal_0_timestep_7,signal_0_timestep_8,signal_0_timestep_9,...,signal_8_timestep_118,signal_8_timestep_119,signal_8_timestep_120,signal_8_timestep_121,signal_8_timestep_122,signal_8_timestep_123,signal_8_timestep_124,signal_8_timestep_125,signal_8_timestep_126,signal_8_timestep_127
0,0.000181,0.010767,0.055561,0.030191,0.066014,0.022859,1.012817,-0.123217,0.102934,0.010139,...,0.099874,0.001576,0.00307,-0.00227,0.028818,-3.8e-05,-0.001446,1.021171,-0.121326,0.094987
1,0.001094,-0.004688,-0.02686,0.017111,0.006123,0.012268,1.018851,-0.123976,0.09793,0.00455,...,0.089035,-0.001599,-0.007192,-0.003397,-0.03865,-0.00944,-0.002798,1.017849,-0.130519,0.090612
2,0.003531,0.004456,-0.005915,0.026189,-0.000238,0.002159,1.023127,-0.120016,0.091117,0.002285,...,0.082334,0.002178,-0.005442,-0.012784,-0.020235,-0.008831,-0.002645,1.022019,-0.130183,0.081487
3,-0.001772,-0.010185,0.001053,-0.037516,-0.012886,-0.000873,1.017682,-0.133404,0.095152,-0.001311,...,0.083275,-2.8e-05,-0.001235,-0.004581,-0.00712,0.001951,-0.012987,1.020735,-0.133316,0.081404
4,8.7e-05,-0.003857,-0.013333,-0.019429,-0.008612,-0.001574,1.019952,-0.128731,0.080841,-0.000272,...,0.085397,-0.003328,0.007404,0.004476,0.015605,0.010797,-0.010082,1.016787,-0.123494,0.088816


In [80]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt


In [82]:
# Set seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)


3. Defining Model Parameters

In [85]:
# Model parameters
timesteps = X_train.shape[1]  # 128 timesteps
input_dim = X_train.shape[2]  # 9 features from the combined signal files
n_classes = 6  # 6 output classes for activity recognition
epochs = 50  # Number of epochs to train
batch_size = 32  # Batch size for training
n_hidden = 64  # Number of hidden units in the LSTM layers


4. Building the LSTM Model Architecture

In [88]:
from keras.layers import Input

# Defining the LSTM model
model = Sequential()

# Input layer to define the shape of the data
model.add(Input(shape=(timesteps, input_dim)))

# First LSTM Layer
model.add(LSTM(units=n_hidden, return_sequences=True))
model.add(Dropout(0.5))  # Add dropout for regularization
model.add(BatchNormalization())  # Batch normalization for stability

# Second LSTM Layer
model.add(LSTM(units=n_hidden))
model.add(Dropout(0.5))  # Dropout after second LSTM layer
model.add(BatchNormalization())  # Batch normalization again

# Dense output layer with softmax activation for multi-class classification
model.add(Dense(n_classes, activation='softmax'))

# Model summary
model.summary()


5. Compiling the Model

In [91]:
# Compiling the model with Adam optimizer
model.compile(loss='categorical_crossentropy', 
              optimizer=Adam(learning_rate=0.001), 
              metrics=['accuracy'])


6. Setting Up Early Stopping to Avoid Overfitting

In [94]:
# Early stopping callback to stop training if validation loss does not improve
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


7. Training the LSTM Model

In [97]:
# Shift labels if they are in the range of 1-6
y_train -= 1
y_test -= 1
from keras.utils import to_categorical
# Now apply one-hot encoding
y_train = to_categorical(y_train, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)


In [41]:
# Training the model
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    validation_data=(X_test, y_test),
                    epochs=epochs,
                    callbacks=[early_stopping])


Epoch 1/50
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 50ms/step - accuracy: 0.5571 - loss: 1.1765 - val_accuracy: 0.6641 - val_loss: 1.2166
Epoch 2/50
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 46ms/step - accuracy: 0.8638 - loss: 0.3875 - val_accuracy: 0.8140 - val_loss: 0.5190
Epoch 3/50
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 48ms/step - accuracy: 0.9003 - loss: 0.2812 - val_accuracy: 0.8968 - val_loss: 0.3307
Epoch 4/50
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 48ms/step - accuracy: 0.9213 - loss: 0.2054 - val_accuracy: 0.8982 - val_loss: 0.3426
Epoch 5/50
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 47ms/step - accuracy: 0.9296 - loss: 0.1847 - val_accuracy: 0.8829 - val_loss: 0.3599
Epoch 6/50
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 48ms/step - accuracy: 0.9345 - loss: 0.1878 - val_accuracy: 0.8975 - val_loss: 0.3488
Epoch 7/50
[1m2

In [99]:
# Function to predict activity for a single sample and compare with the actual label
def predict_and_compare(model, sample, actual_label):
    """
    This function takes a pre-trained model, a single input sample,
    and the actual label, and returns both the predicted and actual activity labels.

    Parameters:
    model (keras.Model): The trained LSTM model.
    sample (numpy.ndarray): A single sample to predict on. Shape should be (1, timesteps, features).
    actual_label (int): The actual label of the sample for comparison.

    Returns:
    str: Predicted activity label.
    str: Actual activity label.
    """
    # Ensure that the sample has the right shape (1, timesteps, features)
    sample = sample.reshape(1, sample.shape[0], sample.shape[1])

    # Predict the class probabilities for the input sample
    prediction = model.predict(sample)

    # Convert predicted probabilities to class label
    predicted_class = np.argmax(prediction, axis=1)

    # Map predicted and actual class index to activity labels
    activity_labels = {
        0: "LAYING",
        1: "SITTING",
        2: "STANDING",
        3: "WALKING",
        4: "WALKING_DOWNSTAIRS",
        5: "WALKING_UPSTAIRS"
    }

    # Return the predicted and actual activity labels
    predicted_activity = activity_labels[predicted_class[0]]
    actual_activity = activity_labels[actual_label]

    return predicted_activity, actual_activity

# Example of using the function for a single sample
# Let's assume `X_test[0]` is your sample and `y_test[0]` is the actual label
sample = X_test[0]  # Replace this with the actual sample you want to predict on
actual_label = np.argmax(y_test[0])  # Decode the actual label from one-hot encoding

predicted_activity, actual_activity = predict_and_compare(model, sample, actual_label)

print(f"Predicted Activity: {predicted_activity}")
print(f"Actual Activity: {actual_activity}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step
Predicted Activity: WALKING_UPSTAIRS
Actual Activity: WALKING_DOWNSTAIRS


In [101]:
from keras.layers import GRU
# Setting seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Hyperparameters
n_classes = 6  # Assuming 6 activity classes
n_hidden = 32  # Number of hidden units
epochs = 50
batch_size = 16
timesteps = 128  # Assuming same time steps from the data shape
input_dim = 9  # Number of features (e.g., from accelerometer and gyroscope data)

# Early stopping callback to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

Step 2: Define the GRU Model Architecture

In [104]:
# Define the GRU model
model = Sequential()

# Adding the GRU layers
model.add(GRU(n_hidden, input_shape=(timesteps, input_dim), return_sequences=True))
model.add(GRU(n_hidden))

# Adding Dropout layer for regularization
model.add(Dropout(0.5))

# Dense layer for output with softmax activation
model.add(Dense(n_classes, activation='softmax'))

# Compile the model with categorical crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
model.summary()


  super().__init__(**kwargs)


Step 3: One-Hot Encoding the Labels

In [107]:
y_train

array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0.],
       ...,
       [0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.]])

Step 4: Training the GRU Model

In [109]:
# Train the model
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    validation_data=(X_test, y_test),
                    epochs=epochs,
                    callbacks=[early_stopping])


Epoch 1/50
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 35ms/step - accuracy: 0.4267 - loss: 1.3679 - val_accuracy: 0.5453 - val_loss: 1.0556
Epoch 2/50
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.6483 - loss: 0.8120 - val_accuracy: 0.7774 - val_loss: 0.5581
Epoch 3/50
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.8642 - loss: 0.3949 - val_accuracy: 0.8744 - val_loss: 0.3657
Epoch 4/50
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9284 - loss: 0.2226 - val_accuracy: 0.8873 - val_loss: 0.3467
Epoch 5/50
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9287 - loss: 0.1969 - val_accuracy: 0.8948 - val_loss: 0.3123
Epoch 6/50
[1m460/460[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.9220 - loss: 0.2069 - val_accuracy: 0.9040 - val_loss: 0.2974
Epoch 7/50
[1m4

In [111]:
# Human Activity Recognition using GRU, LSTM, and Traditional Machine Learning