In [None]:
# Mounting Google Drive at /content/drive so files stored there are readable
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Importing required libraries
import numpy as np
from keras.layers import LSTM, Dense, Dropout, Input
from keras.models import Sequential
from keras.utils import to_categorical
from numpy import mean, std
from pandas import read_csv


# Loading and preparing dataset
# Signal files stored under "<split>/Inertial Signals/"; this order fixes the
# channel order along the last axis of the stacked arrays.
_SIGNAL_NAMES = (
    'total_acc_x', 'total_acc_y', 'total_acc_z',
    'body_acc_x', 'body_acc_y', 'body_acc_z',
    'body_gyro_x', 'body_gyro_y', 'body_gyro_z',
)


def _load_split(data_dir, split):
    """Load the stacked signals and one-hot labels of one split.

    Args:
        data_dir: Root folder of the dataset (contains the split folders).
        split: Name of the split folder, 'train' or 'test'.

    Returns:
        Tuple ``(X, y)`` where ``X`` stacks the nine signal files along a new
        last axis, giving shape (rows, columns, 9), and ``y`` is the one-hot
        encoded label matrix.
    """
    split_dir = f'{data_dir}/{split}'

    # Loading signals; sep=r'\s+' replaces the deprecated delim_whitespace=True
    signals = [
        read_csv(
            f'{split_dir}/Inertial Signals/{name}_{split}.txt',
            header=None,
            sep=r'\s+',
        ).to_numpy()
        for name in _SIGNAL_NAMES
    ]
    X = np.dstack(signals)

    # Loading labels; shifting by one because to_categorical expects classes
    # starting at 0 (labels in the file are presumably 1-based)
    labels = read_csv(f'{split_dir}/y_{split}.txt', header=None)[0] - 1
    y = to_categorical(labels)

    return X, y


def load_data(data_dir='/content/drive/MyDrive/UCI HAR Dataset/UCI HAR Dataset'):
    """Load the training and testing splits of the dataset.

    Args:
        data_dir: Root folder holding the 'train' and 'test' sub-folders.
            Defaults to the location on the mounted Google Drive.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``; see ``_load_split`` for
        the layout of each pair.
    """
    X_train, y_train = _load_split(data_dir, 'train')
    X_test, y_test = _load_split(data_dir, 'test')

    return X_train, y_train, X_test, y_test

# Loading dataset
X_train, y_train, X_test, y_test = load_data()

# Experiment settings
N_RUNS = 10
EPOCHS = 15
BATCH_SIZE = 64


def build_model(input_shape, n_classes):
    """Build and compile a fresh LSTM classifier.

    Args:
        input_shape: Shape of one sample, without the batch axis.
        n_classes: Number of units in the softmax output layer.

    Returns:
        A compiled, untrained ``Sequential`` model.
    """
    # Declaring the input with an explicit Input layer instead of passing
    # input_shape to the first layer, which newer Keras versions warn about
    model = Sequential([
        Input(shape=input_shape),
        LSTM(100),
        Dropout(0.5),
        Dense(100, activation='relu'),
        Dense(n_classes, activation='softmax'),
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model


# Deriving model dimensions from the loaded data rather than hard-coding them
sample_shape = X_train.shape[1:]
n_classes = y_train.shape[1]

# Initializing accuracy storage
accuracies = []

# Running experiment N_RUNS times, training a new model from scratch each time
for run in range(N_RUNS):
    print(f'\nRun {run + 1}/{N_RUNS}')

    # Defining and compiling LSTM model
    model = build_model(sample_shape, n_classes)

    # Training model
    model.fit(
        X_train, y_train,
        epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0,
    )

    # Evaluating model (evaluate returns the loss followed by the metrics)
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    accuracies.append(accuracy)
    print(f'Test accuracy: {accuracy:.3f}')

# Printing model accuracies
print('\nAll accuracies:', [f'{acc:.3f}' for acc in accuracies])
print(f'Mean accuracy: {mean(accuracies):.3f} (± {std(accuracies):.3f})')


  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(train_path + f, header=None, delim_whitespace=True).values
  read_csv(test_path + f, header=None, delim_whitespace=True).values
  read_csv(test_path + f, header=None, delim_whitespace=True).values
  read_csv(test_path + f, header=None, delim_whitespace=True).values
  read_csv(test_path + f, header=None, delim_whitespace=True).values
  read_csv(test_path + f, header=None, delim_whitespace=True).values
  read_csv(test_path + f,


Run 1/10


  super().__init__(**kwargs)


Test accuracy: 0.891

Run 2/10
Test accuracy: 0.909

Run 3/10
Test accuracy: 0.880

Run 4/10
Test accuracy: 0.905

Run 5/10
Test accuracy: 0.918

Run 6/10
Test accuracy: 0.883

Run 7/10
Test accuracy: 0.910

Run 8/10
Test accuracy: 0.911

Run 9/10
Test accuracy: 0.902

Run 10/10
Test accuracy: 0.867

All accuracies: ['0.891', '0.909', '0.880', '0.905', '0.918', '0.883', '0.910', '0.911', '0.902', '0.867']
Mean accuracy: 0.897 (± 0.016)


# **REPORT**

1. Why was this problem suitable for LSTM and not just any other machine  
   learning algorithm?

  Ans. Human Activity Recognition from time-series sensor data involves patterns that evolve over time, which makes it well suited to an LSTM. Each segment of a sequence is related to the data that precedes and follows it, and these relationships carry much of the useful information. Because the samples within a sequence are not i.i.d. (independent and identically distributed), machine learning algorithms that treat each sample independently struggle with this problem. LSTMs, on the other hand, maintain an internal state (memory) across time steps, enabling them to learn both short- and long-term dependencies in the sensor signals.

2. Is a many-to-many model a more real-world scenario for this problem or a
   many-to-one model, and why?

  Ans. A many-to-one model is the more realistic fit for this lab problem because each window of time-series data is mapped to a single activity label. In real-time applications, a many-to-many model may be used for continuous activity prediction, but in this dataset each sequence (window) corresponds to exactly one label.

3. Explain the meaning of “Short Memory” and “Long Memory” in context of LSTMs.

 Ans. In an LSTM, the hidden state is known as the short-term memory, while the cell state is known as the long-term memory. 'Short Memory' refers to the model's ability to remember recent inputs, while 'Long Memory' refers to retaining information from much earlier in the sequence.

4. What are the three parts of an LSTM node and their functions?

 Ans. Three parts of LSTM are:

 (i) Forget Gate: The first part, the forget gate, chooses if the information coming from the previous timestamp should be remembered or is irrelevant and should be forgotten.

 (ii) Input Gate: The second part, the input gate, learns new information from the input to this cell.

 (iii) Output Gate: The third part, the output gate, passes the updated information from the current timestamp on to the next timestamp.

5. What problems do RNNs face that LSTMs are able to adapt to fix?

 Ans. RNNs usually face the problems of:

  (i) Exploding and vanishing gradients: In long sequences, gradients can shrink or grow exponentially, making training unstable or impossible.

  (ii) Short-range dependency learning: Standard RNNs struggle to retain relevant information over many steps.

  LSTMs address these issues with gated pathways and a cell state through which the error signal flows nearly unchanged, allowing them to learn long sequences effectively.