# Human Activity Recognition - Prediction Script

This notebook loads the trained LSTM model and predicts activities from new sensor data.

In [None]:
# ============================================
# Cell 1: Imports and Setup
# ============================================

import json
from collections import Counter

import numpy as np
import pandas as pd
from scipy import signal

# The model below is built with the TensorFlow 1.x graph/session API
# (placeholders, tf.Session, tf.train.Saver), so TensorFlow is imported through
# the compat.v1 shim and v2 behavior is switched off before any graph is built.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# CRITICAL: start from an empty default graph. The variables created in Cell 3
# must carry the same names as in the checkpoint; re-running the notebook in a
# live kernel without this reset would add a second copy of them to the old
# graph (presumably under different, auto-uniquified names - verify against TF docs).
tf.reset_default_graph()

print("✓ TensorFlow loaded in compatibility mode")

✓ TensorFlow loaded in compatibility mode


In [None]:
# ============================================
# Cell 2: Load Model Configuration
# ============================================

# Load the model metadata that the training run saved alongside the checkpoint.
with open('./model/model_info.json', 'r') as f:
    model_info = json.load(f)

# Network dimensions; Cell 3 uses these to rebuild the graph.
n_hidden = model_info['n_hidden']
n_classes = model_info['n_classes']
n_steps = model_info['n_steps']
n_input = model_info['n_input']

# Class index -> human-readable activity name (index i labels output unit i).
LABELS = [
    "WALKING",
    "WALKING_UPSTAIRS",
    "WALKING_DOWNSTAIRS",
    "SITTING",
    "STANDING",
    "LAYING"
]

# Fail fast if the label list and the saved configuration disagree: otherwise
# the mismatch only shows up later as an IndexError (or a silently wrong label)
# when a predicted class index is looked up in LABELS.
if len(LABELS) != n_classes:
    raise ValueError(
        f"model_info.json declares {n_classes} classes but LABELS has "
        f"{len(LABELS)} entries"
    )

print("Model Configuration:")
print(f"  - Input: {n_steps} timesteps × {n_input} features")
print(f"  - Hidden: {n_hidden} units")
print(f"  - Classes: {n_classes}")
print(f"  - Training accuracy: {model_info['final_accuracy']*100:.2f}%")

Model Configuration:
  - Input: 128 timesteps × 9 features
  - Hidden: 32 units
  - Classes: 6
  - Training accuracy: 89.92%


In [None]:
# ============================================
# Cell 3: Rebuild Model Architecture (MUST match training exactly!)
# ============================================

def LSTM_RNN(_X, _weights, _biases):
    """
    Build the inference graph: ReLU input projection -> two stacked LSTM
    cells -> linear output layer.

    Must be IDENTICAL to the training code: the checkpoint is restored into
    the variables created here, so any structural change breaks the restore.

    Reads the module-level n_input, n_steps and n_hidden.

    Args:
        _X: Input tensor. In this notebook it is the placeholder of shape
            [None, n_steps, n_input].
        _weights: Dict with 'hidden' and 'out' entries; in this notebook they
            are variables of shape [n_input, n_hidden] and [n_hidden, n_classes].
        _biases: Dict with 'hidden' and 'out' entries; in this notebook they
            are variables of shape [n_hidden] and [n_classes].

    Returns:
        The output-layer result for the last timestep (matmul + bias). No
        softmax is applied here, so these are un-normalized class scores.
    """
    # Swap the first two axes so time comes first, then flatten to rows of
    # n_input features so one matmul projects every timestep at once.
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    _X = tf.reshape(_X, [-1, n_input])

    # Input projection with ReLU activation
    _X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
    # static_rnn takes a Python list with one tensor per timestep.
    _X = tf.split(_X, n_steps, 0)

    # Two stacked LSTM cells
    # NOTE(review): rnn_cell_impl is imported from tensorflow.python, which
    # looks like an internal module path - confirm it is stable across the TF
    # versions this notebook targets, and that the cell class matches training.
    from tensorflow.python.ops import rnn_cell_impl
    lstm_cell_1 = rnn_cell_impl.LSTMCell(n_hidden, forget_bias=1.0)
    lstm_cell_2 = rnn_cell_impl.LSTMCell(n_hidden, forget_bias=1.0)
    lstm_cells = rnn_cell_impl.MultiRNNCell([lstm_cell_1, lstm_cell_2])

    outputs, states = tf.nn.static_rnn(lstm_cells, _X, dtype=tf.float32)
    # Only the output of the final timestep feeds the classifier.
    lstm_last_output = outputs[-1]

    return tf.matmul(lstm_last_output, _weights['out']) + _biases['out']

# Create placeholders and variables - NAMES MUST MATCH TRAINING!
# Input batch: any number of windows, each n_steps timesteps of n_input features.
x = tf.placeholder(tf.float32, [None, n_steps, n_input], name='input_x')

# The random initializers below only fix each variable's shape and dtype; the
# notebook never runs an initializer op and instead overwrites the values with
# saver.restore(...) before every prediction.
weights = {
    'hidden': tf.Variable(tf.random_normal([n_input, n_hidden]), name='weights_hidden'),
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0), name='weights_out')
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden]), name='biases_hidden'),
    'out': tf.Variable(tf.random_normal([n_classes]), name='biases_out')
}

# Output tensor of the network (un-normalized class scores) for the batch fed into x.
pred = LSTM_RNN(x, weights, biases)

print("✓ Model architecture created")





✓ Model architecture created


In [None]:
# ============================================
# Cell 4: Sensor Data Preprocessing Function
# ============================================

def preprocess_sensor_data(csv_file, window_size=128, overlap=0.5, fs=50.0, cutoff_hz=0.3):
    """
    Preprocess raw sensor data from CSV into model-ready format.

    The UCI HAR dataset uses 9 features per timestep:
    - body_acc_x, body_acc_y, body_acc_z (body acceleration, gravity removed)
    - body_gyro_x, body_gyro_y, body_gyro_z (angular velocity)
    - total_acc_x, total_acc_y, total_acc_z (raw accelerometer)

    Args:
        csv_file: Path to CSV with columns: acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z.
            Extra columns (e.g. a timestamp) are ignored; comma and semicolon
            delimiters are both accepted.
        window_size: Number of timesteps per window (default 128, same as UCI HAR)
        overlap: Fraction of each window shared with the next one, in [0, 1)
            (default 0.5 = 50%)
        fs: Sampling rate of the recording in Hz (default 50, same as UCI HAR)
        cutoff_hz: Cutoff of the high-pass filter that separates body
            acceleration from gravity (default 0.3 Hz, same as UCI HAR)

    Returns:
        windows: float32 array of shape (num_windows, window_size, 9)

    Raises:
        ValueError: If window_size < 1, overlap is outside [0, 1), required
            columns are missing, or fewer than window_size complete rows remain.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap}")

    # Read CSV - handle both comma and semicolon delimiters. Only a parse
    # failure triggers the semicolon retry; anything else (missing file,
    # permissions, Ctrl-C) propagates unchanged.
    try:
        df = pd.read_csv(csv_file, delimiter=',')
    except pd.errors.ParserError:
        df = pd.read_csv(csv_file, delimiter=';')
    else:
        # A semicolon-separated file parsed with ',' collapses into one column.
        if len(df.columns) == 1:
            df = pd.read_csv(csv_file, delimiter=';')

    print(f"Loaded {csv_file}")
    print(f"  Columns: {list(df.columns)}")
    print(f"  Shape: {df.shape}")

    # Expected columns
    required_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

    # Check for missing columns
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}. Found: {list(df.columns)}")

    # Handle missing values.
    # NOTE(review): dropping rows closes the gap in time, so a window spanning a
    # dropped row is no longer uniformly sampled - acceptable for rare NaNs only.
    df_clean = df[required_cols].dropna()
    if len(df_clean) < window_size:
        raise ValueError(f"Need at least {window_size} samples, got {len(df_clean)}")

    # Extract sensor data as float64. Integer-valued CSVs would otherwise give
    # integer arrays, and the filtered body acceleration would be truncated.
    acc = df_clean[['acc_x', 'acc_y', 'acc_z']].to_numpy(dtype=np.float64)
    gyro = df_clean[['gyro_x', 'gyro_y', 'gyro_z']].to_numpy(dtype=np.float64)

    # Zero-phase 3rd-order Butterworth high-pass removes the slowly varying
    # gravity component, leaving body acceleration. Applied along the time axis
    # to all three accelerometer channels at once.
    b, a = signal.butter(3, cutoff_hz, btype='high', fs=fs)
    body_acc = signal.filtfilt(b, a, acc, axis=0)

    # Total acceleration is the raw accelerometer data
    total_acc = acc

    # Combine all 9 features in the same order as UCI HAR:
    # [body_acc_x, body_acc_y, body_acc_z, gyro_x, gyro_y, gyro_z, total_acc_x, total_acc_y, total_acc_z]
    all_features = np.hstack([body_acc, gyro, total_acc])

    # Create sliding windows. The step is clamped to one sample so that an
    # overlap very close to 1 cannot produce a zero step.
    step_size = max(1, int(window_size * (1 - overlap)))
    starts = range(0, len(all_features) - window_size + 1, step_size)
    windows = np.array(
        [all_features[start:start + window_size] for start in starts],
        dtype=np.float32,
    )
    print(f"  Created {len(windows)} windows of shape {windows.shape[1:]}")

    return windows

print("✓ Preprocessing function defined")

✓ Preprocessing function defined


In [None]:
# ============================================
# Cell 5: Prediction Function
# ============================================

def _softmax(logits):
    """Row-wise, numerically stable softmax over a 2-D array of class scores."""
    shifted = logits - logits.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)


def predict_activity(csv_file, checkpoint_path="./model/lstm_model.ckpt"):
    """
    Load model, preprocess data, and predict activity.

    Every window of the recording is classified independently; the reported
    activity is the majority vote over all windows.

    Args:
        csv_file: Path to CSV file with sensor data
        checkpoint_path: Checkpoint to restore the weights from
            (default "./model/lstm_model.ckpt")

    Returns:
        activity: Predicted activity label
        confidence: Mean probability (in percent) that the model assigned to
            the winning activity, averaged over all windows. Windows that
            voted for another class lower this value.
    """
    # Preprocess the sensor data
    windows = preprocess_sensor_data(csv_file)

    # Create saver and restore model.
    # NOTE(review): a new Saver is built on every call, which adds its ops to
    # the default graph each time; kept per-call so a re-built graph is always
    # picked up.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore trained weights
        saver.restore(sess, checkpoint_path)
        print("✓ Model restored from checkpoint")

        # Single forward pass for all windows; the session is not needed afterwards.
        predictions_raw = sess.run(pred, feed_dict={x: windows})

    # Softmax is computed in numpy: building tf.nn.softmax here would add a new
    # op to the global graph on every call and require a second session run.
    probabilities = _softmax(predictions_raw)
    predicted_classes = predictions_raw.argmax(axis=1)

    # Majority vote over windows
    vote_counts = Counter(predicted_classes.tolist())
    most_common_class, vote_count = vote_counts.most_common(1)[0]

    # Average probability of the winning class across all windows
    avg_confidence = float(probabilities[:, most_common_class].mean() * 100)

    # Print results
    print(f"\n{'='*50}")
    print("PREDICTION RESULTS")
    print(f"{'='*50}")
    print(f"\n  Activity: {LABELS[most_common_class]}")
    print(f"  Confidence: {avg_confidence:.1f}%")
    print(f"  Windows: {vote_count}/{len(windows)} voted for this class")
    print("\n  Vote breakdown:")
    for cls, count in vote_counts.most_common():
        pct = count / len(windows) * 100
        print(f"    {LABELS[cls]:20s}: {count:3d} ({pct:.1f}%)")
    print(f"{'='*50}\n")

    return LABELS[most_common_class], avg_confidence

print("✓ Prediction function defined")

✓ Prediction function defined


# Run Predictions

Change the file path below to predict on different sensor data files.

In [None]:
# ============================================
# Cell 6: Run Prediction on Your Data
# ============================================

# Currently selected input: sensor_data/standing_data.csv
activity, confidence = predict_activity('sensor_data/standing_data.csv')

# Other recordings referenced by earlier versions of this cell - substitute one
# of these paths in the call above to classify it instead:
#   'sensor_data/walking_data2.csv'
#   'sensor_data/laying_data.csv'

Loaded sensor_data/standing_data.csv
  Columns: ['timestamp', 'acc_z', 'acc_y', 'acc_x', 'gyro_z', 'gyro_y', 'gyro_x']
  Shape: (4685, 7)
  Created 72 windows of shape (128, 9)
INFO:tensorflow:Restoring parameters from ./model/lstm_model.ckpt
✓ Model restored from checkpoint

PREDICTION RESULTS

  Activity: LAYING
  Confidence: 19.6%
  Windows: 72/72 voted for this class

  Vote breakdown:
    LAYING              :  72 (100.0%)



In [None]:
# ============================================
# DEBUG: Sanity-check the restored model on the UCI HAR test split
# If it fails on the dataset it was trained for, the problem is the
# checkpoint / rebuilt graph, not the custom sensor preprocessing.
# ============================================

# Load the UCI HAR inertial signals, in the feature order the model expects.
uci_path = "data/UCI_HAR_Dataset/"
signal_names = ["body_acc_x_", "body_acc_y_", "body_acc_z_",
                "body_gyro_x_", "body_gyro_y_", "body_gyro_z_",
                "total_acc_x_", "total_acc_y_", "total_acc_z_"]
X_test_signals = [
    np.loadtxt(f"{uci_path}test/Inertial Signals/{sig}test.txt")
    for sig in signal_names
]

# Stacked as (signal, sample, timestep); reorder to (sample, timestep, signal).
X_test = np.transpose(np.array(X_test_signals), (1, 2, 0))
y_test = np.loadtxt(uci_path + "test/y_test.txt", dtype=int) - 1  # 0-indexed

print(f"Loaded UCI HAR test data: {X_test.shape}")
print("Testing model on the UCI HAR test split...\n")

n_per_class = 5                       # samples checked per activity
n_overall = min(100, len(X_test))     # samples used for the overall accuracy

# Test with the model
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./model/lstm_model.ckpt")

    # Per-class check: the first n_per_class samples of each activity,
    # classified in one batch per class.
    for class_idx, label in enumerate(LABELS):
        indices = np.where(y_test == class_idx)[0][:n_per_class]
        if len(indices) == 0:
            print(f"{label:20s}: no samples in test split")
            continue
        class_scores = sess.run(pred, feed_dict={x: X_test[indices]})
        correct = int(np.sum(np.argmax(class_scores, axis=1) == class_idx))
        print(f"{label:20s}: {correct}/{len(indices)} correct")

    # Overall accuracy on the first n_overall samples
    pred_all = sess.run(pred, feed_dict={x: X_test[:n_overall]})
    pred_classes = np.argmax(pred_all, axis=1)
    accuracy = np.mean(pred_classes == y_test[:n_overall])
    print(f"\nOverall accuracy on {n_overall} test samples: {accuracy*100:.1f}%")

    # A model that always answers with the same class scores whatever share of
    # the samples that class happens to have, so the accuracy figure alone can
    # be misleading; report the collapse explicitly.
    distinct_predictions = np.unique(pred_classes)
    if len(distinct_predictions) == 1:
        print(f"\n⚠️ Every sample was predicted as {LABELS[distinct_predictions[0]]} "
              f"- the restored weights are not discriminating between classes.")

    print(f"\n⚠️ If accuracy is ~16-20%, the saved model is corrupted!")
    print(f"   You need to RETRAIN the model with a fresh kernel.")

Loaded UCI HAR test data: (2947, 128, 9)
Testing model on ORIGINAL training data...

INFO:tensorflow:Restoring parameters from ./model/lstm_model.ckpt
WALKING             : 0/5 correct
WALKING_UPSTAIRS    : 0/5 correct
WALKING_DOWNSTAIRS  : 0/5 correct
SITTING             : 0/5 correct
STANDING            : 0/5 correct
LAYING              : 5/5 correct

Overall accuracy on 100 test samples: 24.0%

⚠️ If accuracy is ~16-20%, the saved model is corrupted!
   You need to RETRAIN the model with a fresh kernel.
