# Human Activity Recognition - Prediction Script

This notebook loads the trained LSTM model and predicts activities from new sensor data.

# Imports and Setup

In [1]:
import numpy as np
import pandas as pd
import json
from scipy import signal
from tensorflow.python.ops import rnn_cell_impl
from collections import Counter

# TensorFlow 1.x compatibility
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

print("‚úì TensorFlow loaded in compatibility mode")

Instructions for updating:
non-resource variables are not supported in the long term
‚úì TensorFlow loaded in compatibility mode


  if not hasattr(np, "object"):


# Load Model Configuration

In [2]:
# Load model metadata saved during training
model_path = './model/lstm_model_v002'

with open(f'{model_path}/model_info.json', 'r') as f:
    model_info = json.load(f)

n_hidden = model_info['n_hidden']
n_classes = model_info['n_classes']
n_steps = model_info['n_steps']
n_input = model_info['n_input']

LABELS = [
    "WALKING",
    "WALKING_UPSTAIRS",
    "WALKING_DOWNSTAIRS",
    "SITTING",
    "STANDING",
    "LAYING"
]

print(f"Model Configuration:")
print(f"  - Input: {n_steps} timesteps √ó {n_input} features")
print(f"  - Hidden: {n_hidden} units")
print(f"  - Classes: {n_classes}")
print(f"  - Training accuracy: {model_info['final_accuracy']*100:.2f}%")

Model Configuration:
  - Input: 128 timesteps √ó 9 features
  - Hidden: 32 units
  - Classes: 6
  - Training accuracy: 91.18%


# Rebuild Model Architecture (MUST match training exactly!)

In [3]:
# ‚úÖ CRITICAL: Clear any previous graph
tf.reset_default_graph()

def LSTM_RNN(_X, _weights, _biases):
    """
    LSTM neural network - must be IDENTICAL to training code
    """
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    _X = tf.reshape(_X, [-1, n_input])
    
    # ReLU activation
    _X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
    _X = tf.split(_X, n_steps, 0)
    
    # Two stacked LSTM cells
    lstm_cell_1 = rnn_cell_impl.LSTMCell(n_hidden, forget_bias=1.0)
    lstm_cell_2 = rnn_cell_impl.LSTMCell(n_hidden, forget_bias=1.0)
    lstm_cells = rnn_cell_impl.MultiRNNCell([lstm_cell_1, lstm_cell_2])
    
    outputs, states = tf.nn.static_rnn(lstm_cells, _X, dtype=tf.float32)
    lstm_last_output = outputs[-1]
    
    return tf.matmul(lstm_last_output, _weights['out']) + _biases['out']

# Create placeholders and variables - NAMES MUST MATCH TRAINING!
x = tf.placeholder(tf.float32, [None, n_steps, n_input], name='input_x')

weights = {
    'hidden': tf.Variable(tf.random_normal([n_input, n_hidden]), name='weights_hidden'),
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0), name='weights_out')
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden]), name='biases_hidden'),
    'out': tf.Variable(tf.random_normal([n_classes]), name='biases_out')
}

pred = LSTM_RNN(x, weights, biases)
print("‚úì Model architecture created")

Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor




‚úì Model architecture created


# Sensor Data Preprocessing Function

In [5]:
def preprocess_sensor_data(csv_file, window_size=128, overlap=0.5, input_hz=100, target_hz=50):
    """
    Preprocess raw sensor data from CSV into model-ready format.
    
    The UCI HAR dataset uses 9 features per timestep at 50 Hz:
    - body_acc_x, body_acc_y, body_acc_z (body acceleration, gravity removed)
    - body_gyro_x, body_gyro_y, body_gyro_z (angular velocity)
    - total_acc_x, total_acc_y, total_acc_z (raw accelerometer)
    
    IMPORTANT: Phone orientation must match UCI HAR - X-axis pointing UP when standing
    
    Args:
        csv_file: Path to CSV with columns: acc_x, acc_y, acc_z, gyro_x, gyro_y, gyro_z
        window_size: Number of timesteps per window (default 128, same as UCI HAR)
        overlap: Overlap between windows (default 0.5 = 50%)
        input_hz: Sampling rate of your sensor data (default 100 Hz for Sensor Logger at 10ms)
        target_hz: Target sampling rate for the model (default 50 Hz for UCI HAR)
    
    Returns:
        windows: Array of shape (num_windows, 128, 9)
    """
    # Read CSV - handle both comma and semicolon delimiters
    try:
        df = pd.read_csv(csv_file, delimiter=',')
        if len(df.columns) == 1:
            df = pd.read_csv(csv_file, delimiter=';')
    except:
        df = pd.read_csv(csv_file, delimiter=';')
    
    print(f"Loaded {csv_file}")
    print(f"  Columns: {list(df.columns)}")
    print(f"  Original shape: {df.shape}")
    
    # Expected columns
    required_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
    
    # Check for missing columns
    missing = [c for c in required_cols if c not in df.columns]
    # missing = []
    # for c in required_cols:
    #     if c not in df.columns:
    #         missing.append(c)
    if missing:
        raise ValueError(f"Missing columns: {missing}. Found: {list(df.columns)}")
    
    

    # Handle missing values
    df_clean = df[required_cols].dropna()
    
    # DOWNSAMPLE from input_hz to target_hz 
    if input_hz != target_hz:
        downsample_factor = input_hz // target_hz
        df_clean = df_clean.iloc[::downsample_factor].reset_index(drop=True)
        
        print(f"  ‚ö° Downsampling: {input_hz} Hz -> {target_hz} Hz (taking every {downsample_factor}th sample)")
        print(f"  Shape after downsampling: {df_clean.shape}")
    
    if len(df_clean) < window_size:
        raise ValueError(f"Need at least {window_size} samples, got {len(df_clean)}")
    
    # Extract sensor data
    acc = df_clean[['acc_x', 'acc_y', 'acc_z']].to_numpy()
    gyro = df_clean[['gyro_x', 'gyro_y', 'gyro_z']].to_numpy()

    # Apply Butterworth high-pass filter to separate body acceleration from gravity
    # UCI HAR uses 0.3 Hz cutoff at 50 Hz sampling rate
    b, a = signal.butter(3, 0.3, btype='high', fs=target_hz)
    body_acc = np.zeros_like(acc)
    for i in range(3):
        body_acc[:, i] = signal.filtfilt(b, a, acc[:, i])
    
    # Total acceleration is the raw accelerometer data
    total_acc = acc
    
    # Combine all 9 features in the same order as UCI HAR:
    # [body_acc_x, body_acc_y, body_acc_z, gyro_x, gyro_y, gyro_z, total_acc_x, total_acc_y, total_acc_z]
    all_features = np.hstack([body_acc, gyro, total_acc])
    
    # Create sliding windows
    step_size = int(window_size * (1 - overlap))
    windows = []
    
    for start in range(0, len(all_features) - window_size + 1, step_size):
        window = all_features[start:start + window_size]
        windows.append(window)
    
    windows = np.array(windows, dtype=np.float32)
    print(f"  Created {len(windows)} windows of shape (128, 9)")
    print(f"  Each window = {window_size / target_hz:.2f} seconds of activity")
    
    return windows

print("‚úì Preprocessing function defined (100Hz ‚Üí 50Hz downsampling)")

‚úì Preprocessing function defined (100Hz ‚Üí 50Hz downsampling)


# Prediction Function

In [6]:
def predict_activity(csv_file, model_path):
    """
    Load model, preprocess data, and predict activity.
    
    Args:
        csv_file: Path to CSV file with sensor data
    
    Returns:
        activity: Predicted activity label
        confidence: Confidence percentage
    """
    # Preprocess the sensor data
    windows = preprocess_sensor_data(csv_file)
    
    # Create saver and restore model
    saver = tf.train.Saver()
    
    with tf.Session() as sess:
        # Restore trained weights
        #saver.restore(sess, "./model/lstm_model.ckpt")
        saver.restore(sess, f"{model_path}/model.ckpt")
        print("‚úì Model restored from checkpoint")
        
        # Make predictions for all windows
        predictions_raw = sess.run(pred, feed_dict={x: windows})
        probabilities = sess.run(tf.nn.softmax(predictions_raw))
        predicted_classes = predictions_raw.argmax(axis=1)
        
        # Count predictions
        from collections import Counter
        
        vote_counts = Counter(predicted_classes)
        # Get the most common prediction
        most_common_class = vote_counts.most_common(1)[0][0]
        vote_count = vote_counts.most_common(1)[0][1]
        
        # Calculate average confidence for the predicted class
        confidences = [probabilities[i][predicted_classes[i]] 
                      for i in range(len(windows))]
        avg_confidence = np.mean(confidences) * 100
        
        # Print results
        print(f"\n{'='*50}")
        print(f"PREDICTION RESULTS")
        print(f"{'='*50}")
        print(f"\n  Activity: {LABELS[most_common_class]}")
        print(f"  Confidence: {avg_confidence:.1f}%")
        print(f"  Windows: {vote_count}/{len(windows)} voted for this class")
        print(f"\n  Vote breakdown:")
        for cls, count in vote_counts.most_common():
            pct = count / len(windows) * 100
            print(f"    {LABELS[cls]:20s}: {count:3d} ({pct:.1f}%)")
        print(f"{'='*50}\n")
        
        return LABELS[most_common_class], avg_confidence

print("‚úì Prediction function defined")

‚úì Prediction function defined


In [12]:
import os

file_path = './sensor_data/walking2_data.csv'

# Check if file exists
if os.path.exists(file_path):
    print(f"‚úì File exists: {file_path}")
    
    # Check file size
    file_size = os.path.getsize(file_path)
    print(f"  File size: {file_size} bytes")
    
    if file_size == 0:
        print("  ‚ö†Ô∏è FILE IS EMPTY!")
    else:
        # Try to read first few lines
        with open(file_path, 'r') as f:
            print("\n  First 5 lines:")
            for i, line in enumerate(f):
                if i >= 5:
                    break
                print(f"    {i}: {line.strip()}")
else:
    print(f"‚úó File does NOT exist: {file_path}")
    print("\n  Available files:")
    if os.path.exists('./sensor_data/'):
        for filename in os.listdir('./sensor_data/'):
            print(f"    - {filename}")

‚úì File exists: ./sensor_data/walking2_data.csv
  File size: 948372 bytes

  First 5 lines:
    0: timestamp;acc_z;acc_y;acc_x;gyro_z;gyro_y;gyro_x
    1: 1767708643575376600;0.0420989990234375;0.0620880126953125;0.990997314453125;0.03930852189660072;-0.021207816898822784;0.05567098408937454
    2: 1767708643585378000;0.0433197021484375;0.0545501708984375;0.987701416015625;0.04080016165971756;-0.03068946860730648;0.051896754652261734
    3: 1767708643595378000;0.0416259765625;0.045318603515625;0.985809326171875;0.03905152902007103;-0.038592930883169174;0.05770430713891983
    4: 1767708643605377800;0.0326690673828125;0.044281005859375;0.979400634765625;0.03573775663971901;-0.04701331630349159;0.07068055868148804


# Run Predictions

Change the file path below to predict on different sensor data files.

In [7]:
# ============================================
# Cell 6: Run Prediction on Your Data
# ============================================
walking = './sensor_data/walking2_data.csv'
sitting = './sensor_data/sitting2_data.csv'
laying = './sensor_data/laying2_data.csv'
standing = './sensor_data/standing2_data.csv'

# Uncomment to test other activities:
activity, confidence = predict_activity(walking, model_path)
#activity, confidence = predict_activity(sitting, model_path)
# activity, confidence = predict_activity(laying, model_path)
# activity, confidence = predict_activity(standing, model_path)

Loaded ./sensor_data/walking2_data.csv
  Columns: ['timestamp', 'acc_z', 'acc_y', 'acc_x', 'gyro_z', 'gyro_y', 'gyro_x']
  Original shape: (6963, 7)
  ‚ö° Downsampling: 100 Hz -> 50 Hz (taking every 2th sample)
  Shape after downsampling: (3482, 6)
  Created 53 windows of shape (128, 9)
  Each window = 2.56 seconds of activity
INFO:tensorflow:Restoring parameters from ./model/lstm_model_v002/model.ckpt


I0000 00:00:1768399808.959277 14714536 mlir_graph_optimization_pass.cc:437] MLIR V1 optimization pass is not enabled


‚úì Model restored from checkpoint

PREDICTION RESULTS

  Activity: WALKING_DOWNSTAIRS
  Confidence: 86.5%
  Windows: 44/53 voted for this class

  Vote breakdown:
    WALKING_DOWNSTAIRS  :  44 (83.0%)
    WALKING_UPSTAIRS    :   7 (13.2%)
    SITTING             :   2 (3.8%)



## üì¶ SQLite Database Functions

Save predictions and sensor data for later retraining.

In [None]:
import sqlite3
import pickle
from datetime import datetime

def save_to_database(csv_file, predicted_activity, confidence, windows, model_version, corrected_label=None):
    """
    Save prediction results and sensor data to SQLite database.
    
    Args:
        csv_file: Path to original CSV
        predicted_activity: Predicted label (e.g., "WALKING")
        confidence: Confidence percentage
        windows: Preprocessed sensor windows (numpy array) - shape (num_windows, 128, 9)
        model_version: Model version used (e.g., "v002")
        corrected_label: User-corrected label if different from prediction (optional)
    
    Returns:
        prediction_id: Database ID for this prediction
    """
    conn = sqlite3.connect('har_predictions.db')
    cursor = conn.cursor()
    
    # Stores prediction data
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS predictions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT NOT NULL,
            file_path TEXT NOT NULL,
            predicted_activity TEXT NOT NULL,
            predicted_label INTEGER NOT NULL,
            corrected_label INTEGER,
            confidence REAL NOT NULL,
            model_version TEXT NOT NULL,
            num_windows INTEGER NOT NULL,
            is_correct BOOLEAN
        )
    ''')

    # This table stores raw ML training data.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS training_windows (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            prediction_id INTEGER NOT NULL,
            window_index INTEGER NOT NULL,
            window_data BLOB NOT NULL,
            label INTEGER NOT NULL,
            FOREIGN KEY (prediction_id) REFERENCES predictions(id)
        )
    ''')
    
    # Convert activity name to label (0-5)
    predicted_label = LABELS.index(predicted_activity)
    corrected_label_int = corrected_label if corrected_label is not None else None
    is_correct = corrected_label is None or (corrected_label == predicted_label)
    
    # Insert prediction record
    cursor.execute('''
        INSERT INTO predictions 
        (timestamp, file_path, predicted_activity, predicted_label, corrected_label, 
         confidence, model_version, num_windows, is_correct)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        datetime.now().isoformat(),
        csv_file,
        predicted_activity,
        predicted_label,
        corrected_label_int,
        confidence,
        model_version,
        len(windows),
        is_correct
    ))
    
    prediction_id = cursor.lastrowid
    
    # Insert sensor windows (use pickle for efficient storage)
    label_to_use = corrected_label_int if corrected_label_int is not None else predicted_label
    
    for i, window in enumerate(windows):
        # Serialize numpy array (128, 9) to binary
        window_blob = pickle.dumps(window)
        
        cursor.execute('''
            INSERT INTO training_windows (prediction_id, window_index, window_data, label)
            VALUES (?, ?, ?, ?)
        ''', (prediction_id, i, window_blob, label_to_use))
    
    conn.commit()
    conn.close()
    
    print(f"‚úì Saved to database: prediction_id={prediction_id}")
    print(f"  - Stored {len(windows)} windows")
    print(f"  - Label: {predicted_label} ({predicted_activity})")
    if corrected_label_int is not None:
        print(f"  - Corrected to: {corrected_label_int} ({LABELS[corrected_label_int]})")
    
    return prediction_id

print("‚úì Database save function defined")

‚úì Database save function defined


In [9]:
def load_training_data(limit=None, filter_correct=False):
    """
    Load all stored predictions from database for retraining.
    
    Args:
        limit: Maximum number of windows to load (None = all)
        filter_correct: If True, only load user-corrected predictions
    
    Returns:
        X_new: np.array of shape (total_windows, 128, 9) - ready for training!
        y_new: np.array of shape (total_windows, 1) - labels (0-5)
    """
    conn = sqlite3.connect('har_predictions.db')
    cursor = conn.cursor()
    
    # Build query based on filters
    query = 'SELECT window_data, label FROM training_windows'
    
    if filter_correct:
        query = '''
            SELECT tw.window_data, tw.label 
            FROM training_windows tw
            JOIN predictions p ON tw.prediction_id = p.id
            WHERE p.corrected_label IS NOT NULL
        '''
    
    if limit:
        query += f' LIMIT {limit}'
    
    cursor.execute(query)
    rows = cursor.fetchall()
    
    X_new = []
    y_new = []
    
    for window_blob, label in rows:
        # Deserialize numpy array
        window = pickle.loads(window_blob)
        X_new.append(window)
        y_new.append(label)
    
    conn.close()
    
    if len(X_new) == 0:
        print("‚ö†Ô∏è No training data found in database")
        return None, None
    
    X_new = np.array(X_new, dtype=np.float32)
    y_new = np.array(y_new, dtype=np.int32).reshape(-1, 1)
    
    print(f"‚úì Loaded {len(X_new)} windows from database")
    print(f"  - Shape: {X_new.shape}")
    print(f"  - Labels shape: {y_new.shape}")
    print(f"  - Label distribution:")
    
    from collections import Counter
    label_counts = Counter(y_new.flatten())
    for label_idx, count in sorted(label_counts.items()):
        print(f"    {LABELS[label_idx]:20s}: {count:5d} windows")
    
    return X_new, y_new

print("‚úì Data loader function defined")

‚úì Data loader function defined


In [10]:
def view_database_stats():
    """
    View statistics about stored predictions.
    """
    conn = sqlite3.connect('har_predictions.db')
    cursor = conn.cursor()
    
    # Count predictions
    cursor.execute('SELECT COUNT(*) FROM predictions')
    num_predictions = cursor.fetchone()[0]
    
    # Count windows
    cursor.execute('SELECT COUNT(*) FROM training_windows')
    num_windows = cursor.fetchone()[0]
    
    # Get predictions by activity
    cursor.execute('''
        SELECT predicted_activity, COUNT(*) as count 
        FROM predictions 
        GROUP BY predicted_activity
    ''')
    activity_counts = cursor.fetchall()
    
    # Get corrected predictions
    cursor.execute('SELECT COUNT(*) FROM predictions WHERE corrected_label IS NOT NULL')
    num_corrected = cursor.fetchone()[0]
    
    conn.close()
    
    print(f"\n{'='*50}")
    print(f"DATABASE STATISTICS")
    print(f"{'='*50}")
    print(f"\n  Total predictions: {num_predictions}")
    print(f"  Total windows: {num_windows}")
    print(f"  User-corrected: {num_corrected}")
    print(f"\n  Predictions by activity:")
    for activity, count in activity_counts:
        print(f"    {activity:20s}: {count:3d}")
    print(f"{'='*50}\n")

print("‚úì Database stats function defined")

‚úì Database stats function defined


## üîÑ Prediction with Database Integration

Enhanced prediction function that saves results to database.

In [11]:
def predict_and_save(csv_file, model_path, save_to_db=True, corrected_label=None):
    """
    Predict activity and optionally save to database for retraining.
    
    Args:
        csv_file: Path to CSV file with sensor data
        model_path: Path to model directory (e.g., "model/lstm_model_v002")
        save_to_db: Whether to save results to database (default: True)
        corrected_label: If user corrects prediction, provide correct label (0-5)
    
    Returns:
        activity: Predicted activity label
        confidence: Confidence percentage
        prediction_id: Database ID (if saved)
    """
    # Preprocess the sensor data
    windows = preprocess_sensor_data(csv_file)
    
    # Create saver and restore model
    saver = tf.train.Saver()
    
    with tf.Session() as sess:
        # Restore trained weights
        saver.restore(sess, f"{model_path}/model.ckpt")
        print("‚úì Model restored from checkpoint")
        
        # Make predictions for all windows
        predictions_raw = sess.run(pred, feed_dict={x: windows})
        probabilities = sess.run(tf.nn.softmax(predictions_raw))
        predicted_classes = predictions_raw.argmax(axis=1)
        
        # Count predictions
        from collections import Counter
        
        vote_counts = Counter(predicted_classes)
        # Get the most common prediction
        most_common_class = vote_counts.most_common(1)[0][0]
        vote_count = vote_counts.most_common(1)[0][1]
        
        # Calculate average confidence for the predicted class
        confidences = [probabilities[i][predicted_classes[i]] 
                      for i in range(len(windows))]
        avg_confidence = np.mean(confidences) * 100
        
        # Print results
        print(f"\n{'='*50}")
        print(f"PREDICTION RESULTS")
        print(f"{'='*50}")
        print(f"\n  Activity: {LABELS[most_common_class]}")
        print(f"  Confidence: {avg_confidence:.1f}%")
        print(f"  Windows: {vote_count}/{len(windows)} voted for this class")
        print(f"\n  Vote breakdown:")
        for cls, count in vote_counts.most_common():
            pct = count / len(windows) * 100
            print(f"    {LABELS[cls]:20s}: {count:3d} ({pct:.1f}%)")
        print(f"{'='*50}\n")
        
        # Save to database if requested
        prediction_id = None
        if save_to_db:
            model_version = model_path.split('/')[-1]  # e.g., "lstm_model_v002"
            prediction_id = save_to_database(
                csv_file=csv_file,
                predicted_activity=LABELS[most_common_class],
                confidence=avg_confidence,
                windows=windows,
                model_version=model_version,
                corrected_label=corrected_label
            )
        
        return LABELS[most_common_class], avg_confidence, prediction_id

print("‚úì Enhanced prediction function defined")

‚úì Enhanced prediction function defined


## üß™ Example Usage

### Example 1: Predict and Save to Database

In [13]:
walking = './sensor_data/walking2_data.csv'
sitting = './sensor_data/sitting2_data.csv'
laying = './sensor_data/laying2_data.csv'
standing = './sensor_data/standing2_data.csv'

# Example 1: Basic prediction with database save
activity, confidence, pred_id = predict_and_save(
    csv_file= standing,
    model_path='model/lstm_model_v002',
    save_to_db=True
)

print(f"\nResult: {activity} with {confidence:.1f}% confidence")
print(f"Saved as prediction ID: {pred_id}")

Loaded ./sensor_data/standing2_data.csv
  Columns: ['timestamp', 'acc_z', 'acc_y', 'acc_x', 'gyro_z', 'gyro_y', 'gyro_x']
  Original shape: (4685, 7)
  ‚ö° Downsampling: 100 Hz -> 50 Hz (taking every 2th sample)
  Shape after downsampling: (2343, 6)
  Created 35 windows of shape (128, 9)
  Each window = 2.56 seconds of activity
INFO:tensorflow:Restoring parameters from model/lstm_model_v002/model.ckpt
‚úì Model restored from checkpoint

PREDICTION RESULTS

  Activity: LAYING
  Confidence: 98.2%
  Windows: 35/35 voted for this class

  Vote breakdown:
    LAYING              :  35 (100.0%)

‚úì Saved to database: prediction_id=2
  - Stored 35 windows
  - Label: 5 (LAYING)

Result: LAYING with 98.2% confidence
Saved as prediction ID: 2


### Example 2: Predict with User Correction

If the prediction is wrong, correct it by providing the right label:

In [None]:
# Example 2: Prediction was wrong - user corrects it
# Model predicted WALKING, but it was actually SITTING

# Label mapping reminder:
# 0: WALKING, 1: WALKING_UPSTAIRS, 2: WALKING_DOWNSTAIRS, 3: SITTING, 4: STANDING, 5: LAYING

activity, confidence, pred_id = predict_and_save(
    csv_file=walking,
    model_path='model/lstm_model_v002',
    save_to_db=True,
    corrected_label=2  # User says it's actually SITTING (label 3)
)

print(f"\nModel predicted: {activity}")
print(f"User corrected to: {LABELS[2]}")
print(f"This data is now labeled correctly for retraining!")

Loaded ./sensor_data/walking2_data.csv
  Columns: ['timestamp', 'acc_z', 'acc_y', 'acc_x', 'gyro_z', 'gyro_y', 'gyro_x']
  Original shape: (6963, 7)
  ‚ö° Downsampling: 100 Hz -> 50 Hz (taking every 2th sample)
  Shape after downsampling: (3482, 6)
  Created 53 windows of shape (128, 9)
  Each window = 2.56 seconds of activity
INFO:tensorflow:Restoring parameters from model/lstm_model_v002/model.ckpt
‚úì Model restored from checkpoint

PREDICTION RESULTS

  Activity: WALKING_DOWNSTAIRS
  Confidence: 86.5%
  Windows: 44/53 voted for this class

  Vote breakdown:
    WALKING_DOWNSTAIRS  :  44 (83.0%)
    WALKING_UPSTAIRS    :   7 (13.2%)
    SITTING             :   2 (3.8%)

‚úì Saved to database: prediction_id=5
  - Stored 53 windows
  - Label: 2 (WALKING_DOWNSTAIRS)
  - Corrected to: 2 (WALKING_DOWNSTAIRS)

Model predicted: WALKING_DOWNSTAIRS
User corrected to: SITTING
This data is now labeled correctly for retraining!


### Example 3: View Database Statistics

In [18]:
# View what's in the database
view_database_stats()


DATABASE STATISTICS

  Total predictions: 5
  Total windows: 229
  User-corrected: 3

  Predictions by activity:
    LAYING              :   2
    WALKING_DOWNSTAIRS  :   3



### Example 4: Load Data for Retraining

In [19]:
# Load all stored data (ready for training!)
X_new, y_new = load_training_data()

# Or load only user-corrected data
# X_new, y_new = load_training_data(filter_correct=True)

# Or load limited number of windows
# X_new, y_new = load_training_data(limit=1000)

if X_new is not None:
    print(f"\n‚úì Data is ready for retraining!")
    print(f"  Use this in train_model.ipynb:")
    print(f"  X_train_combined = np.vstack([X_train, X_new])")
    print(f"  y_train_combined = np.vstack([y_train, y_new])")

‚úì Loaded 229 windows from database
  - Shape: (229, 128, 9)
  - Labels shape: (229, 1)
  - Label distribution:
    WALKING             :    53 windows
    WALKING_DOWNSTAIRS  :   106 windows
    STANDING            :    35 windows
    LAYING              :    35 windows

‚úì Data is ready for retraining!
  Use this in train_model.ipynb:
  X_train_combined = np.vstack([X_train, X_new])
  y_train_combined = np.vstack([y_train, y_new])



### **Database**

**predictions table:**
- `id`: Auto-increment primary key
- `timestamp`: When prediction was made
- `file_path`: Original CSV file path
- `predicted_activity`: Predicted activity name
- `predicted_label`: Predicted label (0-5)
- `corrected_label`: User-corrected label (if applicable)
- `confidence`: Prediction confidence %
- `model_version`: Which model version made prediction
- `num_windows`: Number of windows in this prediction
- `is_correct`: Whether prediction matches correction

**training_windows table:**
- `id`: Auto-increment primary key
- `prediction_id`: Foreign key to predictions
- `window_index`: Index of window in prediction
- `window_data`: Serialized numpy array (128, 9)
- `label`: Activity label (0-5)

### **Label Mapping**
- 0: WALKING
- 1: WALKING_UPSTAIRS
- 2: WALKING_DOWNSTAIRS
- 3: SITTING
- 4: STANDING
- 5: LAYING

# DEBUG: Test model with UCI HAR training data


In [None]:
# ============================================
# DEBUG: Test model with UCI HAR training data
# If it fails on training data too, the model checkpoint is bad
# ============================================

# Load actual UCI HAR data that the model was trained on
uci_path = "UCI HAR Dataset/"
X_test_signals = []
for sig in ["body_acc_x_", "body_acc_y_", "body_acc_z_", 
            "body_gyro_x_", "body_gyro_y_", "body_gyro_z_",
            "total_acc_x_", "total_acc_y_", "total_acc_z_"]:
    data = np.loadtxt(f"{uci_path}test/Inertial Signals/{sig}test.txt")
    X_test_signals.append(data)

X_test = np.transpose(np.array(X_test_signals), (1, 2, 0))
y_test = np.loadtxt(uci_path + "test/y_test.txt", dtype=int) - 1  # 0-indexed

print(f"Loaded UCI HAR test data: {X_test.shape}")
print(f"Testing model on ORIGINAL training data...\n")

# Test with the model
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./model/lstm_model.ckpt")
    
    # Test on 10 samples of each class
    for class_idx in range(6):
        indices = np.where(y_test == class_idx)[0][:5]
        correct = 0
        for idx in indices:
            sample = X_test[idx:idx+1]
            pred_raw = sess.run(pred, feed_dict={x: sample})
            pred_class = np.argmax(pred_raw)
            if pred_class == class_idx:
                correct += 1
        print(f"{LABELS[class_idx]:20s}: {correct}/5 correct")
    
    # Overall accuracy on first 100 samples
    pred_all = sess.run(pred, feed_dict={x: X_test[:100]})
    pred_classes = np.argmax(pred_all, axis=1)
    accuracy = np.mean(pred_classes == y_test[:100])
    print(f"\nOverall accuracy on 100 test samples: {accuracy*100:.1f}%")
    print(f"\n‚ö†Ô∏è If accuracy is ~16-20%, the saved model is corrupted!")
    print(f"   You need to RETRAIN the model with a fresh kernel.")

Loaded UCI HAR test data: (2947, 128, 9)
Testing model on ORIGINAL training data...

INFO:tensorflow:Restoring parameters from ./model/lstm_model.ckpt
WALKING             : 5/5 correct
WALKING_UPSTAIRS    : 5/5 correct
WALKING_DOWNSTAIRS  : 5/5 correct
SITTING             : 5/5 correct
STANDING            : 4/5 correct
LAYING              : 5/5 correct

Overall accuracy on 100 test samples: 88.0%

‚ö†Ô∏è If accuracy is ~16-20%, the saved model is corrupted!
   You need to RETRAIN the model with a fresh kernel.


In [11]:
# ============================================
# TEST: Predict ONLY on STANDING samples (label 5)
# ============================================

# Filter for only STANDING samples (label 5 in file = index 4 after -1)
standing_indices = np.where(y_test == 4)[0]  # 4 because we did -1 earlier (5-1=4)
X_standing = X_test[standing_indices]
y_standing = y_test[standing_indices]

print(f"Testing ONLY on STANDING samples")
print(f"Number of STANDING samples: {len(X_standing)}")
print(f"Shape: {X_standing.shape}")
print(f"Expected label (0-indexed): 4 = STANDING\n")

saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "./model/lstm_model.ckpt")
    
    # Predict on first 20 standing samples
    n_samples = min(20, len(X_standing))
    pred_all = sess.run(pred, feed_dict={x: X_standing[:n_samples]})
    pred_classes = np.argmax(pred_all, axis=1)
    
    print(f"Predictions for {n_samples} STANDING samples:")
    print("-" * 40)
    for i in range(n_samples):
        predicted = LABELS[pred_classes[i]]
        correct = "‚úì" if pred_classes[i] == 4 else "‚úó"
        print(f"  Sample {i+1}: {predicted:20s} {correct}")
    
    # Summary
    correct_count = np.sum(pred_classes == 4)
    print("-" * 40)
    print(f"\nAccuracy on STANDING: {correct_count}/{n_samples} = {correct_count/n_samples*100:.1f}%")
    
    # Show prediction distribution
    from collections import Counter
    counts = Counter(pred_classes)
    print(f"\nPrediction distribution:")
    for cls, count in sorted(counts.items()):
        print(f"  {LABELS[cls]:20s}: {count} ({count/n_samples*100:.1f}%)")

Testing ONLY on STANDING samples
Number of STANDING samples: 532
Shape: (532, 128, 9)
Expected label (0-indexed): 4 = STANDING

INFO:tensorflow:Restoring parameters from ./model/lstm_model.ckpt
Predictions for 20 STANDING samples:
----------------------------------------
  Sample 1: WALKING              ‚úó
  Sample 2: STANDING             ‚úì
  Sample 3: STANDING             ‚úì
  Sample 4: STANDING             ‚úì
  Sample 5: STANDING             ‚úì
  Sample 6: STANDING             ‚úì
  Sample 7: STANDING             ‚úì
  Sample 8: STANDING             ‚úì
  Sample 9: STANDING             ‚úì
  Sample 10: STANDING             ‚úì
  Sample 11: STANDING             ‚úì
  Sample 12: STANDING             ‚úì
  Sample 13: STANDING             ‚úì
  Sample 14: STANDING             ‚úì
  Sample 15: STANDING             ‚úì
  Sample 16: STANDING             ‚úì
  Sample 17: STANDING             ‚úì
  Sample 18: STANDING             ‚úì
  Sample 19: STANDING             ‚úì
  Sample 20: ST

In [38]:
# ============================================
# DIAGNOSTIC: Compare your sensor data vs UCI HAR data
# ============================================

print("=" * 60)
print("COMPARING YOUR DATA vs UCI HAR DATA")
print("=" * 60)

# Load your standing data
df_yours = pd.read_csv('sensor_data/standing2_data.csv', delimiter=';')
df_yours = df_yours.iloc[::2]  # Downsample to 50Hz

# Load UCI HAR standing data (one sample = 128 timesteps)
uci_standing_idx = np.where(y_test == 4)[0][0]  # First STANDING sample
uci_standing = X_test[uci_standing_idx]  # Shape: (128, 9)

print("\nüìä YOUR STANDING DATA (first 128 samples after downsampling):")
print("-" * 60)
your_acc_x = df_yours['acc_x'].values[:128]
your_acc_y = df_yours['acc_y'].values[:128]
your_acc_z = df_yours['acc_z'].values[:128]
your_gyro_x = df_yours['gyro_x'].values[:128]
your_gyro_y = df_yours['gyro_y'].values[:128]
your_gyro_z = df_yours['gyro_z'].values[:128]

print(f"  acc_x:  mean={your_acc_x.mean():+.4f}, std={your_acc_x.std():.4f}, range=[{your_acc_x.min():+.3f}, {your_acc_x.max():+.3f}]")
print(f"  acc_y:  mean={your_acc_y.mean():+.4f}, std={your_acc_y.std():.4f}, range=[{your_acc_y.min():+.3f}, {your_acc_y.max():+.3f}]")
print(f"  acc_z:  mean={your_acc_z.mean():+.4f}, std={your_acc_z.std():.4f}, range=[{your_acc_z.min():+.3f}, {your_acc_z.max():+.3f}]")
print(f"  gyro_x: mean={your_gyro_x.mean():+.4f}, std={your_gyro_x.std():.4f}")
print(f"  gyro_y: mean={your_gyro_y.mean():+.4f}, std={your_gyro_y.std():.4f}")
print(f"  gyro_z: mean={your_gyro_z.mean():+.4f}, std={your_gyro_z.std():.4f}")

# Detect gravity axis
gravity_axis = "X" if abs(your_acc_x.mean()) > 0.8 else ("Y" if abs(your_acc_y.mean()) > 0.8 else "Z")
print(f"\n  üéØ GRAVITY detected on: {gravity_axis}-axis (value ‚âà {[your_acc_x.mean(), your_acc_y.mean(), your_acc_z.mean()][['X','Y','Z'].index(gravity_axis)]:.2f}g)")

print("\nüìä UCI HAR STANDING DATA (sample #{})".format(uci_standing_idx))
print("-" * 60)
# UCI order: body_acc_x, body_acc_y, body_acc_z, gyro_x, gyro_y, gyro_z, total_acc_x, total_acc_y, total_acc_z
uci_total_acc_x = uci_standing[:, 6]
uci_total_acc_y = uci_standing[:, 7]
uci_total_acc_z = uci_standing[:, 8]
uci_gyro_x = uci_standing[:, 3]
uci_gyro_y = uci_standing[:, 4]
uci_gyro_z = uci_standing[:, 5]

print(f"  total_acc_x: mean={uci_total_acc_x.mean():+.4f}, std={uci_total_acc_x.std():.4f}, range=[{uci_total_acc_x.min():+.3f}, {uci_total_acc_x.max():+.3f}]")
print(f"  total_acc_y: mean={uci_total_acc_y.mean():+.4f}, std={uci_total_acc_y.std():.4f}, range=[{uci_total_acc_y.min():+.3f}, {uci_total_acc_y.max():+.3f}]")
print(f"  total_acc_z: mean={uci_total_acc_z.mean():+.4f}, std={uci_total_acc_z.std():.4f}, range=[{uci_total_acc_z.min():+.3f}, {uci_total_acc_z.max():+.3f}]")
print(f"  gyro_x:      mean={uci_gyro_x.mean():+.4f}, std={uci_gyro_x.std():.4f}")
print(f"  gyro_y:      mean={uci_gyro_y.mean():+.4f}, std={uci_gyro_y.std():.4f}")
print(f"  gyro_z:      mean={uci_gyro_z.mean():+.4f}, std={uci_gyro_z.std():.4f}")

uci_gravity_axis = "X" if abs(uci_total_acc_x.mean()) > 0.8 else ("Y" if abs(uci_total_acc_y.mean()) > 0.8 else "Z")
print(f"\n  üéØ GRAVITY detected on: {uci_gravity_axis}-axis (value ‚âà {[uci_total_acc_x.mean(), uci_total_acc_y.mean(), uci_total_acc_z.mean()][['X','Y','Z'].index(uci_gravity_axis)]:.2f}g)")

# Now check UCI LAYING data for comparison
print("\nüìä UCI HAR LAYING DATA (for comparison)")
print("-" * 60)
uci_laying_idx = np.where(y_test == 5)[0][0]  # First LAYING sample
uci_laying = X_test[uci_laying_idx]
uci_lay_acc_x = uci_laying[:, 6]
uci_lay_acc_y = uci_laying[:, 7]
uci_lay_acc_z = uci_laying[:, 8]

print(f"  total_acc_x: mean={uci_lay_acc_x.mean():+.4f}")
print(f"  total_acc_y: mean={uci_lay_acc_y.mean():+.4f}")
print(f"  total_acc_z: mean={uci_lay_acc_z.mean():+.4f}")
uci_lay_gravity = "X" if abs(uci_lay_acc_x.mean()) > 0.8 else ("Y" if abs(uci_lay_acc_y.mean()) > 0.8 else "Z")
print(f"\n  üéØ GRAVITY detected on: {uci_lay_gravity}-axis")

print("\n" + "=" * 60)
print("üîç ANALYSIS:")
print("=" * 60)
print(f"  Your phone gravity axis:     {gravity_axis}")
print(f"  UCI STANDING gravity axis:   {uci_gravity_axis}")
print(f"  UCI LAYING gravity axis:     {uci_lay_gravity}")

if gravity_axis == uci_lay_gravity and gravity_axis != uci_gravity_axis:
    print(f"\n  ‚ö†Ô∏è  YOUR DATA MATCHES UCI LAYING ORIENTATION!")
    print(f"      This explains why the model predicts LAYING.")
    print(f"\n  üí° SOLUTION: Rotate/remap your axes to match UCI STANDING orientation.")
    print(f"      UCI STANDING has gravity on {uci_gravity_axis}-axis")
    print(f"      Your data has gravity on {gravity_axis}-axis")
elif gravity_axis != uci_gravity_axis:
    print(f"\n  ‚ö†Ô∏è  AXIS MISMATCH DETECTED!")
    print(f"      Need to remap axes to match UCI orientation.")

COMPARING YOUR DATA vs UCI HAR DATA

üìä YOUR STANDING DATA (first 128 samples after downsampling):
------------------------------------------------------------
  acc_x:  mean=+1.0008, std=0.0093, range=[+0.971, +1.032]
  acc_y:  mean=+0.0130, std=0.0074, range=[-0.006, +0.034]
  acc_z:  mean=-0.0185, std=0.0256, range=[-0.060, +0.042]
  gyro_x: mean=+0.0155, std=0.0546
  gyro_y: mean=-0.0131, std=0.0546
  gyro_z: mean=-0.0047, std=0.0257

  üéØ GRAVITY detected on: X-axis (value ‚âà 1.00g)

üìä UCI HAR STANDING DATA (sample #0)
------------------------------------------------------------
  total_acc_x: mean=+0.9938, std=0.0195, range=[+0.928, +1.055]
  total_acc_y: mean=-0.2675, std=0.0098, range=[-0.293, -0.239]
  total_acc_z: mean=+0.1387, std=0.0199, range=[+0.024, +0.173]
  gyro_x:      mean=+0.1523, std=0.1031
  gyro_y:      mean=-0.0079, std=0.1422
  gyro_z:      mean=+0.0457, std=0.0307

  üéØ GRAVITY detected on: X-axis (value ‚âà 0.99g)

üìä UCI HAR LAYING DATA (for comp