## Collect and preprocess the data


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Define column names (based on the PAMAP2 dataset readme).
# Each row of a subject file has 54 space-separated columns: timestamp,
# activity ID, heart rate, then 17 columns for each of the three IMUs.
# All 54 names must be supplied: when `names` is shorter than the row,
# pandas uses the leading columns as an index and the remaining labels end up
# on the wrong data.
# NOTE(review): assumes the files are unmodified PAMAP2 recordings -- confirm.
_axes = ('x', 'y', 'z')
_imu_channels = (
    ['temp']
    + ['acc_' + axis for axis in _axes]      # +-16g accelerometer
    + ['acc6g_' + axis for axis in _axes]    # +-6g accelerometer (saturates on fast motion)
    + ['gyro_' + axis for axis in _axes]
    + ['mag_' + axis for axis in _axes]
    + ['orient_' + str(i) for i in range(1, 5)]  # flagged invalid in the readme
)
column_names = ['timestamp', 'ActivityID', 'HeartRate'] + [
    'IMU_' + location + '_' + channel
    for location in ('hand', 'chest', 'ankle')
    for channel in _imu_channels
]

# Load and concatenate multiple subject files
files = ['dataset/subject101.dat', 'dataset/subject102.dat', 'dataset/subject103.dat']  # Add more if needed
_frames = []
for _path in files:
    _frame = pd.read_csv(_path, sep=' ', header=None, names=column_names)

    # Drop rows where ActivityID is NaN (cannot be labelled)
    _frame = _frame.dropna(subset=['ActivityID'])

    # Fill NaNs in sensor data with interpolation. This is done per file so a
    # gap is never bridged with samples from a different subject;
    # limit_direction='both' also fills NaNs at the very start of a recording.
    _frame = _frame.interpolate(method='linear', limit_direction='both')

    # ActivityID 0 marks transient periods between activities; the dataset
    # readme says these rows must be discarded.
    _frame = _frame[_frame['ActivityID'] != 0]
    _frames.append(_frame)

# ignore_index gives every row a unique label instead of repeating 0..n per file
df = pd.concat(_frames, ignore_index=True)

# Normalize relevant sensor columns (+-16g accelerometer and gyroscope only)
sensor_cols = [col for col in df.columns if '_acc_' in col or '_gyro_' in col]
_sensor_mean = df[sensor_cols].mean()
# A constant column has std 0; divide by 1 instead so it becomes 0, not NaN
_sensor_std = df[sensor_cols].std().replace(0, 1.0)
# NOTE(review): the statistics are computed over all rows, including the ones
# that later land in the test split -- consider fitting them on training data only.
df[sensor_cols] = (df[sensor_cols] - _sensor_mean) / _sensor_std

# Create sequences using sliding window
def create_sequences(data, seq_length=50, feature_cols=None, step=1):
    """Cut fixed-length windows with a single activity label out of ``data``.

    A window is kept only when every row in it carries the same
    ``ActivityID``; windows that straddle an activity change, or that contain
    a NaN label, are skipped.

    Args:
        data: DataFrame with an ``'ActivityID'`` column and the feature columns.
        seq_length: Number of consecutive rows per window.
        feature_cols: Columns used as features. Defaults to the module-level
            ``sensor_cols`` list.
        step: Offset in rows between the starts of consecutive windows.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape
        ``(n_windows, seq_length, n_features)`` and ``y`` holds the integer
        activity ID of each window. With no valid window, ``X`` has shape
        ``(0, seq_length, n_features)`` and ``y`` shape ``(0,)``.

    Raises:
        ValueError: If ``seq_length`` or ``step`` is smaller than 1.
    """
    if seq_length < 1 or step < 1:
        raise ValueError("seq_length and step must both be >= 1")
    if feature_cols is None:
        feature_cols = sensor_cols
    feature_cols = list(feature_cols)

    # Convert once up front; slicing NumPy arrays is far cheaper than calling
    # DataFrame.iloc and nunique() for every window.
    features = data[feature_cols].to_numpy()
    activity = data['ActivityID'].to_numpy()

    sequences, labels = [], []
    # "+ 1" so that the last full window (ending on the final row) is included.
    for start in range(0, len(data) - seq_length + 1, step):
        stop = start + seq_length
        window_ids = activity[start:stop]
        if (window_ids == window_ids[0]).all():  # Keep only consistent activity sequences
            sequences.append(features[start:stop])
            labels.append(int(window_ids[-1]))

    if not sequences:
        # Keep the 3-D layout so callers can still read X.shape[1] and X.shape[2].
        return (
            np.empty((0, seq_length, len(feature_cols))),
            np.empty((0,), dtype=int),
        )
    return np.array(sequences), np.array(labels)

# Turn the preprocessed frame into windowed samples and their activity IDs
X, y = create_sequences(df)

# Map the raw activity IDs onto contiguous 0-based class indices
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

# Hold out 20% of the windows for testing (fixed seed for reproducibility)
# NOTE(review): windows are cut with a stride of one row, so neighbouring
# windows share most of their samples; a random split puts near-duplicates in
# both sets. Consider splitting by subject or by time instead.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)


ModuleNotFoundError: No module named 'pandas'

## Define, train and export the model

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Define model
# Size the output layer from the label encoder rather than from y_train: a
# rare activity can end up only in the held-out data, and counting the unique
# training labels would then give too few output units for the label indices.
num_classes = len(le.classes_)

model = Sequential([
    # Input is (timesteps, features), taken from the windowed training data
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, return_sequences=False),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Labels are integer class indices, hence the sparse categorical loss
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train (the last 20% of the training arrays is used for validation)
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


# Save model
model.save('sensor_model.h5')

# Convert to TFLite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the .tflite file
with open('sensor_model.tflite', 'wb') as f:
    f.write(tflite_model)


ModuleNotFoundError: No module named 'tensorflow'