In [3]:
# --- 1. SETUP: Import tools and define your files ---
import pandas as pd
import os

# --- CONFIGURATION ---
# This should match your folder and file naming convention
data_path = 'sensordata/'
activities = ['sitting', 'walking', 'jogging', 'falling']
# --------------------

# Load all the individual CSV files into a dictionary
dataframes = {}
print("Step 1: Loading all sensor data files...")
try:
    for activity in activities:
        # Load accelerometer data
        acc_filename = f"{activity}accelerometer.csv"
        dataframes[f"{activity}_acc"] = pd.read_csv(os.path.join(data_path, acc_filename))
        
        # Load gyroscope data
        gyro_filename = f"{activity}gyroscope.csv"
        dataframes[f"{activity}_gyro"] = pd.read_csv(os.path.join(data_path, gyro_filename))
    print("Files loaded successfully.")
except FileNotFoundError as e:
    print(f"Error: Could not find a file. Please double-check your filenames in the 'sensordata' folder.")
    print(e)


# --- 2. MERGE & LABEL: Combine and label the data for each activity ---
print("\nStep 2: Merging and labeling data...")
processed_dfs = []
for activity in activities:
    acc_df = dataframes[f"{activity}_acc"]
    gyro_df = dataframes[f"{activity}_gyro"]
    
    # Merge based on the timestamp column
    merged_df = pd.merge(acc_df, gyro_df, on='seconds_elapsed', suffixes=('_acc', '_gyro'))
    
    # Label the data with the activity name
    merged_df['activity'] = activity
    
    processed_dfs.append(merged_df)
print("Data merged and labeled.")


# --- 3. CONCATENATE: Combine all activities into one master DataFrame ---
print("\nStep 3: Creating one master dataset...")
master_df = pd.concat(processed_dfs, ignore_index=True)
print("Master dataset created.")


# --- 4. INSPECT & SAVE: Check our final product and save it ---
print("\n--- Final Dataset Information ---")
master_df.info()

print("\n--- Count of records for each activity ---")
print(master_df['activity'].value_counts())

# Save the final, processed file
master_df.to_csv('sensor_data_processed.csv', index=False)
print("\n✅ SUCCESS! Your processed sensor data has been saved to 'sensor_data_processed.csv'")

Step 1: Loading all sensor data files...
Error: Could not find a file. Please double-check your filenames in the 'sensordata' folder.
[Errno 2] No such file or directory: 'sensordata/sittingaccelerometer.csv'

Step 2: Merging and labeling data...


KeyError: 'sitting_acc'

In [4]:
import os

# --- CONFIGURATION ---
data_path = 'sensordata/'
activities = ['sitting', 'walking', 'jogging', 'falling']
# --------------------

print("--- 1. Files the Code EXPECTS to Find ---")
expected_files = []
for activity in activities:
    expected_files.append(f"{activity}accelerometer.csv")
    expected_files.append(f"{activity}gyroscope.csv")

for f in sorted(expected_files):
    print(f)

print("\n-------------------------------------------------")
print("--- 2. Files ACTUALLY IN your 'sensordata' folder ---")
try:
    actual_files = os.listdir(data_path)
    if not actual_files:
        print("The 'sensordata' folder appears to be empty.")
    else:
        for f in sorted(actual_files):
            print(f)
except FileNotFoundError:
    print(f"Error: Could not find the directory '{data_path}'.")

--- 1. Files the Code EXPECTS to Find ---
fallingaccelerometer.csv
fallinggyroscope.csv
joggingaccelerometer.csv
jogginggyroscope.csv
sittingaccelerometer.csv
sittinggyroscope.csv
walkingaccelerometer.csv
walkinggyroscope.csv

-------------------------------------------------
--- 2. Files ACTUALLY IN your 'sensordata' folder ---
fallingaccelerometer.csv
fallinggyroscope.csv
joggingGyroscope.csv
joggingaccelerometer.csv
sittingacceloremeter.csv
sittinggyroscope.csv
walkingAccelerometer.csv
walkingGyroscope.csv


In [6]:
# --- 1. SETUP: Import tools and define your files ---
import pandas as pd
import os

# --- CONFIGURATION ---
data_path = 'sensordata/'
activities = ['sitting', 'walking', 'jogging', 'falling']
# --------------------

# This new dictionary will hold our data
dataframes = {}
all_files_loaded = True # This is a flag to track if everything works

print("--- Starting Detailed File Loading ---")

# This loop will try to load each file and report its status
for activity in activities:
    # --- Try to load Accelerometer file ---
    acc_filename = f"{activity}accelerometer.csv"
    try:
        full_path = os.path.join(data_path, acc_filename)
        dataframes[f"{activity}_acc"] = pd.read_csv(full_path)
        print(f"✅ SUCCESS: Loaded '{acc_filename}'")
    except Exception as e:
        print(f"❌ FAILED: Could not load '{acc_filename}'. Error: {e}")
        all_files_loaded = False

    # --- Try to load Gyroscope file ---
    gyro_filename = f"{activity}gyroscope.csv"
    try:
        full_path = os.path.join(data_path, gyro_filename)
        dataframes[f"{activity}_gyro"] = pd.read_csv(full_path)
        print(f"✅ SUCCESS: Loaded '{gyro_filename}'")
    except Exception as e:
        print(f"❌ FAILED: Could not load '{gyro_filename}'. Error: {e}")
        all_files_loaded = False

print("\n--- Loading Check Complete ---")
if all_files_loaded:
    print("All files loaded successfully! You can now run the next cell with the merging code.")
else:
    print("One or more files failed to load. Please check the FAILED ❌ message(s) above.")

--- Starting Detailed File Loading ---
✅ SUCCESS: Loaded 'sittingaccelerometer.csv'
✅ SUCCESS: Loaded 'sittinggyroscope.csv'
✅ SUCCESS: Loaded 'walkingaccelerometer.csv'
✅ SUCCESS: Loaded 'walkinggyroscope.csv'
✅ SUCCESS: Loaded 'joggingaccelerometer.csv'
✅ SUCCESS: Loaded 'jogginggyroscope.csv'
✅ SUCCESS: Loaded 'fallingaccelerometer.csv'
✅ SUCCESS: Loaded 'fallinggyroscope.csv'

--- Loading Check Complete ---
All files loaded successfully! You can now run the next cell with the merging code.


In [2]:
# ===================================================================
# FINAL SCRIPT FOR SENSOR DATA PREPARATION (ALL STEPS)
# ===================================================================

# --- 1. SETUP: Import tools and define your files ---
import pandas as pd
import os

print("Step 1 of 4: Loading all 8 sensor data files...")
data_path = 'sensordata/'
activities = ['sitting', 'walking', 'jogging', 'falling']
dataframes = {}

try:
    for activity in activities:
        acc_filename = f"{activity}accelerometer.csv"
        dataframes[f"{activity}_acc"] = pd.read_csv(os.path.join(data_path, acc_filename))
        
        gyro_filename = f"{activity}gyroscope.csv"
        dataframes[f"{activity}_gyro"] = pd.read_csv(os.path.join(data_path, gyro_filename))
    print("Files loaded successfully.")
except FileNotFoundError as e:
    print(f"Error: A file was not found. Please double-check your filenames in the 'sensordata' folder.")
    raise e # Stop the script if a file is missing

# --- 2. MERGE & LABEL: Combine data for each activity ---
print("\nStep 2 of 4: Merging and labeling data...")
processed_dfs = []
for activity in activities:
    acc_df = dataframes[f"{activity}_acc"]
    gyro_df = dataframes[f"{activity}_gyro"]
    
    merged_df = pd.merge(acc_df, gyro_df, on='seconds_elapsed', suffixes=('_acc', '_gyro'))
    merged_df['activity'] = activity
    processed_dfs.append(merged_df)
print("Data merged and labeled.")

# --- 3. CONCATENATE: Create one master dataset ---
print("\nStep 3 of 4: Creating the master dataset...")
master_df = pd.concat(processed_dfs, ignore_index=True)
print("Master dataset created.")

# --- 4. INSPECT & SAVE: Check and save the final file ---
print("\nStep 4 of 4: Saving the final processed file...")
master_df.to_csv('sensor_data_processed.csv', index=False)

print("\n----------------------------------------------------")
print("✅ SUCCESS! All steps are complete.")
print("The file 'sensor_data_processed.csv' has been created.")
print("----------------------------------------------------")
print("\nFinal Data Info:")
master_df.info()
print("\nActivity Counts:")
print(master_df['activity'].value_counts())

Step 1 of 4: Loading all 8 sensor data files...
Files loaded successfully.

Step 2 of 4: Merging and labeling data...
Data merged and labeled.

Step 3 of 4: Creating the master dataset...
Master dataset created.

Step 4 of 4: Saving the final processed file...

----------------------------------------------------
✅ SUCCESS! All steps are complete.
The file 'sensor_data_processed.csv' has been created.
----------------------------------------------------

Final Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26450 entries, 0 to 26449
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   time_acc         26450 non-null  int64  
 1   seconds_elapsed  26450 non-null  float64
 2   z_acc            26450 non-null  float64
 3   y_acc            26450 non-null  float64
 4   x_acc            26450 non-null  float64
 5   time_gyro        26450 non-null  int64  
 6   z_gyro           26450 non-null  float64
 

In [4]:
# --- 1. SETUP: Import necessary tools ---
import pandas as pd
from sklearn.ensemble import IsolationForest
import joblib

# --- 2. LOAD DATA: Load the processed sensor data you just created ---
df = pd.read_csv('sensor_data_processed.csv')

# --- 3. PREPARE DATA: Separate normal and abnormal activities ---
# The features are the raw sensor readings.
features = [
    'x_acc', 'y_acc', 'z_acc',
    'x_gyro', 'y_gyro', 'z_gyro'
]

# Create a DataFrame with only the "normal" activities for training.
normal_df = df[df['activity'] != 'falling']
X_train = normal_df[features]

# Create a DataFrame with the "abnormal" activity for testing.
anomaly_df = df[df['activity'] == 'falling']
X_test_anomaly = anomaly_df[features]

print(f"Training model on {len(X_train)} normal data points.")

# --- 4. TRAIN THE MODEL: The "learning" phase ---
print("\nTraining the anomaly detection model...")
model = IsolationForest(n_estimators=100, contamination='auto', random_state=42)

# We only fit the model on the NORMAL data.
model.fit(X_train)
print("Model training complete!")

# --- 5. TEST THE MODEL: See if it can spot the fall ---
# The model predicts '1' for normal and '-1' for an anomaly.
normal_predictions = model.predict(X_train.sample(10)) 
anomaly_predictions = model.predict(X_test_anomaly)

print("\n--- Model Test Results ---")
print(f"Predictions on normal data (should be all 1s): {normal_predictions}")
print(f"Predictions on falling data (should be all -1s): {anomaly_predictions}")

# --- 6. SAVE THE MODEL: Saving the trained "watchdog" to a file ---
joblib.dump(model, 'anomaly_model.joblib')
print("\n✅ SUCCESS! Anomaly detection model saved as 'anomaly_model.joblib'")

Training model on 25079 normal data points.

Training the anomaly detection model...
Model training complete!

--- Model Test Results ---
Predictions on normal data (should be all 1s): [ 1  1  1  1  1 -1 -1  1 -1  1]
Predictions on falling data (should be all -1s): [1 1 1 ... 1 1 1]

✅ SUCCESS! Anomaly detection model saved as 'anomaly_model.joblib'


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from scipy import stats

# --- 1. Load the prepared data ---
df = pd.read_csv('sensor_data_processed.csv')

# --- 2. Encode the activity labels into numbers ---
# The model needs numbers, not text (e.g., walking=0, sitting=1)
le = LabelEncoder()
df['activity_encoded'] = le.fit_transform(df['activity'])

# --- 3. Scale the sensor data ---
# This brings all sensor values to a similar scale, which helps the model train better.
features = ['x_acc', 'y_acc', 'z_acc', 'x_gyro', 'y_gyro', 'z_gyro']
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

# --- 4. Create the time-series "windows" ---
TIME_STEPS = 100  # Each "window" or "sentence" will have 100 timestamps
STEP = 50       # We'll slide the window forward by 50 timestamps each time

sequences = []
labels = []

# Create a window for each activity type separately
for activity_type in df['activity'].unique():
    subset = df[df['activity'] == activity_type]
    
    for i in range(0, len(subset) - TIME_STEPS, STEP):
        # Get the sensor readings for the window
        sequence_features = subset[features].iloc[i: i + TIME_STEPS].values
        # Get the single label for that window
        label = subset['activity_encoded'].iloc[i: i + TIME_STEPS].mode()[0]
        
        sequences.append(sequence_features)
        labels.append(label)

# Convert the lists to NumPy arrays, the format TensorFlow expects
X = np.array(sequences)
y = np.array(labels)

print("--- Data Preparation Complete ---")
print(f"Shape of the input data (X): {X.shape}")
print(f"Shape of the labels (y): {y.shape}")


--- Data Preparation Complete ---
Shape of the input data (X): (522, 100, 6)
Shape of the labels (y): (522,)


In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# --- 1. SPLIT DATA: Create training and testing sets ---
# We use the X and y arrays we created in the last step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- 2. DEFINE THE MODEL ARCHITECTURE ---
print("Building the LSTM model...")

model = Sequential()
# Add the LSTM layer. 
# input_shape tells the model the shape of our "windows": (100 timestamps, 6 sensor features)
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2])))
# Add a Dropout layer to prevent the model from memorizing the data too much.
model.add(Dropout(0.5))
# Add the final output layer. It has 4 units (one for each activity)
# and 'softmax' activation to pick the most likely activity.
model.add(Dense(units=len(np.unique(y)), activation='softmax'))

# --- 3. COMPILE THE MODEL ---
# This step prepares the model for training.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print a summary of the model's architecture
model.summary()

# --- 4. TRAIN THE MODEL ---
print("\nTraining the model... This will take a few minutes.")
# An epoch is one full pass through the entire training dataset.
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=64,
    validation_split=0.1, # Use 10% of training data for validation
    verbose=1
)
print("Model training complete!")

# --- 5. EVALUATE THE MODEL ---
print("\nEvaluating model performance on the test data...")
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Model Accuracy on Test Data: {accuracy * 100:.2f}%")

# --- 6. SAVE THE MODEL ---
model.save('activity_classifier_model.h5')
print("\n✅ SUCCESS! Activity Classifier model saved as 'activity_classifier_model.h5'")



Building the LSTM model...


  super().__init__(**kwargs)



Training the model... This will take a few minutes.
Epoch 1/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 121ms/step - accuracy: 0.2773 - loss: 1.3798 - val_accuracy: 0.4048 - val_loss: 1.3308
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.4160 - loss: 1.3348 - val_accuracy: 0.4762 - val_loss: 1.2923
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step - accuracy: 0.4560 - loss: 1.2911 - val_accuracy: 0.5000 - val_loss: 1.2468
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step - accuracy: 0.5040 - loss: 1.2298 - val_accuracy: 0.5000 - val_loss: 1.1779
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.5200 - loss: 1.1560 - val_accuracy: 0.5476 - val_loss: 1.0877
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.5333 - loss: 1.1099 - val_accuracy: 0.5476 - val_loss:



Model Accuracy on Test Data: 43.81%

✅ SUCCESS! Activity Classifier model saved as 'activity_classifier_model.h5'


In [7]:
import joblib
# Assuming your LabelEncoder is named 'le'
joblib.dump(le, 'label_encoder.joblib') 
print("Label encoder saved!")


Label encoder saved!


In [10]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

# --- YOUR ACTION NEEDED HERE ---
# Update this path to an actual .wav file from the dataset you downloaded
# Use the Shift + Right-click -> "Copy as path" method
audio_file_path = r'"C:\Users\bhumi\Downloads\archive (4)\go\fb7eb481_nohash_0.wav"'
# --------------------------------

try:
    # Load the audio file
    y, sr = librosa.load(audio_file_path)

    # Create the spectrogram
    D = librosa.stft(y)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Display the spectrogram
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram (What the AI "Sees")')
    plt.tight_layout()
    plt.show()

except Exception as e:
    print(f"Error: Could not load the audio file. Please double-check the 'audio_file_path'.")
    print(f"Details: {e}")

Error: Could not load the audio file. Please double-check the 'audio_file_path'.
Details: [Errno 22] Invalid argument: '"C:\\Users\\bhumi\\Downloads\\archive (4)\\go\\fb7eb481_nohash_0.wav"'


  y, sr = librosa.load(audio_file_path)
