In [3]:
!pip install pandas numpy scikit-learn scipy joblib



In [4]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.stats import skew, kurtosis
import joblib


In [5]:

# 1. Load and preprocess data
def load_data(file_path):
    """Load a CSV file and return a pandas DataFrame."""
    return pd.read_csv(file_path)

def preprocess_data(df):
    """Preprocess the DataFrame by selecting relevant features and handling missing values."""
    # Select relevant feature columns
    feature_cols = [
        'acc_X', 'acc_Y', 'acc_Z',
        'mag_X', 'mag_Y', 'mag_Z',
        'gyro_X', 'gyro_Y', 'gyro_Z'
    ]
    selected_data = df[feature_cols]

    # Handle missing values (e.g., fill with zeros)
    selected_data.fillna(0, inplace=True)

    # Extract the target label column
    labels = df['activity']

    return selected_data, labels

In [6]:
# 2. Feature extraction
def extract_features(window):
    """Extract statistical features from a window of data."""
    features = {}
    for col in window.columns:
        features[f'{col}_mean'] = window[col].mean()
        features[f'{col}_std'] = window[col].std()
        features[f'{col}_min'] = window[col].min()
        features[f'{col}_max'] = window[col].max()
        features[f'{col}_skew'] = skew(window[col])
        features[f'{col}_kurtosis'] = kurtosis(window[col])
    return features

def create_feature_dataset(data, labels, window_size=50, step_size=25):
    """Create a dataset of features and corresponding labels from sliding windows."""
    X, y = [], []
    for start in range(0, len(data) - window_size, step_size):
        end = start + window_size
        window = data.iloc[start:end]
        label_window = labels.iloc[start:end]

        # Extract features from the window
        features = extract_features(window)
        X.append(features)

        # Assign the most frequent label in the window as the target label
        label = label_window.mode().iloc[0]  # Most frequent label
        y.append(label)

    # Convert to DataFrame and Series
    X = pd.DataFrame(X)
    y = pd.Series(y)

    return X, y

In [7]:
# 3. Model training and evaluation
def train_model(X, y):
    """Train a Random Forest classifier and return the trained model."""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the model
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate the model
    y_pred = clf.predict(X_test)
    print("\nClassification Report:\n")
    print(classification_report(y_test, y_pred))

    return clf

In [8]:
# 4. Save the model
def save_model(model, output_path):
    """Save the trained model to a file."""
    joblib.dump(model, output_path)
    print(f"Model saved as '{output_path}'")

In [9]:
# Main workflow
def main(file_path):
    # Load and preprocess data
    print("Loading data...")
    df = load_data(file_path)
    print("Data loaded successfully.")

    # Display unique activity labels
    unique_activities = df['activity'].unique()
    print("Unique activity labels:", unique_activities)

    data, labels = preprocess_data(df)
    print("Data and labels preprocessed successfully.")

    # Extract features and labels
    print("Extracting features...")
    X, y = create_feature_dataset(data, labels)
    print("Feature extraction completed.")

    # Train the model
    print("Training the model...")
    model = train_model(X, y)

    # Save the model
    save_model(model, 'movement_detection_model.pkl')

# Run the main workflow
main('/content/consolidated_sensor_data.csv')


Loading data...
Data loaded successfully.
Unique activity labels: ['downstairs' 'running' 'standing' 'upstairs' 'walking']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data.fillna(0, inplace=True)


Data and labels preprocessed successfully.
Extracting features...


  features[f'{col}_skew'] = skew(window[col])
  features[f'{col}_kurtosis'] = kurtosis(window[col])


Feature extraction completed.
Training the model...

Classification Report:

              precision    recall  f1-score   support

  downstairs       0.93      0.89      0.91      5015
     running       0.99      0.99      0.99      8040
    standing       1.00      1.00      1.00     11905
    upstairs       0.91      0.93      0.92      5509
     walking       0.97      0.98      0.98      7430

    accuracy                           0.97     37899
   macro avg       0.96      0.96      0.96     37899
weighted avg       0.97      0.97      0.97     37899

Model saved as 'movement_detection_model.pkl'
