In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from statistics import mode
import os
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import RFE 
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


In [54]:
# Location of the feature CSV. Set the FALL_FEATURES_CSV environment variable to
# point at your own copy of the file; when it is unset, the original author's
# local path is used so existing runs behave exactly as before.
path = os.environ.get(
    'FALL_FEATURES_CSV',
    'C:\\Users\\aless\\Downloads\\combined_df_feat(1).csv',
)

In [55]:
# Load the feature table from `path` and show its column names as the cell output.
data = pd.read_csv(path)
data.columns


Index(['acceleration_x', 'acceleration_y', 'acceleration_z', 'gyroscope_x',
       'gyroscope_y', 'gyroscope_z', 'acceleration_x_median',
       'acceleration_y_median', 'acceleration_z_median', 'gyroscope_x_median',
       'gyroscope_y_median', 'gyroscope_z_median', 'acceleration_x_sum',
       'acceleration_x_mean', 'acceleration_x_std', 'acceleration_x_min',
       'acceleration_x_max', 'acceleration_y_sum', 'acceleration_y_mean',
       'acceleration_y_std', 'acceleration_y_min', 'acceleration_y_max',
       'acceleration_z_sum', 'acceleration_z_mean', 'acceleration_z_std',
       'acceleration_z_min', 'acceleration_z_max', 'gyroscope_x_sum',
       'gyroscope_x_mean', 'gyroscope_x_std', 'gyroscope_x_min',
       'gyroscope_x_max', 'gyroscope_y_sum', 'gyroscope_y_mean',
       'gyroscope_y_std', 'gyroscope_y_min', 'gyroscope_y_max',
       'gyroscope_z_sum', 'gyroscope_z_mean', 'gyroscope_z_std',
       'gyroscope_z_min', 'gyroscope_z_max', 'acc_magnitude_sum',
       'acc_magnitud

In [56]:
def map_label_to_new_category(label):
    """Collapse a raw label into one of four coarse activity categories.

    The label is converted to its string form and searched for keywords by
    substring match, in priority order:

    - contains 'fall'     -> 'falling'
    - contains 'walking'  -> 'walking'
    - contains 'standing' -> 'standing'

    Anything else (including values whose string form holds none of the
    keywords, such as None or NaN) is mapped to 'other'.
    """
    text = str(label)
    # Order matters: the first keyword found in the text decides the category.
    keyword_to_category = (
        ('fall', 'falling'),
        ('walking', 'walking'),
        ('standing', 'standing'),
    )
    for keyword, category in keyword_to_category:
        if keyword in text:
            return category
    return 'other'

# Derive a coarse 'simple_label' column by passing every raw 'label' value
# through map_label_to_new_category (falling / walking / standing / other).
data['simple_label'] = data['label'].apply(map_label_to_new_category)

# Preview the first rows, including the new column, as the cell output.
data.head()

Unnamed: 0,acceleration_x,acceleration_y,acceleration_z,gyroscope_x,gyroscope_y,gyroscope_z,acceleration_x_median,acceleration_y_median,acceleration_z_median,gyroscope_x_median,...,acc_magnitude_min,acc_magnitude_max,gyr_magnitude_sum,gyr_magnitude_mean,gyr_magnitude_std,gyr_magnitude_min,gyr_magnitude_max,label,filename,simple_label
0,4292,-570,847,23,-60,-14,,,,,...,,,,,,,,standing,fall_back_data_20240129_163134_labeled,standing
1,4132,-223,886,-14,-14,53,,,,,...,,,,,,,,standing,fall_back_data_20240129_163134_labeled,standing
2,4254,-361,1013,18,-78,25,,,,,...,,,,,,,,standing,fall_back_data_20240129_163134_labeled,standing
3,4321,-270,979,-38,-65,31,,,,,...,,,,,,,,standing,fall_back_data_20240129_163134_labeled,standing
4,4228,-371,1083,-68,-70,18,,,,,...,,,,,,,,standing,fall_back_data_20240129_163134_labeled,standing


In [57]:
# Keep only the three target classes; rows mapped to 'other' are discarded.
data = data[data["simple_label"].isin(["falling", "walking", "standing"])]

In [58]:
# Discard every row that has at least one missing value.
data = data.dropna()

# Target: the coarse activity label.
y = data['simple_label']

# Features: everything except the recording name and the two label columns.
X = data.drop(columns=['filename', 'label', 'simple_label'])

In [59]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
# NOTE(review): the split is row-wise and not stratified on y. The frame also
# carries a 'filename' (recording) column, so rows from one recording can land
# in both splits — if the *_mean/_std/... columns are window aggregates this may
# inflate the test score; confirm, or split by recording instead.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [60]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [61]:
# Base estimator handed to RFE; it is also the classifier fitted in a later cell
model = LogisticRegression(max_iter=1000)

# Recursive feature elimination, removing one feature per iteration (step=1).
# n_features_to_select is NOT passed here, so scikit-learn's default applies
# (half of the input features are kept).
selector = RFE(model, step=1)

# Fit RFE on the scaled training data only
selector = selector.fit(X_train_scaled, y_train)

# Reduce both splits to the feature subset RFE selected
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

In [62]:
# Boolean mask over the input features: True where RFE kept the feature
selected_features_mask = selector.get_support()

# The mask is positional, so indexing X's columns with it yields the kept names
selected_columns = X.columns[selected_features_mask]

# Show the retained feature names as a plain list
print("Selected features:", selected_columns.tolist())

Selected features: ['acceleration_x_median', 'acceleration_y_median', 'acceleration_z_median', 'acceleration_x_std', 'acceleration_x_max', 'acceleration_y_std', 'acceleration_y_max', 'acceleration_z_mean', 'acceleration_z_std', 'acceleration_z_min', 'acceleration_z_max', 'gyroscope_x_std', 'gyroscope_y_std', 'gyroscope_y_min', 'gyroscope_y_max', 'gyroscope_z_std', 'gyroscope_z_min', 'gyroscope_z_max', 'acc_magnitude_sum', 'acc_magnitude_mean', 'acc_magnitude_std', 'acc_magnitude_min', 'acc_magnitude_max', 'gyr_magnitude_std', 'gyr_magnitude_min', 'gyr_magnitude_max']


In [63]:
# Fit the classifier on the training set, using only the scaled,
# RFE-selected feature columns
model.fit(X_train_selected, y_train)


In [64]:
# Predict on the test set (same scaled, RFE-selected columns the model was fitted on)
y_pred = model.predict(X_test_selected)

# Evaluate the model: overall accuracy plus per-class precision / recall / F1
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9872673849167483

Classification Report:
               precision    recall  f1-score   support

     falling       0.97      0.84      0.90       228
    standing       0.98      1.00      0.99      2318
     walking       0.99      1.00      0.99      1538

    accuracy                           0.99      4084
   macro avg       0.98      0.94      0.96      4084
weighted avg       0.99      0.99      0.99      4084



In [67]:
import joblib

In [68]:
# Persist every fitted stage needed at inference time. The model was fitted on
# the RFE-selected columns, so the selector must be saved alongside the scaler
# and the model; without it the saved model cannot be fed correctly shaped input.
joblib.dump(scaler, 'scaler.pkl')  # This will save your scaler to scaler.pkl
joblib.dump(selector, 'selector.pkl')  # This will save the fitted RFE selector to selector.pkl
joblib.dump(model, 'model.pkl')  # This will save your model to model.pkl

['model.pkl']

In [None]:
def predict_label(input_row):
    """
    Takes a row of input features, preprocesses it, applies the prediction model,
    and returns the predicted label.

    The row goes through the same stages used for training: scaling with the
    fitted `scaler`, reduction to the RFE-selected columns with the fitted
    `selector`, then classification with `model`. All three are read from the
    notebook's global namespace and must already be fitted.

    Parameters:
    - input_row: One observation holding ALL original feature columns, in the
      same order as the training features. May be a list, tuple, 1-D numpy
      array or 1-D pandas Series (reshaped to a single row), or an
      already 2-D array / DataFrame (passed through unchanged).

    Returns:
    - The predicted label for the input observation (the first row's label if a
      2-D input with several rows is given).
    """
    # Plain Python sequences have no `ndim`; turn them into an array first
    if not hasattr(input_row, "ndim"):
        input_row = np.asarray(input_row)

    # Ensure input_row is in the correct shape (1, number_of_features)
    if input_row.ndim == 1:
        input_row = np.asarray(input_row).reshape(1, -1)

    # Apply scaling to the input row using the trained scaler
    scaled_row = scaler.transform(input_row)

    # Keep only the columns RFE selected: the model was fitted on this reduced
    # feature set, so passing the full scaled row would fail with a
    # feature-count mismatch
    selected_row = selector.transform(scaled_row)

    # Use the model to predict the label for the preprocessed input row
    predicted_label = model.predict(selected_row)

    return predicted_label[0]  # Return the predicted label