In [40]:
import pandas as pd
import re
import lightgbm as lgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.metrics import recall_score

df = pd.read_csv('FE_data.csv')

In [41]:
def transform_feature_names(df):
    transformed_columns = []
    for column in df.columns:
        # Remove special characters using regular expressions
        transformed_column = re.sub('[^A-Za-z0-9_]+', '_', column)
        transformed_columns.append(transformed_column)
    
    return transformed_columns

# Assuming 'df' is your DataFrame
transformed_columns = transform_feature_names(df)
df.columns = transformed_columns

# Use the transformed column names in your code
df = df[transformed_columns]
#df.head(20)


#### This part is used when one wants to perform binary classification

In [38]:
# If we want to model only a binary classification between fall and no_fall
df['posture'] = df['posture'].replace(['laying', 'sitting'], 'no_fall')
df['posture'] = df['posture'].replace(['bed_fall', 'bedfall', 'chair_fall', 'chairfall', 'stairfall', 'stumbling'], 'fall')
df

Unnamed: 0,name,trial,posture,_Accelerometer_acceleration_x__m_s_2_max_,_Accelerometer_acceleration_x__m_s_2_min_,_Accelerometer_acceleration_x__m_s_2_mean_,_Accelerometer_acceleration_x__m_s_2_std_,_Accelerometer_acceleration_x__m_s_2_median_,_Accelerometer_acceleration_y__m_s_2_max_,_Accelerometer_acceleration_y__m_s_2_min_,...,_Gyroscope_gyroscope_z__rad_s_min_,_Gyroscope_gyroscope_z__rad_s_mean_,_Gyroscope_gyroscope_z__rad_s_std_,_Gyroscope_gyroscope_z__rad_s_median_,Accelerometer_acceleration_x__m_s_2_,Accelerometer_acceleration_y__m_s_2_,Accelerometer_acceleration_z__m_s_2_,Gyroscope_gyroscope_x__rad_s_,Gyroscope_gyroscope_y__rad_s_,Gyroscope_gyroscope_z__rad_s_
0,Anshu,1,fall,0.698440,0.433133,0.544834,0.014219,0.545840,0.619940,0.270059,...,0.116257,0.388581,0.016048,0.388952,0.265307,0.349881,0.276514,0.362425,0.463884,0.431135
1,Anshu,1,fall,0.600995,0.398032,0.484320,0.017943,0.483416,0.838683,0.334601,...,0.308126,0.387820,0.015270,0.389257,0.202962,0.504082,0.186080,0.204722,0.216714,0.250239
2,Anshu,1,no_fall,0.602147,0.445350,0.509183,0.022598,0.503747,0.630410,0.374496,...,0.297765,0.387994,0.014817,0.389304,0.156797,0.255914,0.187748,0.149866,0.245746,0.181156
3,Anshu,1,no_fall,0.606788,0.490623,0.535439,0.013577,0.535882,0.588683,0.389534,...,0.322133,0.388212,0.011954,0.389257,0.116164,0.199150,0.171414,0.182200,0.277575,0.112969
4,Anshu,1,fall,1.000000,0.389949,0.531052,0.017302,0.530117,0.567505,0.140425,...,0.162688,0.389647,0.017258,0.389222,0.610051,0.427080,0.503617,0.328618,0.349724,0.517731
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,Raul,5,fall,0.795041,0.428507,0.500603,0.016490,0.502364,0.709016,0.418976,...,0.317412,0.390849,0.014513,0.389554,0.366534,0.290039,0.182354,0.205481,0.224570,0.183525
86,Raul,5,no_fall,0.568599,0.437789,0.499308,0.013088,0.500935,0.594128,0.361241,...,0.336268,0.389772,0.009721,0.389200,0.130810,0.232887,0.189552,0.141528,0.146052,0.117687
87,Raul,5,no_fall,0.553123,0.428768,0.495693,0.008988,0.495341,0.560132,0.365713,...,0.354304,0.388768,0.007577,0.389144,0.124355,0.194418,0.161586,0.138582,0.164407,0.076357
88,Raul,5,fall,0.630069,0.238148,0.505528,0.021304,0.507358,0.678990,0.254423,...,0.267988,0.389753,0.013664,0.389405,0.391922,0.424567,0.420668,0.260732,0.263313,0.194044


In [44]:
# Split the data into features (X) and target variable (y)
X = df.drop(['posture', 'name'], axis=1)  # Features
y = df['posture']  # Target variable

# Convert string labels to numeric labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a LightGBM classifier
model = lgb.LGBMClassifier()

# Define the parameter grid for hyperparameter search
param_grid = {
    'num_leaves': [5, 10, 20],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [2, 5, 10],
    'n_estimators': [20, 50, 100]
}

# Perform grid search with cross-validation
grid_search = GridSearchCV(model, param_grid, cv=3)
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_

# Create a new model with the best hyperparameters
best_model = lgb.LGBMClassifier(**best_params)

# Train the model with the best hyperparameters
best_model.fit(X_train, y_train)

# Predict on the test set
y_pred = best_model.predict(X_test)

# --------------- EVAL ------------------------
# Get the best hyperparameters
best_params = grid_search.best_params_

print("Best Parameters:")
for param, value in best_params.items():
    print(f"{param}: {value}")

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Calculate recall scores for each class
recall_scores = recall_score(y_test, y_pred, average=None)
print(f"Recall: {recall_scores}")




Best Parameters:
learning_rate: 0.05
max_depth: 2
n_estimators: 100
num_leaves: 5
Accuracy: 0.3888888888888889
Recall: [0.  0.  0.  0.5 0.5 0.5 0.5]


  _warn_prf(average, modifier, msg_start, len(result))
