In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [4]:
# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/cStick.csv')
data

Unnamed: 0,Distance,Pressure,HRV,Sugar level,SpO2,Accelerometer,Decision
0,25.540,1.0,101.396,61.080,87.770,1.0,1
1,2.595,2.0,110.190,20.207,65.190,1.0,2
2,68.067,0.0,87.412,79.345,99.345,0.0,0
3,13.090,1.0,92.266,36.180,81.545,1.0,1
4,69.430,0.0,89.480,80.000,99.990,0.0,0
...,...,...,...,...,...,...,...
2034,5.655,2.0,116.310,162.242,71.310,1.0,2
2035,9.660,2.0,124.320,177.995,79.320,1.0,2
2036,15.220,1.0,93.828,40.440,82.610,1.0,1
2037,9.120,2.0,123.240,175.871,78.240,1.0,2


In [5]:
data.columns

Index(['Distance', 'Pressure', 'HRV', 'Sugar level', 'SpO2', 'Accelerometer',
       'Decision '],
      dtype='object')

In [6]:
outcome_dict = {0:'No Fall detected',1:'Slip detected',2:'Definite fall'}
outcome_list = ['No Fall detected','Slip detected','Definite fall']
outcome_list

['No Fall detected', 'Slip detected', 'Definite fall']

In [7]:
feature_list = ['Distance', 'Pressure', 'HRV', 'Sugar level', 'SpO2', 'Accelerometer']
feature_list

['Distance', 'Pressure', 'HRV', 'Sugar level', 'SpO2', 'Accelerometer']

In [8]:
df_all_cols = data.columns
df_all_cols

Index(['Distance', 'Pressure', 'HRV', 'Sugar level', 'SpO2', 'Accelerometer',
       'Decision '],
      dtype='object')

In [9]:
# convert to float (if not already float)
df_Cstick1 = data.astype(float)

# convert to numpy arrary
C = df_Cstick1.to_numpy()

# Get features and labels
features = C[:,0:6]
labels = C[:,-1]

features


array([[ 25.54 ,   1.   , 101.396,  61.08 ,  87.77 ,   1.   ],
       [  2.595,   2.   , 110.19 ,  20.207,  65.19 ,   1.   ],
       [ 68.067,   0.   ,  87.412,  79.345,  99.345,   0.   ],
       ...,
       [ 15.22 ,   1.   ,  93.828,  40.44 ,  82.61 ,   1.   ],
       [  9.12 ,   2.   , 123.24 , 175.871,  78.24 ,   1.   ],
       [ 62.441,   0.   ,  78.876,  76.435,  96.435,   0.   ]])

In [10]:
# Scramble the order of all the rows in the data to remove (potential) dependence on on its ordering
# This makes our model more generalizable/robust
indices = np.random.permutation(len(labels))

# we scramble both X and y, necessarily with the same permutation. Use the same indices arragangement.
X = features[indices]
y = labels[indices]

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [14]:
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import classification_report

svm_model = SVC(kernel='linear', random_state=42)
scores = cross_val_score(svm_model, X, y, cv=5, scoring='accuracy') # 5-fold cross-validation
print("Cross-validation scores:", scores)
print("Mean cross-validation accuracy:", scores.mean())

y_pred_cv = cross_val_predict(svm_model, X, y, cv=5)
print("\nClassification Report (Cross-validated):")
print(classification_report(y, y_pred_cv, target_names=outcome_list))


Cross-validation scores: [1. 1. 1. 1. 1.]
Mean cross-validation accuracy: 1.0

Classification Report (Cross-validated):
                  precision    recall  f1-score   support

No Fall detected       1.00      1.00      1.00       690
   Slip detected       1.00      1.00      1.00       682
   Definite fall       1.00      1.00      1.00       667

        accuracy                           1.00      2039
       macro avg       1.00      1.00      1.00      2039
    weighted avg       1.00      1.00      1.00      2039



In [15]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test) # Apply the same scaling to the test set

svm_model.fit(X_train_scaled, y_train) # Train on scaled data
y_pred_svm = svm_model.predict(X_test_scaled)

In [16]:
# Evaluate the SVM model
y_pred_svm = svm_model.predict(X_test)

print("SVM Accuracy Score:")
print(accuracy_score(y_test, y_pred_svm))

print("\nSVM Classification Report:")
print(classification_report(y_test, y_pred_svm, target_names=outcome_list))

SVM Accuracy Score:
0.6642156862745098

SVM Classification Report:
                  precision    recall  f1-score   support

No Fall detected       1.00      1.00      1.00       138
   Slip detected       0.00      0.00      0.00       137
   Definite fall       0.49      1.00      0.66       133

        accuracy                           0.66       408
       macro avg       0.50      0.67      0.55       408
    weighted avg       0.50      0.66      0.55       408



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
# Predict the output with the model
prediction = svm_model.predict(X_test)
print('The accuracy of the SVM Model is: {:.2f}%'.format(accuracy_score(y_test, prediction) * 100))


The accuracy of the SVM Model is: 66.42%


In [20]:
from sklearn.linear_model import LogisticRegression

logreg_model = LogisticRegression(C=1.0, penalty='l2', solver='lbfgs', max_iter=1000, random_state=42) #l2 regularization, adjust C for strength
logreg_model.fit(X_train, y_train)

In [21]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']} # Example for SVM
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='f1_macro') #scoring='f1_macro' is good for imbalanced datasets
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

{'C': 0.1, 'kernel': 'linear'}
1.0


In [22]:
# Predict using the Logistic Regression model
y_pred_log_reg = logreg_model.predict(X_test)

# Evaluate the Logistic Regression model using accuracy metric
log_reg_accuracy = accuracy_score(y_test, y_pred_log_reg)

# Print the accuracy of the Logistic Regression model
print('The accuracy of the Logistic Regression Model is:', log_reg_accuracy)

print("\nLogistic Regression Classification Report:")
print(classification_report(y_test, y_pred_log_reg, target_names=outcome_list))

The accuracy of the Logistic Regression Model is: 1.0

Logistic Regression Classification Report:
                  precision    recall  f1-score   support

No Fall detected       1.00      1.00      1.00       138
   Slip detected       1.00      1.00      1.00       137
   Definite fall       1.00      1.00      1.00       133

        accuracy                           1.00       408
       macro avg       1.00      1.00      1.00       408
    weighted avg       1.00      1.00      1.00       408



In [24]:
# Predict the output with the model
prediction = logreg_model.predict(X_test)
print('The accuracy of the Logistic Regression Model is: {:.2f}%'.format(accuracy_score(y_test, prediction) * 100))


The accuracy of the Logistic Regression Model is: 100.00%
