## Feature Engineering & Model Training on Cleaned Health Monitoring Dataset


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load the cleaned dataset.
# The path is written as a raw string: in a regular string literal the Windows
# separators followed by letters (\M, \A, \c) are invalid escape sequences,
# which Python reports with a SyntaxWarning and plans to reject in the future.
# The raw string evaluates to exactly the same path.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path so the notebook can run on other machines.
DATA_PATH = r"D:\Machine_Learning_Project\AI_for_Elderly_Care_and_Support\cleaned_data\cleaned_health_monitoring_dataset.csv"
df = pd.read_csv(DATA_PATH)

# Display the first few rows as a quick sanity check of the load
df.head()

  df = pd.read_csv("D:\Machine_Learning_Project\AI_for_Elderly_Care_and_Support\cleaned_data\cleaned_health_monitoring_dataset.csv")


Unnamed: 0,user_id,timestamp,heart_rate,hr_alert,bp_alert,glucose_level,glucose_alert,spo2,spo2_alert,alert_triggered,caregiver_notified,systolic_bp,diastolic_bp
0,D1000,2025-01-22 20:42:00,116,1,1,141,1,98,0,1,1,136.0,79.0
1,D1001,2025-01-16 12:22:00,119,1,0,146,1,93,0,1,1,105.0,77.0
2,D1002,2025-01-10 09:26:00,97,0,1,133,0,97,0,1,1,120.0,87.0
3,D1003,2025-01-10 09:53:00,113,1,1,82,0,98,0,1,1,138.0,65.0
4,D1004,2025-01-03 15:50:00,88,0,0,146,1,97,0,1,1,108.0,69.0


In [16]:
# Parse the 'timestamp' column into datetime values and store it back on the frame
parsed_timestamps = pd.to_datetime(df["timestamp"])
df["timestamp"] = parsed_timestamps

In [17]:
# Derive calendar features from the parsed timestamp column
timestamp_parts = df["timestamp"].dt
df["hour"] = timestamp_parts.hour                # hour component of the timestamp
df["day_of_week"] = timestamp_parts.dayofweek    # pandas convention: Monday=0 ... Sunday=6
df["month"] = timestamp_parts.month              # month number


In [18]:
# Drop the identifier and the raw timestamp columns from the frame
columns_to_drop = ["user_id", "timestamp"]
df = df.drop(columns_to_drop, axis="columns")

In [19]:
# Show the dtype of every remaining column
column_dtypes = df.dtypes
print(column_dtypes)

heart_rate              int64
hr_alert                int64
bp_alert                int64
glucose_level           int64
glucose_alert           int64
spo2                    int64
spo2_alert              int64
alert_triggered         int64
caregiver_notified      int64
systolic_bp           float64
diastolic_bp          float64
hour                    int32
day_of_week             int32
month                   int32
dtype: object


In [20]:
# Count missing values per column
null_counts = df.isna().sum()
print("Null values: ", null_counts)

Null values:  heart_rate            0
hr_alert              0
bp_alert              0
glucose_level         0
glucose_alert         0
spo2                  0
spo2_alert            0
alert_triggered       0
caregiver_notified    0
systolic_bp           0
diastolic_bp          0
hour                  0
day_of_week           0
month                 0
dtype: int64


In [21]:
# Preview the first five rows of the engineered frame
df.head(n=5)

Unnamed: 0,heart_rate,hr_alert,bp_alert,glucose_level,glucose_alert,spo2,spo2_alert,alert_triggered,caregiver_notified,systolic_bp,diastolic_bp,hour,day_of_week,month
0,116,1,1,141,1,98,0,1,1,136.0,79.0,20,2,1
1,119,1,0,146,1,93,0,1,1,105.0,77.0,12,3,1
2,97,0,1,133,0,97,0,1,1,120.0,87.0,9,4,1
3,113,1,1,82,0,98,0,1,1,138.0,65.0,9,4,1
4,88,0,0,146,1,97,0,1,1,108.0,69.0,15,4,1


In [22]:
# Split the frame into the feature matrix (X) and the target vector (y).
# NOTE(review): the per-vital *_alert flags are used as features; if
# 'alert_triggered' is derived from those flags this is target leakage — confirm.
feature_columns = [
    "heart_rate", "hr_alert", "bp_alert", "glucose_level", "glucose_alert",
    "spo2", "spo2_alert", "systolic_bp", "diastolic_bp",
    "hour", "day_of_week", "month",
]
target_column = "alert_triggered"  # the label the model is trained to predict

X = df[feature_columns]
y = df[target_column]

In [23]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [24]:
# Standardise the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
# Fit a logistic regression classifier (default hyperparameters) on the scaled training data
log_reg = LogisticRegression().fit(X_train_scaled, y_train)
log_reg  # keep the fitted estimator as the cell's displayed value


In [26]:
# Predict labels for the scaled held-out test features
y_pred = log_reg.predict(X=X_test_scaled)

In [27]:
# Compute the evaluation metrics on the held-out test set first ...
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# ... then print them in a fixed order
print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)

Accuracy: 1.0

Confusion Matrix:
 [[ 529    0]
 [   0 1471]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       529
           1       1.00      1.00      1.00      1471

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



In [28]:
import joblib

# Persist the fitted scaler next to the model: the classifier was trained on
# StandardScaler-transformed features, so inference code must apply this same
# fitted scaler to raw inputs before calling predict().
joblib.dump(scaler, "health_alert_scaler.pkl")

# Persist the trained classifier (file name and contents unchanged).
# Kept as the last expression so the cell still displays ['health_alert.pkl'].
joblib.dump(log_reg, "health_alert.pkl")

['health_alert.pkl']