In [43]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score
from xgboost import XGBRegressor

In [44]:
df = pd.read_csv('/kaggle/input/datasets/romyajitdas/hackathon-py/iot_healthcare_realistic_10000_rows.csv')

In [45]:
df.head()

Unnamed: 0,HR,SpO2,Temperature,ECG_mean,BP_Systolic,BP_Diastolic,Condition
0,67,97,36.68,1.247,112.7,75.1,Normal
1,93,99,36.77,0.8,118.0,77.3,Normal
2,90,88,37.49,0.778,120.3,72.1,Hypoxia
3,78,98,37.15,1.017,112.4,74.9,Normal
4,93,97,38.26,1.088,111.1,76.3,Fever


In [46]:
df.info

<bound method DataFrame.info of        HR  SpO2  Temperature  ECG_mean  BP_Systolic  BP_Diastolic Condition
0      67    97        36.68     1.247        112.7          75.1    Normal
1      93    99        36.77     0.800        118.0          77.3    Normal
2      90    88        37.49     0.778        120.3          72.1   Hypoxia
3      78    98        37.15     1.017        112.4          74.9    Normal
4      93    97        38.26     1.088        111.1          76.3     Fever
...   ...   ...          ...       ...          ...           ...       ...
9995  115    90        36.29     1.197        115.1          76.2   Hypoxia
9996   94   100        37.25     0.797        108.3          77.2    Normal
9997   78    96        36.34     1.232        119.6          78.8    Normal
9998   83    97        36.33     0.877        112.6          77.4    Normal
9999   91    99        38.37     1.032        109.2          65.4     Fever

[10000 rows x 7 columns]>

In [47]:
df.shape

(10000, 7)

In [48]:
df.isnull().sum()

HR              0
SpO2            0
Temperature     0
ECG_mean        0
BP_Systolic     0
BP_Diastolic    0
Condition       0
dtype: int64

In [49]:
df['HR_SpO2'] = df['HR'] * df['SpO2']
df['HR_Temp'] = df['HR'] * df['Temperature']
df['ECG_HR'] = df['ECG_mean'] * df['HR']

In [50]:
X = df[['HR', 'SpO2', 'Temperature', 'ECG_mean']]
y = df[['BP_Systolic', 'BP_Diastolic']]

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=42)

In [52]:
xgb = MultiOutputRegressor(
    XGBRegressor(
        n_estimators=400,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )
)


In [53]:
xgb.fit(X_train, y_train)

In [54]:
X_train

Unnamed: 0,HR,SpO2,Temperature,ECG_mean
9254,61,96,36.31,0.820
1561,72,99,36.33,0.892
1670,118,81,36.95,1.223
6087,98,99,36.75,0.894
6669,79,98,37.28,0.976
...,...,...,...,...
5734,85,95,36.73,0.967
5191,81,97,36.45,1.178
5390,98,95,36.44,0.849
860,102,98,36.43,0.817


In [55]:
y_train

Unnamed: 0,BP_Systolic,BP_Diastolic
9254,119.5,70.4
1561,119.2,73.4
1670,120.0,62.3
6087,107.6,65.4
6669,109.2,74.0
...,...,...
5734,102.6,77.3
5191,114.3,68.2
5390,107.3,65.4
860,143.3,92.2


In [56]:
y_pred = xgb.predict(X_test)

r2 = r2_score(y_test, y_pred)
print("Model Accuracy (R2 Score):", round(r2 * 100, 2), "%")

Model Accuracy (R2 Score): 50.71 %


In [57]:
joblib.dump(xgb, "xgboost_bp_model.pkl")

print("Model Saved Successfully!")

Model Saved Successfully!
