<a href="https://colab.research.google.com/github/Fatine-elhassouni/HealthTrack/blob/main/Health_Status.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [None]:
# read the health measurements CSV into a pandas DataFrame
HEALTH_CSV_PATH = '/content/Health data (2).csv'
health_data = pd.read_csv(HEALTH_CSV_PATH)

In [None]:
# preview the first five records of the dataset
health_data.head(n=5)

Unnamed: 0,pulse,body temperature,SpO2,Status
0,70,36.0,99,0
1,70,36.0,98,0
2,70,36.0,97,0
3,70,36.0,96,0
4,70,36.1,100,0


In [None]:
# preview the last five records of the dataset
health_data.tail(n=5)

Unnamed: 0,pulse,body temperature,SpO2,Status
5904,88,38.9,85,2
5905,88,38.9,86,2
5906,88,38.9,87,2
5907,88,38.9,88,2
5908,88,38.9,89,2


In [None]:
# dimensions of the dataset as a (rows, columns) tuple
health_data.shape

(5909, 4)

In [None]:
# structural summary of the DataFrame: index range, column names,
# non-null counts per column, dtypes and memory usage
health_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5909 entries, 0 to 5908
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   pulse             5909 non-null   int64  
 1   body temperature  5909 non-null   float64
 2   SpO2              5909 non-null   int64  
 3   Status            5909 non-null   int64  
dtypes: float64(1), int64(3)
memory usage: 184.8 KB


In [None]:
# count the missing entries in every column
health_data.isna().sum()

pulse               0
body temperature    0
SpO2                0
Status              0
dtype: int64

In [None]:
# descriptive statistics per numeric column: count, mean, std, min, quartiles, max
health_data.describe()

Unnamed: 0,pulse,body temperature,SpO2,Status
count,5909.0,5909.0,5909.0,5909.0
mean,86.91826,37.789135,94.098832,1.255712
std,24.403935,1.380899,3.659424,0.727008
min,70.0,36.0,85.0,0.0
25%,75.0,36.6,92.0,1.0
50%,80.0,37.6,95.0,1.0
75%,86.0,38.7,97.0,2.0
max,179.0,40.9,100.0,2.0


In [None]:
# how many samples fall into each class of the target variable
status_column = health_data['Status']
status_column.value_counts()

2    2510
1    2400
0     999
Name: Status, dtype: int64

Status codes:

- 2 - the person’s life is in danger;
- 1 - the person is sick, but the illness is not life-threatening;
- 0 - no detected parameters out of the norm.

In [None]:
# separate the target column (Y) from the remaining feature columns (X)
Y = health_data['Status']
X = health_data.drop(columns=['Status'])

In [None]:
# Show the feature matrix as the cell's last expression so the notebook uses
# the DataFrame's rich (truncated head/tail) display instead of plain text.
X

      pulse  body temperature  SpO2
0        70              36.0    99
1        70              36.0    98
2        70              36.0    97
3        70              36.0    96
4        70              36.1   100
...     ...               ...   ...
5904     88              38.9    85
5905     88              38.9    86
5906     88              38.9    87
5907     88              38.9    88
5908     88              38.9    89

[5909 rows x 3 columns]


In [None]:
# Show the target column as the cell's last expression rather than via print().
Y

0       0
1       0
2       0
3       0
4       0
       ..
5904    2
5905    2
5906    2
5907    2
5908    2
Name: Status, Length: 5909, dtype: int64


In [None]:
# Hold out 20% of the rows for testing; stratify on Y so both splits keep the
# class proportions, and fix the seed so the split is repeatable.
split_options = dict(test_size=0.2, stratify=Y, random_state=2)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, **split_options)

In [None]:
# shapes of the full feature matrix, the training split and the test split
print(*(frame.shape for frame in (X, X_train, X_test)))

(5909, 3) (4727, 3) (1182, 3)


## Model Training

In [None]:
# Spot-check several classifier families on the training split.
# NOTE(review): `multi_class` is reported as deprecated by recent scikit-learn
# releases — confirm against the installed version before upgrading.
models = [
    ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    # seeded so the tree (and therefore its CV score) is reproducible across runs
    ('CART', DecisionTreeClassifier(random_state=1)),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
]

# The splitter does not depend on the model, so build it once. With a fixed
# integer seed every model is scored on the same ten stratified folds.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)

# Evaluate each model in turn with 10-fold cross-validated accuracy.
# `results` holds the per-fold scores and `names` the matching labels.
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    # mean accuracy across folds, with the standard deviation in parentheses
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))


LR: 0.956414 (0.009689)
LDA: 0.953244 (0.009500)
KNN: 0.988155 (0.005193)
CART: 1.000000 (0.000000)
NB: 0.958958 (0.008379)
SVM: 0.993018 (0.002686)


In [None]:
# Fit the final decision tree on the training split.
# A fixed random_state makes the fitted tree reproducible across runs.
cart_model = DecisionTreeClassifier(random_state=1)
cart_model.fit(X_train, Y_train)

In [None]:
# predict class labels for the held-out test split (X_test)
predictions = cart_model.predict(X_test)

In [None]:
# accuracy on training data
X_train_prediction = cart_model.predict(X_train)
# accuracy_score takes the ground-truth labels first and the predictions second
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# report the fraction of training samples the fitted tree labels correctly
print('Accuracy on training data :', training_data_accuracy)

Accuracy on training data : 1.0


In [None]:
# accuracy on test data
X_test_prediction = cart_model.predict(X_test)
# accuracy_score takes the ground-truth labels first and the predictions second
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# report the fraction of held-out test samples the fitted tree labels correctly
print('Accuracy on test data :', test_data_accuracy)

Accuracy on test data : 1.0


## Building a Predictive System

In [None]:
# one sample to classify, in training column order: (pulse, body temperature, SpO2)
input_data = (70,36.7,98)

# Wrap the sample in a one-row DataFrame that carries the training column
# names, so the model receives the same feature layout it was fitted on
# (a bare NumPy array makes scikit-learn warn about missing feature names).
input_frame = pd.DataFrame([input_data], columns=X_train.columns)

prediction = cart_model.predict(input_frame)
print(prediction)

# Map the predicted Status code to its message; any code other than 0 or 1
# falls through to the life-threatening warning.
status_messages = {
    0: 'no detected parameters out of the norm',
    1: 'You are sick, but the illness is not life-threatening',
}
print(status_messages.get(int(prediction[0]), "Your life is in danger, See your Doctor!!!"))

[0]
no detected parameters out of the norm




In [None]:
# one sample to classify, in training column order: (pulse, body temperature, SpO2)
input_data = (70,36.9,93)

# Wrap the sample in a one-row DataFrame that carries the training column
# names, so the model receives the same feature layout it was fitted on
# (a bare NumPy array makes scikit-learn warn about missing feature names).
input_frame = pd.DataFrame([input_data], columns=X_train.columns)

prediction = cart_model.predict(input_frame)
print(prediction)

# Map the predicted Status code to its message; any code other than 0 or 1
# falls through to the life-threatening warning.
status_messages = {
    0: 'no detected parameters out of the norm',
    1: 'You are sick, but the illness is not life-threatening',
}
print(status_messages.get(int(prediction[0]), "Your life is in danger, See your Doctor!!!"))

[1]
You are sick, but the illness is not life-threatening




In [None]:
# one sample to classify, in training column order: (pulse, body temperature, SpO2)
input_data = (86,40.3,90)

# Wrap the sample in a one-row DataFrame that carries the training column
# names, so the model receives the same feature layout it was fitted on
# (a bare NumPy array makes scikit-learn warn about missing feature names).
input_frame = pd.DataFrame([input_data], columns=X_train.columns)

prediction = cart_model.predict(input_frame)
print(prediction)

# Map the predicted Status code to its message; any code other than 0 or 1
# falls through to the life-threatening warning.
status_messages = {
    0: 'no detected parameters out of the norm',
    1: 'You are sick, but the illness is not life-threatening',
}
print(status_messages.get(int(prediction[0]), "Your life is in danger, See your Doctor!!!"))

[2]
Your life is in danger, See your Doctor!!!


