# **<u>Health Lifestyle Analysis</u>**

In [2]:
import pandas as pd

## Dataset

##### Dataset Title : 
Synthetic Health and Lifestyle Dataset

##### Dataset Description :
This is a synthetic dataset generated to simulate health and lifestyle information for 1,326 individuals. It includes key attributes such as age, gender, body meeasurments (Height, Weight), Lifestyle habits (Smoking, Alchohol consumption, Excersise Frequency), sleep pattern, and presece of chronic diseases.

[Full Dataset on Kaggle](https://www.kaggle.com/datasets/sahilislam007/health-and-lifestyle-dataset)

In [4]:
df = pd.read_csv('./datasets/synthetic_health_lifestyle_dataset.csv')
df.drop(["Height_cm","Weight_kg"],axis="columns",inplace=True)
df.head()

Unnamed: 0,ID,Age,Gender,BMI,Smoker,Exercise_Freq,Diet_Quality,Alcohol_Consumption,Chronic_Disease,Stress_Level,Sleep_Hours
0,1,56,Other,11.8,Yes,,Poor,,No,9,8.5
1,2,69,Other,24.7,No,1-2 times/week,Good,High,No,2,5.9
2,3,46,Female,27.3,No,Daily,Excellent,Moderate,No,3,4.8
3,4,32,Male,26.3,No,3-5 times/week,Excellent,Moderate,No,9,6.6
4,5,60,Male,24.1,No,3-5 times/week,Excellent,Low,Yes,6,6.1


#### Handling Missing Values

##### Columns : 
+ Excercise_Freq
+ Alcohol_Consumption

In [6]:
df["Exercise_Freq"] = df["Exercise_Freq"].fillna(df["Exercise_Freq"].mode()[0])
df["Alcohol_Consumption"] = df["Alcohol_Consumption"].fillna(df["Alcohol_Consumption"].mode()[0])
df.isna().sum()

ID                     0
Age                    0
Gender                 0
BMI                    0
Smoker                 0
Exercise_Freq          0
Diet_Quality           0
Alcohol_Consumption    0
Chronic_Disease        0
Stress_Level           0
Sleep_Hours            0
dtype: int64

#### Encording Categorical Variables

In [8]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
binaryMap = {"Yes":1,"No":0}

---
**Use "_Label Encording_" technique** <br><br>
Columns :
+ Gender
+ Exercise_Freq
+ Diet_Quality
+ Alcohol_Consumption

In [10]:
df["Gender"] = le.fit_transform(df["Gender"])
df["Exercise_Freq"] = le.fit_transform(df["Exercise_Freq"])
df["Diet_Quality"] = le.fit_transform(df["Diet_Quality"])
df["Alcohol_Consumption"] = le.fit_transform(df["Alcohol_Consumption"])
df.head()

Unnamed: 0,ID,Age,Gender,BMI,Smoker,Exercise_Freq,Diet_Quality,Alcohol_Consumption,Chronic_Disease,Stress_Level,Sleep_Hours
0,1,56,2,11.8,Yes,2,3,1,No,9,8.5
1,2,69,2,24.7,No,0,2,0,No,2,5.9
2,3,46,0,27.3,No,2,1,2,No,3,4.8
3,4,32,1,26.3,No,1,1,2,No,9,6.6
4,5,60,1,24.1,No,1,1,1,Yes,6,6.1


---
**Use "_Binary Encording_" technique** 
<br>
<br>
Columns :
+ Smoker
+ Chronic_Diesease

In [12]:
df["Smoker"] = df["Smoker"].map(binaryMap)
df["Chronic_Disease"] = df["Chronic_Disease"].map(binaryMap)
df.head()

Unnamed: 0,ID,Age,Gender,BMI,Smoker,Exercise_Freq,Diet_Quality,Alcohol_Consumption,Chronic_Disease,Stress_Level,Sleep_Hours
0,1,56,2,11.8,1,2,3,1,0,9,8.5
1,2,69,2,24.7,0,0,2,0,0,2,5.9
2,3,46,0,27.3,0,2,1,2,0,3,4.8
3,4,32,1,26.3,0,1,1,2,0,9,6.6
4,5,60,1,24.1,0,1,1,1,1,6,6.1


In [13]:
from sklearn.preprocessing import Normalizer

nz = Normalizer()

y_values = df["Chronic_Disease"]
x_values = df.drop(columns="Chronic_Disease")

normalized = nz.fit_transform(x_values)
normalized_df = pd.DataFrame(normalized,columns=x_values.columns)

normalized_df.drop("ID",axis="columns",inplace=True)
normalized_df.head()

Unnamed: 0,Age,Gender,BMI,Smoker,Exercise_Freq,Diet_Quality,Alcohol_Consumption,Stress_Level,Sleep_Hours
0,0.953616,0.034058,0.200941,0.017029,0.034058,0.051087,0.017029,0.15326,0.144745
1,0.937073,0.027162,0.335445,0.0,0.0,0.027162,0.0,0.027162,0.080127
2,0.852535,0.0,0.505961,0.0,0.037067,0.018533,0.037067,0.0556,0.08896
3,0.741335,0.023167,0.609285,0.0,0.023167,0.023167,0.046333,0.2085,0.1529
4,0.916803,0.01528,0.368249,0.0,0.01528,0.01528,0.01528,0.09168,0.093208


In [14]:
from sklearn.model_selection import train_test_split

In [15]:
x_train, x_test, y_train, y_test = train_test_split(x_values,y_values,test_size=0.2)

---

### Support Vector Machine

In [17]:
from sklearn.svm import SVC

In [None]:
model_lin = SVC(kernel="linear")
model_lin.fit(x_train,y_train)
model_lin.score(x_test,y_test)

In [None]:
model_rbf = SVC(kernel="rbf")
model_rbf.fit(x_train,y_train)
model_rbf.score(x_test,y_test)

In [None]:
model_poly = SVC(kernel="poly")
model_poly.fit(x_train,y_train)
model_poly.score(x_test,y_test)

---

### Gaussian Naïve Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
model= GaussianNB()
model.fit(x_train,y_train)
model.score(x_test,y_test)