In [12]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import tree
import joblib

# Step 1: Load Data
df = pd.read_csv('synthetic_health_lifestyle_dataset.csv')
df.dropna(inplace=True)

# Step 2: Preprocess Categorical Columns with Correct Capitalization
df['Gender'] = df['Gender'].map({'Female': 0, 'Male': 1, 'Other': 2})
df['Smoker'] = df['Smoker'].map({'No': 0, 'Yes': 1})
df['Diet_Quality'] = df['Diet_Quality'].map({'Poor': 0, 'Average': 1, 'Good': 2, 'Excellent': 3})
df['Alcohol_Consumption'] = df['Alcohol_Consumption'].map({'None': 0, 'Low': 1, 'Moderate': 2, 'High': 3})
df['Chronic_Disease'] = df['Chronic_Disease'].map({'No': 0, 'Yes': 1})
df['Exercise_Freq'] = df['Exercise_Freq'].map({
    'None': 0,
    '1-2 times/week': 1,
    '3-5 times/week': 2,
    'Daily': 3
})

# Step 3: Define Features and Label
X = df.drop(columns=['ID', 'Chronic_Disease'])  # Drop ID and target
y = df['Chronic_Disease']

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train the Model
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 6: Evaluate Model
predictions = model.predict(X_test)
score = accuracy_score(y_test, predictions)
print("Accuracy:", score)

Accuracy: 0.6869047619047619


In [14]:
# Step 7: Save the Model
joblib.dump(model, 'health-predictor.joblib')

# Step 8: Load and Predict
model = joblib.load('health-predictor.joblib')


In [15]:
# Example sample input: Age=30, Gender=1(male), Height_cm=170, Weight_kg=65,
# BMI=22.5, Smoker=0, Exercise_Freq=3 (Daily), Diet_Quality=2 (Good),
# Alcohol_Consumption=1 (Low), Stress_Level=5, Sleep_Hours=7

sample_data = pd.DataFrame([{
    'Age': 30,
    'Gender': 1,
    'Height_cm': 170,
    'Weight_kg': 65,
    'BMI': 22.5,
    'Smoker': 0,
    'Exercise_Freq': 3,
    'Diet_Quality': 2,
    'Alcohol_Consumption': 1,
    'Stress_Level': 5,
    'Sleep_Hours': 7
}])

prediction = model.predict(sample_data)
print("Predicted Chronic Disease (0=No, 1=Yes):", prediction[0])

Predicted Chronic Disease (0=No, 1=Yes): 1


In [16]:
# Step 9: Export Tree
tree.export_graphviz(
    model,
    out_file='health-tree.dot',
    feature_names=X.columns,
    class_names=['No', 'Yes'],
    label='all',
    filled=True,
    rounded=True
)