In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Train a RandomForest classifier that predicts the `health_tips` label from
# the remaining columns of the dataset, then persist the model together with
# every fitted preprocessing object needed to reproduce the transformation.

# Step 1: Load the dataset
df = pd.read_csv('health_data_with_tips.csv')

# Step 2: Preprocessing
# Each categorical feature gets its OWN LabelEncoder. LabelEncoder.fit_transform
# re-fits the instance on every call, so sharing one encoder across columns
# would discard each column's string->integer mapping as soon as the next
# column is encoded, making it impossible to encode new raw inputs later.
categorical_columns = [
    'activity_level',
    'diet_quality',
    'smoking_status',
    'alcohol_consumption',
    'stress_level',
]
feature_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    df[column] = column_encoder.fit_transform(df[column])
    feature_encoders[column] = column_encoder

# Tips as target variable (dedicated encoder; this is the one saved as
# 'label_encoder.pkl' and used to map predictions back to tip text)
label_enc = LabelEncoder()
df['target'] = label_enc.fit_transform(df['health_tips'])

# Step 3: Define features and target
X = df.drop(columns=['health_tips', 'target'])
y = df['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 4: Feature Scaling
# The scaler is fitted on the training split only, then applied to the test
# split, so test-set statistics do not leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 6: Evaluate the model
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
# Pass the full label range explicitly: if a rare class is missing from the
# test split, the labels inferred from y_test/y_pred would no longer line up
# with target_names.
all_labels = np.arange(len(label_enc.classes_))
print(classification_report(y_test, y_pred, labels=all_labels, target_names=label_enc.classes_))

# Step 7: Save the model for future use
import joblib
joblib.dump(model, 'health_tips_predictor.pkl')
joblib.dump(label_enc, 'label_encoder.pkl')
# Additional artifact: per-column encoders required to encode raw categorical
# inputs at inference time (dict keyed by column name).
joblib.dump(feature_encoders, 'feature_encoders.pkl')
joblib.dump(scaler, 'scaler.pkl')


Accuracy: 1.0
                                                                                                                                                                                                                                                                                                       precision    recall  f1-score   support

                                                                                                                                                                                                                                            Consider quitting smoking to improve your overall health.       1.00      1.00      1.00       145
                                                                                                                                                                                   Consider quitting smoking to improve your overall health. | Ensure you get at least 7-8 hours of sleep each night.       1.00      1.00 

['scaler.pkl']