In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import joblib

# Read the diet-recommendation table from a CSV in the working directory.
df = pd.read_csv("Personalized_Diet_RecommendationsDC.csv")

# Clustering inputs. Per the author's note these are the ten most important
# features; the importance ranking itself is not computed in this cell.
selected_features = [
    'Daily_Steps',
    'Cholesterol_Level',
    'Caloric_Intake',
    'Protein_Intake',
    'Fat_Intake',
    'Carbohydrate_Intake',
    'Blood_Pressure_Systolic',
    'Blood_Sugar_Level',
    'Age',
    'Sleep_Hours',
]
X = df[selected_features]

# Standardise every feature (StandardScaler defaults: zero mean, unit
# variance) so that no single column dominates the K-Means distance
# computation purely because of its units.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Fit K-Means with k=4 (chosen to mirror the four meal plans) and a fixed
# seed; the labels assigned during fitting are the cluster memberships.
kmeans = KMeans(n_clusters=4, random_state=42).fit(X_scaled)
clusters = kmeans.labels_

# Cluster quality: mean silhouette coefficient over all rows
# (range -1..1, higher means better-separated clusters).
sil_score = silhouette_score(X_scaled, clusters)
print("Silhouette Score:", sil_score)

# Attach the assignment to the frame and report the share of rows per cluster.
df['Cluster'] = clusters
print(
    "\nCluster Distribution:",
    df['Cluster'].value_counts(normalize=True),
    sep="\n",
)

# Row-normalised contingency table: for each cluster, the fraction of its
# members that fall under each Recommended_Meal_Plan value.
# NOTE(review): in the recorded run the silhouette score was ~0.065 and every
# row of this table was close to uniform (~23-27% per plan), i.e. the clusters
# did not line up with the meal plans.
print(
    "\nCluster vs. Recommended_Meal_Plan:",
    pd.crosstab(df['Cluster'], df['Recommended_Meal_Plan'], normalize='index'),
    sep="\n",
)

# Persistence is currently disabled; uncomment the lines below to write the
# fitted model and scaler to disk.
#joblib.dump(kmeans, "kmeans_model.pkl")
#joblib.dump(scaler, "scaler_kmeans.pkl")
#print("\nModel saved as 'kmeans_model.pkl'")

Silhouette Score: 0.06544367636672671

Cluster Distribution:
Cluster
3    0.2526
0    0.2526
2    0.2514
1    0.2434
Name: proportion, dtype: float64

Cluster vs. Recommended_Meal_Plan:
Recommended_Meal_Plan         0         1         2         3
Cluster                                                      
0                      0.254157  0.254157  0.262866  0.228820
1                      0.254725  0.242399  0.273624  0.229252
2                      0.235481  0.260143  0.269690  0.234686
3                      0.259699  0.243072  0.244656  0.252573
