In [None]:
import os
# Thread/CPU limits for the numeric back-ends. They are set in their own cell so
# they are already in the environment when the next cell imports scikit-learn.
# NOTE(review): presumably these silence the KMeans/OpenMP and loky CPU-count
# warnings seen on Windows — confirm before changing the values.
os.environ.update({
    "OMP_NUM_THREADS": "1",
    "LOKY_MAX_CPU_COUNT": "4",
})

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA

# Location of the input CSV. Setting the HEALTH_WELLNESS_CSV environment variable
# overrides the default, so the notebook can run on another machine without
# editing this cell; the default is the original local download path.
DATA_PATH = os.environ.get(
    "HEALTH_WELLNESS_CSV",
    r"C:\Users\cheyanne.gardner\Downloads\simulated_health_wellness_data.csv",
)
df = pd.read_csv(DATA_PATH)

# Standardise the data before distance-based clustering. Every column of `df` is
# passed to the scaler, so the file is assumed to hold only numeric feature
# columns — TODO confirm against the CSV schema.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# Shared clustering settings, named once instead of repeated as literals.
N_CLUSTERS = 3
RANDOM_STATE = 42
# Pinned explicitly: scikit-learn changed KMeans' default n_init from 10 to 'auto'
# (1.4) and emits a FutureWarning in 1.2/1.3 when it is left unset. Fixing it
# keeps the clustering independent of the installed version.
KMEANS_N_INIT = 10

# --- Clustering on the standardised features -------------------------------
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=KMEANS_N_INIT, random_state=RANDOM_STATE)
df['KMeans_Cluster'] = kmeans.fit_predict(X_scaled)

hierarchical = AgglomerativeClustering(n_clusters=N_CLUSTERS)
df['Hierarchical_Cluster'] = hierarchical.fit_predict(X_scaled)

# --- Dimensionality reduction ----------------------------------------------
# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share (here 95%) of the variance.
pca_full = PCA(n_components=0.95)
X_pca_full = pca_full.fit_transform(X_scaled)

# --- Clustering on the PCA-reduced features --------------------------------
kmeans_pca = KMeans(n_clusters=N_CLUSTERS, n_init=KMEANS_N_INIT, random_state=RANDOM_STATE)
df['KMeans_PCA_Cluster'] = kmeans_pca.fit_predict(X_pca_full)

hierarchical_pca = AgglomerativeClustering(n_clusters=N_CLUSTERS)
df['Hierarchical_PCA_Cluster'] = hierarchical_pca.fit_predict(X_pca_full)

# Silhouette score of each labelling, measured in the same feature space the
# corresponding model was fitted on (scaled features vs. PCA projection).
kmeans_silhouette, hierarchical_silhouette = (
    silhouette_score(X_scaled, df[label_col])
    for label_col in ('KMeans_Cluster', 'Hierarchical_Cluster')
)
kmeans_pca_silhouette, hierarchical_pca_silhouette = (
    silhouette_score(X_pca_full, df[label_col])
    for label_col in ('KMeans_PCA_Cluster', 'Hierarchical_PCA_Cluster')
)

# Within-cluster sum of squares, read from the fitted KMeans models; the
# hierarchical models get None in the table below.
wcss_original, wcss_pca = kmeans.inertia_, kmeans_pca.inertia_

# One row per model, KMeans rows first.
evaluation_columns = {
    "Model": [
        "KMeans (Original)",
        "KMeans (PCA)",
        "Hierarchical (Original)",
        "Hierarchical (PCA)",
    ],
    "Silhouette Score": [
        kmeans_silhouette,
        kmeans_pca_silhouette,
        hierarchical_silhouette,
        hierarchical_pca_silhouette,
    ],
    "WCSS (KMeans only)": [wcss_original, wcss_pca, None, None],
}
evaluation_df = pd.DataFrame(evaluation_columns)

print("Evaluation Metrics")
print(evaluation_df)

# Label the PCA loadings with the feature names. The number of features is taken
# from the fitted PCA rather than hard-coded, so the table still lines up if the
# dataset gains or loses a column. This relies on the cluster-label columns
# having been appended to `df` after the original feature columns.
n_features = pca_full.components_.shape[1]
feature_names = df.columns[:n_features]

pca_components_df = pd.DataFrame(
    pca_full.components_,
    columns=feature_names,
    index=[f'PC{i+1}' for i in range(pca_full.n_components_)]
)

# Heatmap of the loadings; the diverging palette is centred on 0 so positive and
# negative contributions are visually symmetric.
plt.figure(figsize=(10, 6))
sns.heatmap(pca_components_df, annot=True, cmap='coolwarm', center=0)
plt.title('Feature Contributions to Principal Components')
plt.tight_layout()
plt.show()

# Restrict the summary and the correlation matrix to the original feature
# columns. By this point `df` also carries the appended cluster-label columns,
# whose descriptive statistics are not meaningful. The feature count is read
# from the scaled matrix instead of being hard-coded.
feature_df = df.iloc[:, :X_scaled.shape[1]]

print("Dataset Summary")
print(feature_df.describe())

plt.figure(figsize=(10, 6))
sns.heatmap(feature_df.corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Heatmap of Health & Wellness Indicators")
plt.tight_layout()
plt.show()

# (column in df, panel title) for each distribution plot, in display order.
# The stress-level title uses a real en dash; it was previously mis-decoded text.
histogram_panels = [
    ('Exercise_Time_Min', 'Exercise Time (min/day)'),
    ('Healthy_Meals_Per_Day', 'Healthy Meals per Day'),
    ('Sleep_Hours_Per_Night', 'Sleep Hours per Night'),
    ('Stress_Level', 'Stress Level (1–10)'),
    ('BMI', 'BMI'),
]

fig, axs = plt.subplots(2, 3, figsize=(18, 10))
panel_axes = axs.ravel()  # row-major: top row left-to-right, then bottom row

# One histogram with a KDE overlay per indicator.
for ax, (column, title) in zip(panel_axes, histogram_panels):
    sns.histplot(df[column], kde=True, ax=ax)
    ax.set_title(title)

# Hide any grid cell that has no indicator assigned (the last one of the 2x3 grid).
for unused_ax in panel_axes[len(histogram_panels):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

# Interactive scatter of BMI against stress level, with points coloured by the
# number of healthy meals per day.
axis_labels = {
    'BMI': 'BMI',
    'Stress_Level': 'Stress Level',
    'Healthy_Meals_Per_Day': 'Healthy Meals/Day',
}
scatter_options = dict(
    x='BMI',
    y='Stress_Level',
    color='Healthy_Meals_Per_Day',
    title='BMI vs Stress Level Colored by Healthy Meals',
    labels=axis_labels,
    template='plotly_white',
)
fig = px.scatter(df, **scatter_options)
fig.show()