# Health and Wellness Clustering
This notebook demonstrates K-Means and hierarchical clustering, together with PCA-based dimensionality reduction, on simulated patient health data, with the aim of identifying meaningful patient groups for healthcare intervention.

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster

In [None]:
# Read the health and wellness CSV into a DataFrame and preview the first rows.
# The path is relative, so it is resolved against the kernel's working directory.
data_file = 'simulated_health_wellness_data.csv'
df = pd.read_csv(data_file)
df.head()

In [None]:
# Put every column on a common scale (zero mean, unit variance) so that no
# single feature dominates the distance-based steps that follow.
scaler = StandardScaler().fit(df)
scaled_data = scaler.transform(df)

In [None]:
# Annotated heatmap of the pairwise correlations between the raw (unscaled) columns
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Dimensionality reduction: keep the two leading principal components of the
# standardized data and store them in a frame with one named column per component.
pc_names = ['PC1', 'PC2']
pca = PCA(n_components=len(pc_names))
pca_components = pca.fit_transform(scaled_data)
pca_df = pd.DataFrame(data=pca_components, columns=pc_names)

In [None]:
# K-Means clustering on the standardized features.
# NOTE(review): 8 is also scikit-learn's default n_clusters and nothing in this
# notebook justifies it; consider comparing silhouette scores over several k.
n_clusters = 8
# n_init is pinned explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4 (and 1.2-1.3 emit a FutureWarning when it is omitted), so
# leaving it implicit makes the labels and the score depend on the installed version.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
labels = kmeans.fit_predict(scaled_data)
# Attach the labels to the 2-D PCA frame so the projection can be coloured by cluster.
pca_df['Cluster'] = labels
# Silhouette is computed in the full standardized feature space, not the 2-D projection.
print('Silhouette Score (K-Means):', silhouette_score(scaled_data, labels))

In [None]:
# Scatter the observations in PC1/PC2 space, coloured by their K-Means cluster label
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x='PC1', y='PC2', hue='Cluster', palette='tab10', data=pca_df, ax=ax)
ax.set_title('PCA Cluster Visualization')
plt.show()

In [None]:
# Agglomerative clustering with Ward linkage on the standardized features,
# visualised as a truncated dendrogram.
linked = linkage(scaled_data, method='ward')
fig, ax = plt.subplots(figsize=(10, 5))
# truncate_mode='lastp' with p=12 draws only the last 12 merged clusters;
# everything merged earlier is contracted into leaf nodes.
dendrogram(linked, p=12, truncate_mode='lastp', ax=ax)
ax.set_title('Hierarchical Clustering Dendrogram')
plt.show()