In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

# Step 1: Generate Synthetic Dataset
# Every column is drawn independently from NumPy's legacy global RNG.
SEED = 42        # seed for the global NumPy RNG so the draws are reproducible
N_SAMPLES = 200  # number of rows to generate

np.random.seed(SEED)
# NOTE: the dict entries are evaluated top to bottom, and that is the order in
# which random numbers are consumed. Reordering or inserting a column changes
# the values generated for every column after it.
data = {
    "Age": np.random.randint(20, 60, N_SAMPLES),              # integers 20..59
    "BMI": np.random.uniform(18, 35, N_SAMPLES),              # floats in [18, 35)
    "Smoking": np.random.choice([0, 1], N_SAMPLES),           # binary flag
    "Exercise": np.random.randint(0, 5, N_SAMPLES),           # integers 0..4
    "Diabetes": np.random.choice([0, 1], N_SAMPLES),          # binary flag
    "Hypertension": np.random.choice([0, 1], N_SAMPLES),      # binary flag
    "Heart Disease": np.random.choice([0, 1], N_SAMPLES),     # binary flag
    "Doctor Visits": np.random.randint(1, 10, N_SAMPLES),     # integers 1..9
    "Medication Usage": np.random.randint(0, 5, N_SAMPLES),   # integers 0..4
}

df = pd.DataFrame(data)

# Step 2: Standardize the Data
# Scale only the feature columns. Dropping "Cluster" when it is already present
# keeps this step idempotent: re-running it after labels were attached would
# otherwise feed the previous cluster labels back into the scaler and the model.
features = df.drop(columns=["Cluster"], errors="ignore")
scaler = StandardScaler()
df_scaled = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=features.index,  # keep the scaled rows aligned with df
)

# Step 3: Apply Gaussian Mixture Model (GMM) Clustering
# Fit a 4-component mixture on the standardized features and store the
# predicted component index of every row as the "Cluster" column of the
# unscaled frame.
gmm = GaussianMixture(n_components=4, random_state=42)
df["Cluster"] = gmm.fit_predict(df_scaled)

# Step 4: Sample Data for Visualization
# Plot a reproducible random subset of the rows; cap the sample size so the
# call does not raise when the frame holds fewer than 100 rows.
df_sampled = df.sample(n=min(100, len(df)), random_state=42)
# Cast the labels to strings before using them as the hue variable.
df_sampled["Cluster"] = df_sampled["Cluster"].astype(str)
# String hue levels are otherwise ordered by first appearance in the sample,
# which makes the cluster -> colour mapping arbitrary. Sort them numerically.
cluster_order = sorted(df_sampled["Cluster"].unique(), key=int)

# Step 5: Generate Pairplot for Cluster Analysis
sns.set_theme(style="whitegrid")
pairplot = sns.pairplot(
    df_sampled,
    hue="Cluster",
    hue_order=cluster_order,
    palette="tab10",
    diag_kind="hist",
)
# Address the grid's own figure rather than pyplot's implicit "current" figure.
pairplot.figure.suptitle("Cluster Analysis using Gaussian Mixture Model", y=1.02)
# Use the grid's tight_layout, not plt.tight_layout(): seaborn reserves a right
# margin for the external legend, and the pyplot call discards that margin so
# the legend ends up drawn on top of the last column of panels.
pairplot.tight_layout()

# Step 6: Save the Visualization
pairplot.savefig("gmm_clusters_final.png")
# Close exactly the figure created above to release its memory.
plt.close(pairplot.figure)

# Write the dataset to CSV (row index omitted) and report completion
csv_path = "personalized_healthcare_dataset.csv"
df.to_csv(csv_path, index=False)
print("Dataset and visualization successfully generated.")


Dataset and visualization successfully generated.
