In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples
import matplotlib.pyplot as plt
import numpy as np

# Cluster the dataset on a few selected columns with K-Means and report, per
# cluster, the feature means, the cluster size and the silhouette quality.

# 1. Load the data
file_path = "data/Obesity_Dataset_FE.xlsx"
data = pd.read_excel(file_path)

# 2. Select the columns used for clustering
features = ['Activity', 'Weight', 'FoodConsumption']
data_filtered = data[features].copy()  # copy so the added columns never touch `data`

# 3. Standardize the features (zero mean, unit variance) so that no single
#    column dominates the Euclidean distances used by K-Means
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_filtered)

# 4. K-Means clustering
n_clusters = 4  # number of clusters
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
labels = kmeans.fit_predict(data_scaled)  # same values as kmeans.labels_

# Attach the cluster assignment to every row
data_filtered['Cluster'] = labels

# 5. Silhouette coefficient
# The per-sample values are computed once; the overall score is simply their
# mean, so a separate silhouette_score() call would only repeat the expensive
# pairwise-distance computation.
silhouette_vals = silhouette_samples(data_scaled, labels)
silhouette_avg = float(silhouette_vals.mean())
print(f"Durchschnittlicher Silhouette-Koeffizient: {silhouette_avg:.2f}")

data_filtered['Silhouette'] = silhouette_vals

# 6. Per-cluster statistics
# The "Avg_<feature>" columns are derived from `features` so that the summary
# stays in sync with the columns that were actually clustered on.
cluster_stats = data_filtered.groupby('Cluster').agg(
    **{f"Avg_{column}": (column, 'mean') for column in features},
    Cluster_Size=('Cluster', 'count'),
    Avg_Silhouette=('Silhouette', 'mean'),
    Std_Silhouette=('Silhouette', 'std'),
)

# Show the results
print("\nCluster-Parameter:")
print(cluster_stats)


Durchschnittlicher Silhouette-Koeffizient: 0.25

Cluster-Parameter:
         Avg_Activity  Avg_Weight  Avg_FoodConsumption  Cluster_Size  \
Cluster                                                                
0            8.028698   66.079845             4.353201           453   
1            8.075758   78.312727             7.339394           330   
2            5.428571   96.837760             7.103896           308   
3            5.156069   63.081387             5.387283           519   

         Avg_Silhouette  Std_Silhouette  
Cluster                                  
0              0.267954        0.131310  
1              0.218044        0.129149  
2              0.223010        0.136052  
3              0.256388        0.128437  
