In [22]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans


In [23]:

iris = load_iris()
data = iris.data
feature_names = iris.feature_names

In [24]:

scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)



In [25]:

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(data_scaled)

iris_features = pd.DataFrame(data_scaled, columns=feature_names)
iris_features['Cluster'] = kmeans.labels_

cluster_summary = iris_features.groupby('Cluster').mean()
cluster_std = iris_features.groupby('Cluster').std()
cluster_count = iris_features.groupby('Cluster').count()

print("Cluster Mean Values:")
print(cluster_summary)

print("\nCluster Standard Deviation Values:")
print(cluster_std)

print("\nCluster Sample Count:")
print(cluster_count)

report = """
Cluster Analysis Report:

Based on K-means clustering with 3 clusters, we analyzed the Iris dataset. Here are the key observations:

1. *Cluster 0 (Setosa-like) Characteristics*:
   - Sepal length: {:.2f}, Sepal width: {:.2f}, Petal length: {:.2f}, Petal width: {:.2f}

2. *Cluster 1 (Versicolor-like) Characteristics*:
   - Sepal length: {:.2f}, Sepal width: {:.2f}, Petal length: {:.2f}, Petal width: {:.2f}

3. *Cluster 2 (Virginica-like) Characteristics*:
   - Sepal length: {:.2f}, Sepal width: {:.2f}, Petal length: {:.2f}, Petal width: {:.2f}

The features that distinguish these clusters most notably are petal length and petal width.

Cluster Summary:
- Cluster 0: Smallest flowers (Setosa-like)
- Cluster 1: Intermediate-sized flowers (Versicolor-like)
- Cluster 2: Largest flowers (Virginica-like)
""".format(
    cluster_summary.iloc[0]['sepal length (cm)'], cluster_summary.iloc[0]['sepal width (cm)'],
    cluster_summary.iloc[0]['petal length (cm)'], cluster_summary.iloc[0]['petal width (cm)'],
    cluster_summary.iloc[1]['sepal length (cm)'], cluster_summary.iloc[1]['sepal width (cm)'],
    cluster_summary.iloc[1]['petal length (cm)'], cluster_summary.iloc[1]['petal width (cm)'],
    cluster_summary.iloc[2]['sepal length (cm)'], cluster_summary.iloc[2]['sepal width (cm)'],
    cluster_summary.iloc[2]['petal length (cm)'], cluster_summary.iloc[2]['petal width (cm)']
)

print(report)

with open("cluster_analysis_report.txt", "w") as file:
    file.write(report)

Cluster Mean Values:
         sepal length (cm)  sepal width (cm)  petal length (cm)  \
Cluster                                                           
0                 0.571004         -0.371768           0.691119   
1                -0.816231          1.318958          -1.286834   
2                -1.327654         -0.373138          -1.137236   

         petal width (cm)  
Cluster                    
0                0.663152  
1               -1.219712  
2               -1.114862  

Cluster Standard Deviation Values:
         sepal length (cm)  sepal width (cm)  petal length (cm)  \
Cluster                                                           
0                 0.754541          0.726348           0.437413   
1                 0.349980          0.623778           0.103387   
2                 0.290851          0.831505           0.437158   

         petal width (cm)  
Cluster                    
0                0.541709  
1                0.151780  
2                0.