# In [None]:
# 📦 Step 1: Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# 📥 Step 2: Load your dataset
# The steps below read the numeric columns 'n_mails', 'n_proc_subj' and
# 'n_msg_id' from this file.
df = pd.read_csv("ip_mail_data.csv")  # Replace with your actual file path

# 🧮 Step 3: Feature engineering
# Per-mail ratios. A row with n_mails == 0 would otherwise yield +/-inf
# (x / 0); fillna() does not catch inf and StandardScaler rejects it, so the
# zero denominators are masked to NaN first. The resulting NaN ratios are
# zero-filled in Step 4 together with any other missing values.
mail_counts = df['n_mails'].replace(0, np.nan)
df['subj_per_mail'] = df['n_proc_subj'] / mail_counts
df['msgid_per_mail'] = df['n_msg_id'] / mail_counts

# ⚖️ Step 4: Normalize features for clustering
features_for_clustering = ['n_mails', 'n_proc_subj', 'n_msg_id', 'subj_per_mail', 'msgid_per_mail']
# Missing values (including the masked zero-mail ratios) are treated as 0.
X = df[features_for_clustering].fillna(0)
# Standardize so that no single feature dominates the Euclidean distances
# KMeans relies on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 📊 Step 5: KMeans clustering
# n_init is pinned explicitly: scikit-learn changed its default between
# releases, so leaving it unset makes the clustering (and a FutureWarning)
# depend on the installed version. random_state keeps the run reproducible.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['cluster'] = kmeans.fit_predict(X_scaled)

# 🕸️ Step 6: Radar charts using original features only
raw_features = ['n_mails', 'n_proc_subj', 'n_msg_id']
cluster_means_raw = df.groupby('cluster')[raw_features].mean()

# Min-max scale every feature across clusters into [0, 1].
# A feature whose mean is identical in all clusters has a zero range, which
# would turn the division into 0/0 = NaN and silently drop that vertex from
# the plot; using a range of 1 instead maps such a feature to 0.
feature_min = cluster_means_raw.min()
feature_range = (cluster_means_raw.max() - feature_min).replace(0, 1)
normalized_raw = (cluster_means_raw - feature_min) / feature_range

labels = raw_features
num_vars = len(labels)
# One evenly spaced angle per feature; the first angle is repeated at the end
# so the plotted polygon closes.
angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
angles += angles[:1]  # close the loop

# 🎨 Step 7: Plot each cluster separately
for i, row in normalized_raw.iterrows():
    values = row.tolist()
    values += values[:1]  # repeat the first value to match the closed angles

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.plot(angles, values, label=f'Cluster {i}', color='tab:blue')
    ax.fill(angles, values, alpha=0.25, color='tab:blue')

    # Put the first feature at the top and lay the others out clockwise.
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)
    ax.set_thetagrids(np.degrees(angles[:-1]), labels)
    # Fix the radial axis to the normalized range; with per-figure
    # autoscaling the charts of different clusters are not comparable.
    ax.set_ylim(0, 1)
    ax.set_title(f"Radar Chart for Cluster {i} (Raw Features)", fontsize=14)
    ax.legend(loc='upper right')
    plt.tight_layout()
    plt.show()
