In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
import numpy as np


# Restrict clustering to columns 51..319 of X (X is defined in an earlier cell;
# presumably a 2-D NumPy array with at least 320 columns — TODO confirm).
X_selected = X[:, 51:320]

# Fit KMeans for every odd k in 1..17 and record each fit's inertia
# (sum of squared distances of samples to their closest cluster centre).
inertia = []
k_values = range(1, 18, 2)
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_selected)
    inertia.append(kmeans.inertia_)

# Heuristic elbow: the sampled k whose inertia is closest to
# ELBOW_INERTIA_FACTOR x the smallest inertia observed. On ties the first
# (smallest) k wins. This is only a rough marker — confirm it against the curve.
# The target and index are computed once and reused for both annotations.
ELBOW_INERTIA_FACTOR = 1.2
elbow_target = min(inertia) * ELBOW_INERTIA_FACTOR
elbow_idx = int(np.argmin(np.abs(np.asarray(inertia) - elbow_target)))
elbow_k = k_values[elbow_idx]
elbow_inertia = inertia[elbow_idx]

# Seaborn styling
sns.set(style="whitegrid")

# Inertia-vs-k curve
plt.figure(figsize=(8, 6))
colors = sns.color_palette("magma", len(k_values))
sns.lineplot(x=k_values, y=inertia, marker='o', color=colors[-1], linewidth=2.5)

# Axis labels and title
plt.xlabel('Number of Clusters (k)', fontsize=12)
plt.ylabel('Inertia', fontsize=12)
plt.title('Elbow Method for Optimal K', fontsize=14, fontweight='bold')

# Mark the heuristic elbow with a dashed vertical line and a highlighted point
plt.axvline(x=elbow_k, linestyle='--', color=colors[1], alpha=0.6, zorder=1)
plt.scatter(elbow_k, elbow_inertia,
            color=colors[1], edgecolors='black', s=120, label="Elbow Point", zorder=3)

plt.legend()
plt.show()

In [None]:
# Fit the final 7-cluster KMeans model on X_selected and pickle it to disk.
# Relies on names defined outside this cell: seed_value, X, X_selected,
# KMeans, pd, np and pickle.
kmeans = KMeans(n_clusters=7, random_state=seed_value)

# Fit on X_selected and keep the cluster label assigned to each row
clusters_train = kmeans.fit_predict(X_selected)

# Wrap X in a DataFrame (only needed by the disabled column assignment below)
X = pd.DataFrame(X)

# Disabled: would append the cluster labels to X as a "cluster" column
# X["cluster"] = clusters_train

# Convert X back to a NumPy array. With the assignment above disabled this
# round trip adds no columns; X_selected is not recomputed here.
X = np.array(X)

# Serialise the fitted model; 'wb' overwrites any existing file of this name
with open('kmeans_modelold.pkl', 'wb') as f:
    pickle.dump(kmeans, f)

print("KMeans model saved successfully.")


In [None]:

from sklearn.decomposition import PCA

# Project the selected features onto 7 principal components
pca = PCA(n_components=7)
X_pca = pca.fit_transform(X_selected)

# Percentage of variance explained per component, used in the axis labels
pca_var_pct = pca.explained_variance_ratio_ * 100

# Plain white Seaborn theme (must be set before the axes are created)
sns.set(style="white")

# Square figure with a single axes
pca_fig, pca_ax = plt.subplots(figsize=(7, 7))

# Semi-transparent scatter of PC1 vs PC2, coloured by the KMeans cluster labels
pca_ax.scatter(
    X_pca[:, 0],
    X_pca[:, 1],
    c=clusters_train,
    cmap='viridis',
    alpha=0.4,
    edgecolors=None,
)

# Dashed reference lines through the origin of the projection
pca_ax.axhline(0, linestyle='--', color='black', linewidth=1.2)
pca_ax.axvline(0, linestyle='--', color='black', linewidth=1.2)

# Axis labels carry the explained-variance share of each plotted component
pca_ax.set_xlabel(f'PC1 ({pca_var_pct[0]:.1f}%)', fontsize=12)
pca_ax.set_ylabel(f'PC2 ({pca_var_pct[1]:.1f}%)', fontsize=12)
pca_ax.set_title('KMeans Clustering (PCA Projection)', fontsize=14, fontweight='bold')

# Drop the top and right spines, then render
sns.despine()
plt.show()