In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv("C:\\Users\\vatch\\Downloads\\archive (5)\\cluster_data.csv")

# Selecting numerical features
X = data.iloc[:, :-1].values  # Features (excluding last column as target if applicable)

# Standardizing the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA to reduce dimensions
n_components = min(X_scaled.shape[0], X_scaled.shape[1], 2)  # Ensure valid PCA components
if n_components > 1:
    pca = PCA(n_components=n_components)  # Reduce to 2D for visualization if possible
    X_pca = pca.fit_transform(X_scaled)
else:
    X_pca = X_scaled  # If only 1 feature, no reduction needed

# Apply K-Means Clustering
kmeans = KMeans(n_clusters=3, init='k-means++', random_state=42, n_init=10)
kmeans.fit(X_scaled)
labels = kmeans.labels_
centroids = kmeans.cluster_centers_

# If PCA was applied, visualize in 2D
if n_components > 1:
    plt.figure(figsize=(8, 6))
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='viridis', alpha=0.6, edgecolors='k')
    plt.scatter(pca.transform(centroids)[:, 0], pca.transform(centroids)[:, 1], c='red', marker='x', s=200, label='Centroids')
    plt.title('K-Means Clustering with PCA')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend()
    plt.show()
else:
    print("PCA could not be applied due to insufficient features.")

# Assign cluster labels to the dataset
data['Cluster'] = labels
print("Clustered Data Sample:")
print(data.head())



PCA could not be applied due to insufficient features.
Clustered Data Sample:
   Feature 1  Feature 2  Cluster
0   2.698582  -0.672960        0
1  -0.128113   4.355952        2
2   2.509049   5.773146        0
3  -1.518276   3.444886        2
4  -0.072283   2.883769        2
