In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

In [None]:
# Read employee_data.csv into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='employee_data.csv')

In [None]:
# Remove every row that has a missing value in *any* column (the pandas
# defaults, spelled out here). `df` is mutated in place, so later cells see
# only complete rows.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Feature matrix for clustering: the Income and Age columns as a NumPy array,
# in that column order (column 0 = Income, column 1 = Age).
X = df.loc[:, ['Income', 'Age']].to_numpy()

In [None]:
# Learn per-column mean/scale from X, then standardize X with them.
# The fitted `scaler` is kept so new observations can be transformed with the
# same statistics later on.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Elbow sweep: fit K-Means for k = 1..10 on the standardized features and
# record each model's inertia (within-cluster sum of squared distances).
k_range = range(1, 11)
inertia = []
for k in k_range:
    # `fit` returns the estimator itself, so the fitted model can be bound directly.
    kmeans = KMeans(n_clusters=k, random_state=42).fit(X_scaled)
    inertia.append(kmeans.inertia_)

In [None]:
# Elbow curve: inertia as a function of the number of clusters.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(k_range, inertia, marker='o', linestyle='-', color='b')
ax.set_title('Elbow Method to Find Optimal K')
ax.set_xlabel('Number of Clusters (k)')
ax.set_ylabel('Inertia (Sum of Squared Distances)')
plt.show()

In [None]:
# Silhouette sweep: fit K-Means for every k in k_range except the first entry
# and record the mean silhouette score of the resulting labelling.
# The first entry of k_range (k = 1) is skipped because silhouette_score
# requires at least two distinct cluster labels.
sil_scores = []
for k in k_range[1:]:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_scaled)
    # Score the labelling on the same standardized data the model was fit on.
    score = silhouette_score(X_scaled, kmeans.labels_)
    sil_scores.append(score)

In [None]:
# Silhouette curve: one score per k, for the k values after the first entry
# of k_range (matching how sil_scores was collected).
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(k_range[1:], sil_scores, marker='o', linestyle='-', color='g')
ax.set_title('Silhouette Score for Different K')
ax.set_xlabel('Number of Clusters (k)')
ax.set_ylabel('Silhouette Score')
plt.show()

In [None]:
# Fit the final model on the standardized features.
optimal_k = 3  # Chosen based on elbow and silhouette score
kmeans = KMeans(n_clusters=optimal_k, random_state=42).fit(X_scaled)
# Leave the fitted estimator as the cell's last expression so it is displayed.
kmeans

In [None]:
# Attach each row's cluster label. `labels_` follows the row order of
# X_scaled, which was built from `df`, so the assignment is positional.
df = df.assign(Cluster=kmeans.labels_)

In [None]:
# Summarize the fit: centroid coordinates (in standardized feature space,
# since the model was fit on X_scaled) and the number of rows per cluster.
print(
    f"Cluster Centers:\n{kmeans.cluster_centers_}",
    f"Cluster Distribution:\n{df['Cluster'].value_counts()}",
    sep="\n",
)

In [None]:
# Scatter of the standardized features (column 0 vs. column 1), with each
# point colored by its assigned cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=df['Cluster'], cmap='viridis')
ax.set_title('Employee Clusters Based on Income and Age')
ax.set_xlabel('Income (Standardized)')
ax.set_ylabel('Age (Standardized)')
plt.show()

In [None]:
# A single unseen observation as a one-row, two-column matrix, using the same
# column order as the training features: [Income, Age].
new_employee = np.array([50000, 30]).reshape(1, -1)

In [None]:
# Reuse the already-fitted scaler (transform only, no refit) so the new point
# is expressed in the same standardized space the model was trained on.
new_employee_scaled = scaler.transform(X=new_employee)

In [None]:
# Assign the standardized observation to a cluster with the fitted model.
# `predict` returns one label per input row; there is a single row here, so
# element 0 is the answer.
new_cluster = kmeans.predict(X=new_employee_scaled)
print(f"The new employee belongs to Cluster: {new_cluster[0]}")