In [5]:
# Recruitment Pipeline Optimization for HR Analytics
# Implementing Clustering Without Built-in Functions

# Sample dataset: one dict per employee, built from a shared field list so
# every row is guaranteed to carry the same keys in the same order.
_FIELDS = ("EmpID", "Age", "AgeGroup", "Attrition", "BusinessTravel")
_RECORDS = (
    ("RM297", 18, "18-25", "Yes", "Travel_Rarely"),
    ("RM302", 18, "18-25", "No", "Travel_Rarely"),
    ("RM458", 18, "18-25", "Yes", "Travel_Frequently"),
    ("RM728", 18, "18-25", "No", "Non-Travel"),
)
data = [dict(zip(_FIELDS, record)) for record in _RECORDS]

# Step 1: Encode Categorical Data Manually
def encode_categorical(data, column):
    """Replace the values of ``column`` with integer codes, in place.

    Codes are handed out in order of first appearance: the first distinct
    value seen becomes 0, the next new one becomes 1, and so on.

    Args:
        data: List of dict rows. Every row must contain ``column``.
        column: Key of the field to encode.

    Returns:
        List of the distinct original values; the value at index ``i`` is
        the one that was encoded as ``i``.

    Raises:
        KeyError: If a row has no ``column`` key. This is detected while
            the codes are being collected, before any row is modified.
        TypeError: If a value in ``column`` is not hashable.
    """
    # A dict gives O(1) lookups and preserves insertion order, so it acts as
    # both the value -> code mapping and the ordered list of labels.
    codes = {}
    for row in data:
        codes.setdefault(row[column], len(codes))

    # Second pass: only now overwrite the rows with their integer codes.
    for row in data:
        row[column] = codes[row[column]]
    return list(codes)

# Encode the categorical columns as integer codes (rows are modified in place;
# the returned lists map each code back to its original label).
attrition_labels = encode_categorical(data, "Attrition")
business_travel_labels = encode_categorical(data, "BusinessTravel")

# Step 2: Prepare Features for Clustering
# Each sample is the pair [Age, encoded BusinessTravel].
X = [[row["Age"], row["BusinessTravel"]] for row in data]

# Step 3: Implement K-Means Clustering
import random

def initialize_centroids(X, k):
    """Pick ``k`` starting centroids at random from the points in ``X``.

    When ``X`` contains at least ``k`` distinct points, the sample is drawn
    from the distinct points only, so no two centroids start at the same
    coordinates. Identical starting centroids would leave one of them with
    an empty cluster after the first assignment. If fewer than ``k``
    distinct points exist, the sample is drawn from ``X`` as given.

    Args:
        X: Sequence of points, each a sequence of hashable coordinates.
        k: Number of centroids to choose.

    Returns:
        List of ``k`` points taken from ``X`` (the same objects, not copies).

    Raises:
        ValueError: If ``k`` is negative or larger than ``len(X)``; raised
            by ``random.sample``.
    """
    # Points may be lists (unhashable), so de-duplicate on a tuple key while
    # keeping first-appearance order.
    distinct = list({tuple(point): point for point in X}.values())
    pool = distinct if len(distinct) >= k else X
    return random.sample(pool, k)

def assign_clusters(X, centroids):
    """Group every point of ``X`` under its nearest centroid.

    Nearness is measured by squared Euclidean distance. When a point is
    equally close to several centroids, the one with the lowest index wins.

    Args:
        X: Iterable of points (sequences of numbers).
        centroids: Sequence of centroids, each indexable over the same
            coordinates as the points.

    Returns:
        A list with one sub-list per centroid, holding the points assigned
        to it in their original order. Sub-lists may be empty.
    """
    groups = [[] for _ in centroids]
    for sample in X:
        sq_dists = []
        for center in centroids:
            # Squared distance is enough for ranking; no square root needed.
            total = sum(
                (value - center[axis]) ** 2 for axis, value in enumerate(sample)
            )
            sq_dists.append(total)
        nearest = sq_dists.index(min(sq_dists))
        groups[nearest].append(sample)
    return groups

def calculate_new_centroids(clusters, previous_centroids=None):
    """Compute the mean point of every cluster.

    Args:
        clusters: List of clusters, each a list of points (sequences of
            numbers with the same number of coordinates).
        previous_centroids: Optional sequence of centroids, parallel to
            ``clusters``. When given, an empty cluster keeps a copy of its
            previous centroid instead of causing an error.

    Returns:
        List of centroids (lists of floats), one per cluster, in the same
        order as ``clusters``.

    Raises:
        ValueError: If a cluster is empty and ``previous_centroids`` was
            not supplied, because the mean of no points is undefined.
    """
    centroids = []
    for index, cluster in enumerate(clusters):
        if not cluster:
            # An empty cluster has no mean; fall back to where the centroid
            # was, or fail with an explicit message.
            if previous_centroids is None:
                raise ValueError(
                    f"cluster {index} is empty; pass previous_centroids to "
                    "keep its former centroid"
                )
            centroids.append(list(previous_centroids[index]))
            continue

        size = len(cluster)
        dimensions = len(cluster[0])
        centroids.append(
            [sum(point[axis] for point in cluster) / size for axis in range(dimensions)]
        )
    return centroids

def k_means(X, k, max_iterations=100):
    """Cluster the points of ``X`` into ``k`` groups with Lloyd's algorithm.

    Starting from randomly chosen centroids, the function alternates
    between assigning points to their nearest centroid and moving each
    centroid to the mean of its points. It stops when the centroids no
    longer change or after ``max_iterations`` updates.

    Args:
        X: List of points (sequences of numbers).
        k: Number of clusters.
        max_iterations: Upper bound on centroid updates. With a value of 0
            or less, the initial centroids and their assignment are
            returned unchanged.

    Returns:
        Tuple ``(centroids, clusters)``. ``clusters[i]`` holds the points
        assigned to ``centroids[i]``; the clusters always correspond to the
        returned centroids.

    Raises:
        Whatever ``initialize_centroids`` raises for an invalid ``k``.
    """
    centroids = initialize_centroids(X, k)
    # Assign once up front so ``clusters`` is defined even when the loop
    # body never runs.
    clusters = assign_clusters(X, centroids)
    for _ in range(max_iterations):
        # A centroid that attracted no points has no mean to move to, so it
        # stays where it is. Each non-empty cluster is averaged on its own,
        # which keeps an empty one from aborting the whole update.
        new_centroids = [
            calculate_new_centroids([cluster])[0] if cluster else centroid
            for cluster, centroid in zip(clusters, centroids)
        ]
        if new_centroids == centroids:
            break
        centroids = new_centroids
        # Re-assign right away so the returned clusters match the centroids
        # even if the iteration budget runs out.
        clusters = assign_clusters(X, centroids)
    return centroids, clusters

# Step 4: Perform Clustering
k = 2  # how many groups to split the samples into
centroids, clusters = k_means(X, k)

# Report the final centroids followed by the members of each cluster,
# numbering clusters from 1 for display.
print("Centroids:", centroids)
print("Clusters:")
for position, members in enumerate(clusters, start=1):
    print(f"Cluster {position}: {members}")


Centroids: [[18.0, 2.0], [18.0, 0.3333333333333333]]
Clusters:
Cluster 1: [[18, 2]]
Cluster 2: [[18, 0], [18, 0], [18, 1]]
