In [4]:
# Recruitment Pipeline Optimization for HR Analytics
# Implementing Dimensionality Reduction Without Built-in Functions

# Sample Dataset
data = [
    {"EmpID": "RM297", "Age": 18, "AgeGroup": "18-25", "Attrition": "Yes", "BusinessTravel": "Travel_Rarely"},
    {"EmpID": "RM302", "Age": 18, "AgeGroup": "18-25", "Attrition": "No", "BusinessTravel": "Travel_Rarely"},
    {"EmpID": "RM458", "Age": 18, "AgeGroup": "18-25", "Attrition": "Yes", "BusinessTravel": "Travel_Frequently"},
    {"EmpID": "RM728", "Age": 18, "AgeGroup": "18-25", "Attrition": "No", "BusinessTravel": "Non-Travel"}
]

# Step 1: Encode Categorical Data Manually
def encode_categorical(data, column):
    unique_values = []
    for row in data:
        if row[column] not in unique_values:
            unique_values.append(row[column])
    
    for row in data:
        row[column] = unique_values.index(row[column])
    return unique_values

# Encode columns
attrition_labels = encode_categorical(data, "Attrition")
business_travel_labels = encode_categorical(data, "BusinessTravel")

# Step 2: Prepare Features for Dimensionality Reduction
X = []
for row in data:
    X.append([row["Age"], row["BusinessTravel"]])

# Step 3: Implement PCA (Principal Component Analysis)
def mean_center(X):
    mean_vector = [sum(column) / len(column) for column in zip(*X)]
    centered_data = [[X[i][j] - mean_vector[j] for j in range(len(X[0]))] for i in range(len(X))]
    return centered_data, mean_vector

def calculate_covariance_matrix(X):
    n = len(X)
    covariance_matrix = [[0] * len(X[0]) for _ in range(len(X[0]))]
    for i in range(len(X[0])):
        for j in range(len(X[0])):
            covariance_matrix[i][j] = sum((X[k][i]) * (X[k][j]) for k in range(n)) / (n - 1)
    return covariance_matrix

def calculate_eigenvectors(matrix):
    # For simplicity, assume a 2x2 matrix and calculate eigenvalues/vectors manually
    a, b, c, d = matrix[0][0], matrix[0][1], matrix[1][0], matrix[1][1]
    trace = a + d
    determinant = a * d - b * c
    eigenvalue1 = (trace + (trace ** 2 - 4 * determinant) ** 0.5) / 2
    eigenvalue2 = (trace - (trace ** 2 - 4 * determinant) ** 0.5) / 2
    
    eigenvector1 = [b, eigenvalue1 - a] if b != 0 else [1, 0]
    eigenvector2 = [b, eigenvalue2 - a] if b != 0 else [0, 1]

    norm1 = (eigenvector1[0] ** 2 + eigenvector1[1] ** 2) ** 0.5
    norm2 = (eigenvector2[0] ** 2 + eigenvector2[1] ** 2) ** 0.5

    eigenvector1 = [x / norm1 for x in eigenvector1]
    eigenvector2 = [x / norm2 for x in eigenvector2]

    return [(eigenvalue1, eigenvector1), (eigenvalue2, eigenvector2)]

def project_data(X, eigenvectors):
    projected_data = []
    for point in X:
        projection = [sum(point[j] * eigenvector[j] for j in range(len(point))) for eigenvector in eigenvectors]
        projected_data.append(projection)
    return projected_data

# Perform PCA
centered_data, mean_vector = mean_center(X)
covariance_matrix = calculate_covariance_matrix(centered_data)
eigenvectors = [vec for val, vec in calculate_eigenvectors(covariance_matrix)]
projected_data = project_data(centered_data, eigenvectors)

# Output Results
print("Mean Vector:", mean_vector)
print("Covariance Matrix:", covariance_matrix)
print("Eigenvectors:", eigenvectors)
print("Projected Data:", projected_data)


Mean Vector: [18.0, 0.75]
Covariance Matrix: [[0.0, 0.0], [0.0, 0.9166666666666666]]
Eigenvectors: [[1.0, 0.0], [0.0, 1.0]]
Projected Data: [[0.0, -0.75], [0.0, -0.75], [0.0, 0.25], [0.0, 1.25]]
