In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error

# Step 1: Dummy Dataset
# Ten hand-written employee records stored column-wise. Keyword-form dict()
# produces the same keys, in the same order, as a dict literal.
data = dict(
    # Descriptive columns (not part of the model inputs below).
    Name=["Alice", "Bob", "Clara", "David", "Ella", "Frank", "Grace", "Henry", "Ivy", "Jack"],
    Gender=["Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female", "Male"],
    Department=["HR", "IT", "IT", "Finance", "Finance", "HR", "IT", "Finance", "HR", "IT"],
    Role=["Manager", "Engineer", "Engineer", "Analyst", "Analyst", "Manager", "Engineer", "Analyst", "Manager", "Engineer"],
    # Numeric columns used as model inputs.
    EnvironmentSatisfaction=[3, 2, 4, 1, 3, 2, 3, 4, 3, 2],
    SalaryHikePercentage=[12, 8, 15, 6, 10, 7, 11, 14, 9, 8],
    WorkLifeBalance=[3, 2, 4, 1, 3, 2, 3, 4, 3, 2],
    YearsAtCompany=[5, 3, 6, 2, 4, 3, 5, 7, 6, 3],
    ExperienceInCurrentRole=[3, 2, 4, 1, 3, 2, 3, 5, 4, 2],
    YearsSinceLastPromotion=[1, 3, 2, 4, 3, 3, 1, 1, 2, 3],
    YearsWithCurrentManager=[4, 3, 5, 2, 3, 3, 4, 5, 4, 3],
    # Salary is kept out of the model inputs; it is only a column of the frame here.
    CurrentSalary=[55000, 60000, 58000, 65000, 56000, 62000, 57000, 67000, 54000, 63000],
    # Target columns.
    DeservesAppraisal=[1, 0, 1, 0, 1, 0, 1, 1, 1, 0],  # 1: Yes, 0: No
    AppraisalPercentage=[15, 0, 20, 0, 12, 0, 10, 18, 14, 0],
)

df = pd.DataFrame(data=data)

# Step 2: Preprocessing
# Predictor columns, in the order the models will see them.
features = [
    "EnvironmentSatisfaction", "SalaryHikePercentage", "WorkLifeBalance",
    "YearsAtCompany", "ExperienceInCurrentRole",
    "YearsSinceLastPromotion", "YearsWithCurrentManager",
]

# Feature matrix plus one target per task: a 0/1 label for the classifier
# and a percentage for the regressor.
X = df.loc[:, features]
y_classification, y_regression = df.loc[:, "DeservesAppraisal"], df.loc[:, "AppraisalPercentage"]

# Step 3: Train-Test Split
# Both splits use the same test fraction and the same seed.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_classification, **split_options)
X_train_reg, X_test_reg, y_reg_train, y_reg_test = train_test_split(X, y_regression, **split_options)

# Step 4: Classification Model (Eligibility)
# fit() returns the estimator itself, so construction and training can be chained.
classification_model = RandomForestClassifier(random_state=42).fit(X_train, y_class_train)
classification_predictions = classification_model.predict(X_test)

# Step 5: Regression Model (Appraisal Percentage)
regression_model = RandomForestRegressor(random_state=42).fit(X_train_reg, y_reg_train)
regression_predictions = regression_model.predict(X_test_reg)

# Step 6: Model Evaluation
# Score each model on its held-out rows.
classification_accuracy = accuracy_score(y_true=y_class_test, y_pred=classification_predictions)
regression_mae = mean_absolute_error(y_true=y_reg_test, y_pred=regression_predictions)

print(f"Classification Model Accuracy: {classification_accuracy * 100:.2f}%")
print(f"Regression Model Mean Absolute Error: {regression_mae:.2f}")

# Step 7: HR Input
def predict_appraisal(environment_satisfaction, salary_hike_percentage, work_life_balance, years_at_company,
                      experience_in_current_role, years_since_last_promotion, years_with_current_manager, current_salary):
    """Print whether an employee deserves an appraisal and, if so, how much.

    The seven attribute arguments are fed to the module-level
    ``classification_model``. When it predicts ``1``, the module-level
    ``regression_model`` is asked for an appraisal percentage, which is then
    applied to ``current_salary`` to get an amount.

    Args:
        environment_satisfaction: Value for the "EnvironmentSatisfaction" feature.
        salary_hike_percentage: Value for the "SalaryHikePercentage" feature.
        work_life_balance: Value for the "WorkLifeBalance" feature.
        years_at_company: Value for the "YearsAtCompany" feature.
        experience_in_current_role: Value for the "ExperienceInCurrentRole" feature.
        years_since_last_promotion: Value for the "YearsSinceLastPromotion" feature.
        years_with_current_manager: Value for the "YearsWithCurrentManager" feature.
        current_salary: Salary the predicted percentage is applied to. It is
            not passed to either model.

    Returns:
        None. The result is reported with ``print``.
    """
    # Build a one-row DataFrame with the training column names instead of a
    # bare NumPy array. The models were fitted on a DataFrame, and
    # scikit-learn warns ("X does not have valid feature names") when such a
    # model is given unnamed input. Naming the columns also makes the
    # value-to-feature mapping explicit rather than purely positional.
    input_data = pd.DataFrame([{
        "EnvironmentSatisfaction": environment_satisfaction,
        "SalaryHikePercentage": salary_hike_percentage,
        "WorkLifeBalance": work_life_balance,
        "YearsAtCompany": years_at_company,
        "ExperienceInCurrentRole": experience_in_current_role,
        "YearsSinceLastPromotion": years_since_last_promotion,
        "YearsWithCurrentManager": years_with_current_manager,
    }])

    # Predict eligibility
    eligibility = classification_model.predict(input_data)[0]

    if eligibility == 1:
        # Predict appraisal percentage
        appraisal_percentage = regression_model.predict(input_data)[0]
        appraisal_amount = (appraisal_percentage / 100) * current_salary
        print("\nThe employee deserves an appraisal.")
        print(f"Appraisal Percentage: {appraisal_percentage:.2f}%")
        print(f"Appraisal Amount: ₹{appraisal_amount:.2f}")
    else:
        print("\nThe employee does not deserve an appraisal.")
        
# Example run. These values equal the first row (Alice) of the dummy dataset.
sample_employee = {
    "environment_satisfaction": 3,
    "salary_hike_percentage": 12,
    "work_life_balance": 3,
    "years_at_company": 5,
    "experience_in_current_role": 3,
    "years_since_last_promotion": 1,
    "years_with_current_manager": 4,
    "current_salary": 55000,
}
predict_appraisal(**sample_employee)


Classification Model Accuracy: 100.00%
Regression Model Mean Absolute Error: 1.85

The employee deserves an appraisal.
Appraisal Percentage: 13.58%
Appraisal Amount: ₹7469.00




In [6]:
import joblib

# Persist the fitted eligibility classifier to the working directory.
joblib.dump(value=classification_model, filename="classification_model.pkl")

# Persist the fitted appraisal-percentage regressor alongside it.
joblib.dump(value=regression_model, filename="regression_model.pkl")

['regression_model.pkl']

In [7]:
# Write the classifier to "classification_model.pkl" again; the previous cell
# already saved the same object to the same path, so this overwrites that file.
joblib.dump(value=classification_model, filename="classification_model.pkl")

['classification_model.pkl']