In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib

In [2]:
df_cleaned = pd.read_csv('cleaned_employee_data.csv')
df_cleaned.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,37,1,Travel_Rarely,1141,Research & Development,11,2,Medical,1,Female,...,3,1,0,15,2,1,1,0,0,0
1,51,1,Travel_Rarely,1323,Research & Development,4,4,Life Sciences,1,Male,...,3,3,3,18,2,4,10,0,2,7
2,42,0,Travel_Frequently,555,Sales,26,3,Marketing,3,Female,...,3,4,1,23,2,4,20,4,4,8
3,40,0,Travel_Rarely,1124,Sales,1,2,Medical,2,Male,...,4,3,3,6,2,2,4,3,0,2
4,55,1,Travel_Rarely,725,Research & Development,2,3,Medical,4,Male,...,3,4,1,24,2,3,5,2,1,4


In [7]:
scaler = joblib.load("scaler.pkl")
model = joblib.load("svm_model.pkl")
X_columns = joblib.load("x_columns.pkl")

In [8]:
# Filter data karyawan yang masih aktif
current_employees = df_cleaned[df_cleaned['Attrition'] == 0].copy()
current_employees.reset_index(drop=True, inplace=True)

# Buat dataframe baru tanpa kolom target
X = current_employees.drop(columns=['Attrition'])

# Identifikasi kolom kategori
categorical_cols = X.select_dtypes(include=['object']).columns

# One-hot encoding
X_encoded = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

# Tambahkan kolom yang hilang agar sesuai dengan data training
missing_cols = set(X_columns) - set(X_encoded.columns)
for col in missing_cols:
    X_encoded[col] = 0

# Pastikan urutan kolom sama seperti saat training
X_encoded = X_encoded[X_columns]

# Normalisasi (scaling)
X_scaled = scaler.transform(X_encoded)

In [9]:
current_employees.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,42,0,Travel_Frequently,555,Sales,26,3,Marketing,3,Female,...,3,4,1,23,2,4,20,4,4,8
1,40,0,Travel_Rarely,1124,Sales,1,2,Medical,2,Male,...,4,3,3,6,2,2,4,3,0,2
2,36,0,Travel_Frequently,635,Research & Development,18,1,Medical,2,Female,...,3,1,0,8,2,3,8,1,1,7
3,32,0,Travel_Rarely,1018,Research & Development,3,2,Life Sciences,3,Female,...,3,4,0,10,6,3,7,7,7,7
4,25,0,Travel_Rarely,583,Sales,4,1,Marketing,3,Male,...,3,1,0,5,1,4,5,2,0,3


Prediksi 10 karyawan dengan probabilitas tertinggi untuk meninggalkan perusahaan

In [13]:
predicted_attrition_risk = model.predict(X_scaled)
current_employees['PredictedAttrition'] = predicted_attrition_risk
current_employees.sort_values(by='PredictedAttrition', ascending=False).head(10)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,PredictedAttrition
471,29,0,Travel_Rarely,144,Sales,10,1,Marketing,4,Female,...,1,2,7,2,3,7,7,1,7,1
575,28,0,Travel_Rarely,640,Research & Development,1,3,Technical Degree,4,Male,...,2,0,5,2,2,3,2,1,2,1
418,22,0,Travel_Rarely,217,Research & Development,8,1,Life Sciences,2,Male,...,1,1,4,3,2,4,3,1,1,1
630,41,0,Travel_Rarely,167,Research & Development,12,4,Life Sciences,2,Male,...,1,1,6,4,3,1,0,0,0,1
208,26,0,Travel_Rarely,1443,Sales,23,3,Marketing,3,Female,...,3,1,5,2,2,2,2,0,0,1
475,30,0,Travel_Frequently,1012,Research & Development,5,4,Life Sciences,2,Male,...,2,1,10,3,2,5,4,0,3,1
206,26,0,Travel_Frequently,496,Research & Development,11,2,Medical,1,Male,...,3,1,5,3,3,5,3,3,3,1
148,25,0,Travel_Rarely,180,Research & Development,2,1,Medical,1,Male,...,3,0,6,3,2,4,3,0,1,1
534,28,0,Travel_Frequently,193,Research & Development,2,3,Life Sciences,4,Male,...,2,1,2,2,3,2,2,2,2,1
94,18,0,Travel_Rarely,812,Sales,10,3,Medical,4,Female,...,1,0,0,2,3,0,0,0,0,1
