In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Cargar los datos de entrenamiento
data = pd.read_csv('recursos_humanos.csv')

In [2]:
# Verificar el balance de clases
print(data['left'].value_counts())

left
0    11428
1     3571
Name: count, dtype: int64


In [3]:
# Normalización de los datos de entrenamiento
scaler = MinMaxScaler()
data[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']] = scaler.fit_transform(data[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']])


In [8]:
data

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.318681,0.265625,0.0,0.285047,0.125,0,1,0,sales,low
1,0.780220,0.781250,0.6,0.775701,0.500,0,1,0,sales,medium
2,0.021978,0.812500,1.0,0.822430,0.250,0,1,0,sales,medium
3,0.692308,0.796875,0.6,0.593458,0.375,0,1,0,sales,low
4,0.307692,0.250000,0.0,0.294393,0.125,0,1,0,sales,low
...,...,...,...,...,...,...,...,...,...,...
14994,0.340659,0.328125,0.0,0.257009,0.125,0,1,0,support,low
14995,0.307692,0.187500,0.0,0.299065,0.125,0,1,0,support,low
14996,0.307692,0.265625,0.0,0.219626,0.125,0,1,0,support,low
14997,0.021978,0.937500,0.8,0.859813,0.250,0,1,0,support,low


In [9]:
# convertir sales y salary a variables dummy

data = pd.get_dummies(data, columns=['sales', 'salary'])
data.dtypes

satisfaction_level       float64
last_evaluation          float64
number_project           float64
average_montly_hours     float64
time_spend_company       float64
Work_accident              int64
left                       int64
promotion_last_5years      int64
sales_IT                    bool
sales_RandD                 bool
sales_accounting            bool
sales_hr                    bool
sales_management            bool
sales_marketing             bool
sales_product_mng           bool
sales_sales                 bool
sales_support               bool
sales_technical             bool
salary_high                 bool
salary_low                  bool
salary_medium               bool
dtype: object

In [10]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X = data.drop('left', axis=1)  # 'left' es la columna objetivo
y = data['left']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenamiento del modelo
clf = SVC(kernel='rbf')
clf.fit(X_train, y_train)

# Evaluar el modelo en el conjunto de prueba
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[2215   79]
 [  96  610]]
              precision    recall  f1-score   support

           0       0.96      0.97      0.96      2294
           1       0.89      0.86      0.87       706

    accuracy                           0.94      3000
   macro avg       0.92      0.91      0.92      3000
weighted avg       0.94      0.94      0.94      3000



In [11]:
# Normalización de los datos de prueba
df_prueba = pd.DataFrame({'satisfaction_level':[0.5],'last_evaluation':[0.75],'number_project':[4],'average_montly_hours':[200],'time_spend_company':[4],'Work_accident':[0],'promotion_last_5years':[0],'salary_high':[0],'salary_low':[0],'salary_medium':[1],'sales_IT':[0],'sales_RandD':[0],'sales_accounting':[0],'sales_hr':[0],'sales_management':[0],'sales_marketing':[0],'sales_product_mng':[0],'sales_sales':[1],'sales_support':[0],'sales_technical':[0]})
df_prueba[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']] = scaler.transform(df_prueba[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']])

# Realizar la predicción
prediccion = clf.predict(df_prueba)


if prediccion == 1:
    print('El empleado se va a retirar')
else:
    print('El empleado no se va a retirar')

ValueError: The feature names should match those that were passed during fit.
Feature names must be in the same order as they were in fit.


In [None]:
# Prueba con un empleado que sí renunció
Si_ren = pd.DataFrame({'satisfaction_level':[0.1],'last_evaluation':[0.95],'number_project':[6],'average_montly_hours':[244],'time_spend_company':[5],'Work_accident':[0],'promotion_last_5years':[1],'salary_high':[0],'salary_low':[0],'salary_medium':[1],'sales_IT':[1],'sales_RandD':[0],'sales_accounting':[0],'sales_hr':[0],'sales_management':[0],'sales_marketing':[0],'sales_product_mng':[0],'sales_sales':[0],'sales_support':[0],'sales_technical':[0]})
Si_ren[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']] = scaler.transform(Si_ren[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']])
prediccion_si_ren = clf.predict(Si_ren)

if prediccion_si_ren == 1:
    print('El empleado se va a retirar')
else:
    print('El empleado no se va a retirar')

In [None]:
# Prueba con un empleado que no renunció
No_ren = pd.DataFrame({'satisfaction_level':[0.88],'last_evaluation':[0.58],'number_project':[4],'average_montly_hours':[147],'time_spend_company':[4],'Work_accident':[0],'promotion_last_5years':[0],'salary_high':[0],'salary_low':[0],'salary_medium':[1],'sales_IT':[0],'sales_RandD':[0],'sales_accounting':[0],'sales_hr':[0],'sales_management':[0],'sales_marketing':[0],'sales_product_mng':[0],'sales_sales':[0],'sales_support':[1],'sales_technical':[0]})
No_ren[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']] = scaler.transform(No_ren[['average_montly_hours', 'time_spend_company', 'number_project', 'satisfaction_level', 'last_evaluation']])
prediccion_no_ren = clf.predict(No_ren)

if prediccion_no_ren == 1:
    print('El empleado se va a retirar')
else:
    print('El empleado no se va a retirar')