In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Load the HR dataset that is used to train and evaluate the model
ruta_datos = 'recursos_humanos.csv'
data = pd.read_csv(ruta_datos)

In [2]:
# Check the class balance of the target column 'left'
conteo_clases = data['left'].value_counts()
print(conteo_clases)

left
0    11428
1     3571
Name: count, dtype: int64


In [3]:
# Min-max scaling of the numeric features.
# The scaler is fitted on the training rows only, so the minimum/maximum of the
# rows that are later used for evaluation do not leak into the preprocessing.
columnas_numericas = [
    'average_montly_hours',
    'time_spend_company',
    'number_project',
    'satisfaction_level',
    'last_evaluation',
]

# A non-stratified train_test_split selects rows from the number of samples and
# the seed only, so splitting the index here with the same test_size and
# random_state as the later split of X / y picks the same training rows.
# NOTE(review): keep these two arguments in sync with that split.
indices_entrenamiento, _ = train_test_split(data.index, test_size=0.3, random_state=20)

scaler = MinMaxScaler()
scaler.fit(data.loc[indices_entrenamiento, columnas_numericas])

# Apply the training-fitted scaling to every row (train and test alike)
data[columnas_numericas] = scaler.transform(data[columnas_numericas])


In [4]:
# Display the DataFrame to inspect the scaled columns
data

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary
0,0.318681,0.265625,0.0,0.285047,0.125,0,1,0,sales,low
1,0.780220,0.781250,0.6,0.775701,0.500,0,1,0,sales,medium
2,0.021978,0.812500,1.0,0.822430,0.250,0,1,0,sales,medium
3,0.692308,0.796875,0.6,0.593458,0.375,0,1,0,sales,low
4,0.307692,0.250000,0.0,0.294393,0.125,0,1,0,sales,low
...,...,...,...,...,...,...,...,...,...,...
14994,0.340659,0.328125,0.0,0.257009,0.125,0,1,0,support,low
14995,0.307692,0.187500,0.0,0.299065,0.125,0,1,0,support,low
14996,0.307692,0.265625,0.0,0.219626,0.125,0,1,0,support,low
14997,0.021978,0.937500,0.8,0.859813,0.250,0,1,0,support,low


In [5]:
# One-hot encode the categorical columns 'sales' and 'salary'
columnas_categoricas = ['sales', 'salary']
data = pd.get_dummies(data, columns=columnas_categoricas)


In [6]:
# Separate the target column ('left') from the features, then hold out 30% of
# the rows as the test set (fixed seed so the split is reproducible)
y = data['left']
X = data.drop(columns='left')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=20,
)


In [7]:
# Support vector classifier with an RBF kernel
clf = SVC(kernel='rbf')

# Keep the feature columns of the training frame; they are used later to align
# new rows to the layout the model was trained on
columnas_entrenamiento = X_train.columns

# Train the model
clf.fit(X_train, y_train)

In [8]:
# Evaluate the model on the held-out test set
y_pred = clf.predict(X_test)
matriz_confusion = confusion_matrix(y_test, y_pred)
reporte = classification_report(y_test, y_pred)
print(matriz_confusion)
print(reporte)

[[3308  136]
 [ 150  906]]
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      3444
           1       0.87      0.86      0.86      1056

    accuracy                           0.94      4500
   macro avg       0.91      0.91      0.91      4500
weighted avg       0.94      0.94      0.94      4500



In [9]:
# Load the rows to score from 'sujeto1.csv' and display them
ruta_prueba = 'sujeto1.csv'
doc_prueba = pd.read_csv(ruta_prueba)
doc_prueba

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,sales,salary
0,0.5,0.75,4,200,4,0,0,sales,medium
1,0.38,0.53,2,157,3,0,0,sales,low
2,0.95,0.98,5,155,3,0,0,accounting,low


In [13]:
# Prepare the first row of doc_prueba for the model and predict its class.
columnas_numericas = [
    'average_montly_hours',
    'time_spend_company',
    'number_project',
    'satisfaction_level',
    'last_evaluation',
]

# One-hot encode 'sales' and 'salary' BEFORE aligning to the training columns.
# The training features are named 'sales_<value>' / 'salary_<value>', so
# reindexing the raw row would drop both columns and leave every dummy at 0,
# i.e. the department and salary of the employee would be ignored.
fila = pd.get_dummies(doc_prueba.iloc[[0]], columns=['sales', 'salary'])

# Align to the training layout; dummy columns absent from this row are filled with 0
df_prueba = fila.reindex(columns=columnas_entrenamiento, fill_value=0)

# Scale the numeric features with the already-fitted scaler (transform only, no refit)
df_prueba[columnas_numericas] = scaler.transform(df_prueba[columnas_numericas])

# Run the prediction
prediccion = clf.predict(df_prueba)

# predict returns one label per input row; read the single label explicitly
# instead of relying on the truthiness of a one-element array
if prediccion[0] == 1:
    print('El empleado renunciará')
else:
    print('El empleado no renunciará')

El empleado no renunciará


In [14]:
# Test with an employee who did leave (second row of doc_prueba).
columnas_numericas = [
    'average_montly_hours',
    'time_spend_company',
    'number_project',
    'satisfaction_level',
    'last_evaluation',
]

# One-hot encode 'sales' and 'salary' BEFORE aligning to the training columns.
# The training features are named 'sales_<value>' / 'salary_<value>', so
# reindexing the raw row would drop both columns and leave every dummy at 0,
# i.e. the department and salary of the employee would be ignored.
fila = pd.get_dummies(doc_prueba.iloc[[1]], columns=['sales', 'salary'])

# Align to the training layout; dummy columns absent from this row are filled with 0
df_prueba = fila.reindex(columns=columnas_entrenamiento, fill_value=0)

# Scale the numeric features with the already-fitted scaler (transform only, no refit)
df_prueba[columnas_numericas] = scaler.transform(df_prueba[columnas_numericas])

# Run the prediction
prediccion = clf.predict(df_prueba)

# predict returns one label per input row; read the single label explicitly
# instead of relying on the truthiness of a one-element array
if prediccion[0] == 1:
    print('El empleado renunciará')
else:
    print('El empleado no renunciará')

El empleado renunciará


In [15]:
# Test with an employee who did not leave (third row of doc_prueba).
columnas_numericas = [
    'average_montly_hours',
    'time_spend_company',
    'number_project',
    'satisfaction_level',
    'last_evaluation',
]

# One-hot encode 'sales' and 'salary' BEFORE aligning to the training columns.
# The training features are named 'sales_<value>' / 'salary_<value>', so
# reindexing the raw row would drop both columns and leave every dummy at 0,
# i.e. the department and salary of the employee would be ignored.
fila = pd.get_dummies(doc_prueba.iloc[[2]], columns=['sales', 'salary'])

# Align to the training layout; dummy columns absent from this row are filled with 0
df_prueba = fila.reindex(columns=columnas_entrenamiento, fill_value=0)

# Scale the numeric features with the already-fitted scaler (transform only, no refit)
df_prueba[columnas_numericas] = scaler.transform(df_prueba[columnas_numericas])

# Run the prediction
prediccion = clf.predict(df_prueba)

# predict returns one label per input row; read the single label explicitly
# instead of relying on the truthiness of a one-element array
if prediccion[0] == 1:
    print('El empleado renunciará')
else:
    print('El empleado no renunciará')

El empleado no renunciará
