<a href="https://colab.research.google.com/github/JonathanJuradoS/Exportacion/blob/main/Espacio_Practico_Clasificacion_Empleo_RESUELTO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🧪 Clasificación de Candidatos Tecnológicos (Versión Realista - Exportación de Modelo y Scaler)

### Paso 1: Importar librerías necesarias

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, accuracy_score, f1_score


### Paso 2: Cargar y explorar el dataset

In [2]:
# Read the candidate-selection dataset from the Colab working directory.
df = pd.read_csv(filepath_or_buffer="/content/empleo_seleccion_realista.csv")

# Preview the first five rows as the cell's rich output.
df.head(5)

Unnamed: 0,nivel_educacion,experiencia_anios,python_score,sql_score,java_score,num_certificaciones,proyectos_open_source,puntaje_test_online,disponibilidad_inmediata,edad,seleccionado
0,Titulado,14,26.17,57.2,64.83,2,9,95.44,0,32,0
1,Maestria,11,24.7,80.54,17.24,4,2,73.82,0,27,0
2,Tecnico,13,90.63,76.02,87.24,1,7,73.44,0,21,0
3,Titulado,15,24.95,15.39,61.31,3,4,7.25,1,26,0
4,Titulado,7,27.19,14.92,15.72,4,7,89.53,1,30,0


### Paso 3: Preprocesamiento y exportación de scaler

In [3]:
# One-hot encode the non-numeric columns; drop_first=True omits the first
# level of each encoded column.
df_encoded = pd.get_dummies(df, drop_first=True)

# Target is the "seleccionado" column; every other column is a feature.
y = df_encoded["seleccionado"]
X = df_encoded.drop(columns="seleccionado")

# Hold out 30% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Export the fitted scaler so the same scaling can be reapplied later.
joblib.dump(scaler, "scaler_empleo_boosting.pkl")
print("Scaler exportado como scaler_empleo_boosting.pkl")

Scaler exportado como scaler_empleo_boosting.pkl


### Paso 4: Entrenamiento y evaluación de modelos

In [4]:
# Fixed seed for the stochastic estimators so that Restart & Run All yields the
# same metrics (and the same exported model) every time. Same value as the
# one passed to train_test_split.
RANDOM_STATE = 42

# Candidate classifiers, using scikit-learn defaults apart from the seed.
models = {
    # probability=True enables predict_proba, which the AUC-ROC below needs;
    # the probability calibration is randomized, hence the seed.
    "SVM": SVC(probability=True, random_state=RANDOM_STATE),
    # KNN has no random component, so it takes no seed.
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
}

# Fit each model on the scaled training split and report its metrics on the
# scaled test split.
for model_name, model in models.items():
    # ANSI escape codes render the model name in bold.
    print(f"\n\033[1mModelo: {model_name}\033[0m")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    if hasattr(model, "predict_proba"):
        # Column 1 holds the predicted probability of the positive class.
        y_prob = model.predict_proba(X_test_scaled)[:, 1]
        print("AUC-ROC:", roc_auc_score(y_test, y_prob))
    print("F1-Score:", f1_score(y_test, y_pred))
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))


[1mModelo: SVM[0m
AUC-ROC: 0.4879725537620274
F1-Score: 0.021621621621621623
Accuracy: 0.6983333333333334
              precision    recall  f1-score   support

           0       0.70      1.00      0.82       418
           1       0.67      0.01      0.02       182

    accuracy                           0.70       600
   macro avg       0.68      0.50      0.42       600
weighted avg       0.69      0.70      0.58       600


[1mModelo: KNN[0m
AUC-ROC: 0.5190598874809401
F1-Score: 0.2698961937716263
Accuracy: 0.6483333333333333
              precision    recall  f1-score   support

           0       0.71      0.84      0.77       418
           1       0.36      0.21      0.27       182

    accuracy                           0.65       600
   macro avg       0.54      0.53      0.52       600
weighted avg       0.61      0.65      0.62       600


[1mModelo: Decision Tree[0m
AUC-ROC: 0.49174509700825486
F1-Score: 0.3128205128205128
Accuracy: 0.5533333333333333
            

### Paso 5: Exportar el mejor modelo (Boosting)

In [5]:
# Take the gradient-boosting classifier from the `models` dictionary
# (assumes the training cell has already been run in this kernel).
modelo_final = models["Boosting"]

# Serialize it to disk so it can be reloaded alongside the exported scaler.
joblib.dump(value=modelo_final, filename="modelo_empleo_boosting.pkl")
print("Modelo exportado como modelo_empleo_boosting.pkl")

Modelo exportado como modelo_empleo_boosting.pkl
