## Librerías


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import (
    KFold,
    ShuffleSplit,
    cross_val_score,
    learning_curve,
    train_test_split,
)
from sklearn.pipeline import Pipeline

## Carga del dataset (preprocesado)

In [2]:
# Load the dataset from a CSV file located next to the notebook.
df = pd.read_csv('datos_personas.csv')

In [3]:
df

Unnamed: 0,edad,dependientes,salario,categoria_nw_cliente,codigo_sucursal,saldo_actual,saldo_final_mes_anterior,saldo_mes_actual,saldo_mes_anterior,objetivo,genero_Mujer,ocupacion_Independiente,ocupacion_Pensionado,ocupacion_Trabajador
0,66,0,187,2,755,1458,1458,1458,1458,0,0,1,0,0
1,31,0,146,2,41,3913,5815,5006,5070,0,0,0,0,1
2,42,2,1494,3,388,927,1401,1157,1677,1,0,1,0,0
3,42,0,1096,2,1666,15202,16059,15719,15349,0,1,1,0,0
4,72,0,1020,1,1,7006,7714,7076,7755,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22318,85,0,1589,2,389,1741,1741,1741,1741,0,0,0,1,0
22319,10,0,1020,2,1207,1076,1076,1076,1076,0,1,0,0,0
22320,47,0,1096,2,588,65511,61017,61078,57564,1,0,0,0,1
22321,50,3,1219,3,274,1625,1625,1625,1625,0,0,1,0,0


In [4]:
# Columns used as model inputs, in the order they are passed to the model.
features = [
    'edad',
    'dependientes',
    'salario',
    'saldo_actual',
    'genero_Mujer',
    'ocupacion_Independiente',
    'ocupacion_Pensionado',
    'ocupacion_Trabajador',
]

In [5]:
# Feature matrix: every row, restricted to the selected input columns.
x_features = df.loc[:, features]

In [6]:
# Target vector: the 'objetivo' column for every row.
y_target = df.loc[:, 'objetivo']

In [7]:
# Hold out 20% of the rows for testing, stratified on the target so both
# splits keep the same class proportions. A fixed random_state makes the
# split (and every metric computed from it) reproducible across kernel
# restarts; without it each run shuffles the rows differently.
x_train, x_test, y_train, y_test = train_test_split(
    x_features,
    y_target,
    test_size=0.2,
    stratify=y_target,
    random_state=42,
)

In [8]:
x_train.shape, y_train.shape

((17858, 8), (17858,))

In [9]:
x_test.shape, y_test.shape

((4465, 8), (4465,))

In [10]:
x_train

Unnamed: 0,edad,dependientes,salario,saldo_actual,genero_Mujer,ocupacion_Independiente,ocupacion_Pensionado,ocupacion_Trabajador
14217,65,0,623,2409,1,1,0,0
15720,37,0,1177,7114,0,1,0,0
20031,32,0,334,10135,1,0,0,0
20751,34,0,1020,8046,0,0,0,1
17026,66,0,146,1678,1,1,0,0
...,...,...,...,...,...,...,...,...
13138,12,0,1020,3697,1,0,0,0
21330,39,0,409,2391,0,1,0,0
3584,50,0,1096,20,1,1,0,0
14809,77,0,1020,7558,0,0,1,0


In [11]:
x_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 17858 entries, 14217 to 21709
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype
---  ------                   --------------  -----
 0   edad                     17858 non-null  int64
 1   dependientes             17858 non-null  int64
 2   salario                  17858 non-null  int64
 3   saldo_actual             17858 non-null  int64
 4   genero_Mujer             17858 non-null  int64
 5   ocupacion_Independiente  17858 non-null  int64
 6   ocupacion_Pensionado     17858 non-null  int64
 7   ocupacion_Trabajador     17858 non-null  int64
dtypes: int64(8)
memory usage: 1.2 MB


In [12]:
y_test

Unnamed: 0,objetivo
17166,0
3892,0
6878,0
4545,0
20012,1
...,...
11791,1
20305,0
5950,0
16456,1


In [13]:
class HeuristicModel(BaseEstimator, ClassifierMixin):
    """
    Rule-based baseline classifier compatible with the scikit-learn API.

    Nothing is learned from the data. A row is labelled 1 when its
    ``dependientes`` value is less than or equal to
    ``dependientes_threshold`` and 0 otherwise.

    NOTE(review): the original inline comment described the opposite rule
    (predict 1 when ``dependientes`` exceeds the threshold). The implemented
    ``<=`` behaviour is preserved here; confirm which direction is intended.

    Parameters
    ----------
    dependientes_threshold : int or float, default=100
        Inclusive upper bound on ``dependientes`` for predicting class 1.

    Attributes
    ----------
    classes_ : numpy.ndarray
        Class labels, set by ``fit``.
    """

    def __init__(self, dependientes_threshold=100):
        self.dependientes_threshold = dependientes_threshold

    def fit(self, X, y=None):
        """
        Record the class labels; no parameters are estimated.

        Parameters
        ----------
        X : ignored
            Present only for scikit-learn API compatibility.
        y : array-like, optional
            Target labels. When given, ``classes_`` is set to their sorted
            unique values; otherwise it falls back to ``[0, 1]``, the only
            labels ``predict`` can emit.

        Returns
        -------
        HeuristicModel
            The estimator itself, to allow call chaining.
        """
        # Always expose classes_ after fit so downstream scikit-learn tooling
        # that inspects the attribute does not fail when y is omitted.
        self.classes_ = np.unique(y) if y is not None else np.array([0, 1])
        return self

    def predict(self, X):
        """
        Apply the threshold rule to every row of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain a ``dependientes`` column.

        Returns
        -------
        numpy.ndarray
            Integer array with one label per row: 1 where
            ``dependientes <= dependientes_threshold``, else 0.

        Raises
        ------
        KeyError
            If ``X`` has no ``dependientes`` column.
        """
        # Vectorized comparison instead of a Python-level iterrows() loop;
        # missing values compare as False and therefore map to class 0,
        # exactly as in the row-by-row version.
        within_threshold = X['dependientes'].to_numpy() <= self.dependientes_threshold
        return within_threshold.astype(int)

In [25]:
# Follow the scikit-learn fit-then-predict contract: fitting records the class
# labels on the estimator (the heuristic itself learns nothing) before it is
# used to label the held-out test rows.
model_he = HeuristicModel(dependientes_threshold=100).fit(x_train, y_train)
predictions_he = model_he.predict(x_test)

In [26]:
# Score the heuristic baseline's predictions against the held-out test labels.

accuracy_he = accuracy_score(y_test, predictions_he)
print(f'Accuracy del HeuristicModel: {accuracy_he:.4f}')

precision_he = precision_score(y_test, predictions_he)
print(f'Precision del HeuristicModel: {precision_he:.4f}')

recall_he = recall_score(y_test, predictions_he)
print(f'Recall del HeuristicModel: {recall_he:.4f}')

f1_score_he = f1_score(y_test, predictions_he)
print(f'F1_Score del HeuristicModel: {f1_score_he:.4f}')


Accuracy del HeuristicModel: 0.1843
Precision del HeuristicModel: 0.1843
Recall del HeuristicModel: 1.0000
F1_Score del HeuristicModel: 0.3113
