# Paso 1. Se importan las librerias

In [23]:
import pandas as pd
import tensorflow as tf
import tensorflow.keras as kr
import numpy as np

from summarytools import dfSummary
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical


# Paso 2. Cargar los datos

In [5]:
import pandas as pd

# Carga el archivo Excel
df = pd.read_excel('BDD-Reclamos-Consolidado-Total Reclamos.xlsx')

In [6]:
df.head()

Unnamed: 0,Reclamos,ipcprin,Edad,DepartamentoDescripcion,Sexo,EstadoCivil,Rentabilidadantesreclamo,Segmentoantesreclamo,Area,Abandono
0,2,3,42,GUATEMALA ...,FEMENINO ...,CASADO ...,9046.941583,BAJO,ATENCION AL CLIENTE,0
1,1,2,36,GUATEMALA ...,FEMENINO ...,SOLTERO ...,4.86065,MASIVO,ATENCION AL CLIENTE,0
2,1,2,48,GUATEMALA ...,MASCULINO ...,SOLTERO ...,1279.018169,MEDIO,ATENCION AL CLIENTE,0
3,1,3,41,ZACAPA ...,MASCULINO ...,CASADO ...,13546.512618,PRIVADO,ATENCION AL CLIENTE,0
4,1,5,50,GUATEMALA ...,MASCULINO ...,CASADO ...,14727.358352,ALTO,EMPRESARIAL,0


In [7]:
dfSummary(df)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,Reclamos [int64],Mean (sd) : 1.3 (0.7) min < med < max: 1.0 < 1.0 < 8.0 IQR (CV) : 0.0 (1.8),7 distinct values,,0 (0.0%)
2,ipcprin [int64],Mean (sd) : 3.2 (2.1) min < med < max: 0.0 < 3.0 < 40.0 IQR (CV) : 2.0 (1.5),17 distinct values,,0 (0.0%)
3,Edad [int64],Mean (sd) : 38.8 (12.7) min < med < max: 18.0 < 36.0 < 93.0 IQR (CV) : 17.0 (3.1),72 distinct values,,0 (0.0%)
4,DepartamentoDescripcion [object],1. GUATEMALA 2. QUETZALTENANGO 3. SACATEPEQUEZ 4. ESCUINTLA 5. CHIMALTENANGO 6. IZABAL 7. SUCHITEPEQUEZ 8. SAN MARCOS 9. HUEHUETENANGO 10. RETALHULEU 11. other,"3,137 (84.0%) 110 (2.9%) 89 (2.4%) 78 (2.1%) 39 (1.0%) 34 (0.9%) 30 (0.8%) 22 (0.6%) 22 (0.6%) 20 (0.5%) 155 (4.1%)",,0 (0.0%)
5,Sexo [object],1. MASCULINO 2. FEMENINO,"2,031 (54.4%) 1,705 (45.6%)",,0 (0.0%)
6,EstadoCivil [object],1. SOLTERO 2. CASADO 3. UNIDO 4. DIVORCIADO,"2,673 (71.5%) 1,057 (28.3%) 5 (0.1%) 1 (0.0%)",,0 (0.0%)
7,Rentabilidadantesreclamo [float64],Mean (sd) : 5774.7 (69262.4) min < med < max: -35.6 < 575.1 < 4152730.3 IQR (CV) : 3487.5 (0.1),"2,858 distinct values",,0 (0.0%)
8,Segmentoantesreclamo [object],1. MEDIO 2. BAJO 3. MASIVO 4. ALTO 5. PRIVADO 6. SIN SALDO 7. MOROSO,"1,108 (29.7%) 906 (24.3%) 901 (24.1%) 560 (15.0%) 143 (3.8%) 111 (3.0%) 7 (0.2%)",,0 (0.0%)
9,Area [object],1. ATENCION AL CLIENTE 2. CLIENTES DE CONSUMO 3. NO CODIFICADO 4. CABLE 5. POST PAGO 6. JURIDICO 7. EMPRESARIAL 8. VIP 9. RESIDENCIAL 10. HOGAR,"2,801 (75.0%) 725 (19.4%) 79 (2.1%) 53 (1.4%) 30 (0.8%) 14 (0.4%) 13 (0.3%) 13 (0.3%) 6 (0.2%) 2 (0.1%)",,0 (0.0%)
10,Abandono [int64],Mean (sd) : 0.1 (0.3) min < med < max: 0.0 < 0.0 < 1.0 IQR (CV) : 0.0 (0.3),2 distinct values,,0 (0.0%)


# Paso 3. Preprocesamiendo de Datos

In [8]:
# Imputar Valores Faltantes en Variables Numérica
df['Reclamos'].fillna(df['Reclamos'].mean(), inplace=True)
df['ipcprin'].fillna(df['ipcprin'].mean(), inplace=True)
df['Edad'].fillna(df['Edad'].mean(), inplace=True)
df['Rentabilidadantesreclamo'].fillna(df['Rentabilidadantesreclamo'].mean(), inplace=True)


In [9]:
# Tratamiento de Outliers utilizando el metódo de Winsorizing para limitar los valores extremos

from scipy.stats.mstats import winsorize

df['Reclamos'] = winsorize(df['Reclamos'], limits=[0.05, 0.05])
df['ipcprin'] = winsorize(df['ipcprin'], limits=[0.05, 0.05])
df['Edad'] = winsorize(df['Edad'], limits=[0.05, 0.05])
df['Rentabilidadantesreclamo'] = winsorize(df['Rentabilidadantesreclamo'], limits=[0.05, 0.05])


In [10]:
#One-Hot Encoding para Variables Categóricas
df = pd.get_dummies(df, columns=['Segmentoantesreclamo', 'EstadoCivil', 'Sexo', 'DepartamentoDescripcion', 'Area'], drop_first=True)


In [11]:
# División de Datos en Entrenamiento y Prueba
X = df.drop(['Abandono'], axis=1)  # Variables independientes
y = df['Abandono']  # Variable objetivo

In [12]:
dummy_y = kr.utils.to_categorical(y)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, dummy_y, test_size=0.2, random_state=42)

# Paso 4. Modelado

### Creación Red Neuronal

In [17]:
model= kr.models.Sequential()
model.add(kr.layers.Dense(46,input_dim=46,activation='relu'))
model.add(kr.layers.Dense(2,activation='softmax' ))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [18]:
model.fit(X_train, y_train, epochs=25, batch_size=10)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x1882b7c5430>

In [22]:
def create_model(num_layers):
    model= kr.models.Sequential()
    model.add(kr.layers.Dense(46,input_dim=46,activation='relu'))
    
    for _ in range(num_layers):
        model.add(kr.layers.Dense(46, activation='relu'))
        
    model.add(kr.layers.Dense(2, activation='softmax'))  # Ajuste en la cantidad de neuronas aquí
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def train_and_evaluate_model(num_layers, X_train, y_train, X_test, y_test):
    model = create_model(num_layers)
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
    _, accuracy = model.evaluate(X_test, y_test)
    return accuracy

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

##y_train_one_hot = to_categorical(y_train)
###y_test_one_hot = to_categorical(y_test)

best_accuracy = 0
best_num_layers = 0

for num_layers in range(1, 6):
    accuracy = train_and_evaluate_model(num_layers, X_train_scaled, y_train, X_test_scaled, y_test)
    print(f"Accuracy with {num_layers} layers: {accuracy}")
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_num_layers = num_layers

print(f"Best accuracy achieved: {best_accuracy} with {best_num_layers} layers.")

Accuracy with 1 layers: 0.8823529481887817
Accuracy with 2 layers: 0.8810160160064697
Accuracy with 3 layers: 0.8823529481887817
Accuracy with 4 layers: 0.8770053386688232
Accuracy with 5 layers: 0.875668466091156
Best accuracy achieved: 0.8823529481887817 with 1 layers.


In [24]:
def create_model(num_neurons):
    model= kr.models.Sequential()
    model.add(kr.layers.Dense(46,input_dim=46,activation='relu'))
    model.add(kr.layers.Dense(2,activation='softmax' ))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def train_and_evaluate_model(num_neurons, X_train, y_train, X_test, y_test):
    model = create_model(num_neurons)
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
    _, accuracy = model.evaluate(X_test, y_test)
    return accuracy

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

##y_train_one_hot = to_categorical(y_train)
##y_test_one_hot = to_categorical(y_test)

start_neurons = 10
end_neurons = 300
num_neurons_list = np.logspace(np.log10(start_neurons), np.log10(end_neurons), num=10, dtype=int)

best_accuracy = 0
best_num_neurons = 0

for num_neurons in num_neurons_list:
    accuracy = train_and_evaluate_model(num_neurons, X_train_scaled, y_train, X_test_scaled, y_test)
    print(f"Accuracy with {num_neurons} neurons: {accuracy}")
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_num_neurons = num_neurons

print(f"Best accuracy achieved: {best_accuracy} with {best_num_neurons} neurons.")

Accuracy with 10 neurons: 0.8796791434288025
Accuracy with 14 neurons: 0.883689820766449
Accuracy with 21 neurons: 0.8863636255264282
Accuracy with 31 neurons: 0.883689820766449
Accuracy with 45 neurons: 0.8823529481887817
Accuracy with 66 neurons: 0.8823529481887817
Accuracy with 96 neurons: 0.8810160160064697
Accuracy with 140 neurons: 0.883689820766449
Accuracy with 205 neurons: 0.8823529481887817
Accuracy with 300 neurons: 0.8823529481887817
Best accuracy achieved: 0.8863636255264282 with 21 neurons.


In [25]:
modelfinal= kr.models.Sequential()
modelfinal.add(kr.layers.Dense(21,input_dim=46,activation='relu'))
modelfinal.add(kr.layers.Dense(2,activation='softmax' ))
modelfinal.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [27]:
modelfinal.fit(X_train, y_train, epochs=25, batch_size=10)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x1882f3909d0>

# Paso 5. Evaluación de Modelo

In [19]:
loss, accuracy = model.evaluate(X_test, y_test)

# Imprimimos la precisión del modelo, multiplicada por 100 para obtener un porcentaje.
print('Precisión: %.2f' % (accuracy*100))

Precisión: 88.37


In [28]:
loss, accuracy = modelfinal.evaluate(X_test, y_test)

# Imprimimos la precisión del modelo, multiplicada por 100 para obtener un porcentaje.
print('Precisión: %.2f' % (accuracy*100))

Precisión: 88.37
