In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input

In [15]:
# Load the data. header=None makes pandas treat the first CSV row as data,
# so the 15 column names are supplied explicitly afterwards.
column_names = [
    'Edad', 'Trabajo', 'Peso', 'Educacion', 'EduNum',
    'EstadoCivil', 'Ocupacion', 'Relacion', 'Raza', 'Sexo',
    'GananciaCapital', 'PerdidaCapital', 'HorasSemana', 'PaisOrigen', 'Ingreso',
]
data = pd.read_csv('data_evaluacion.csv', header=None).set_axis(column_names, axis=1)
data.head()

Unnamed: 0,Edad,Trabajo,Peso,Educacion,EduNum,EstadoCivil,Ocupacion,Relacion,Raza,Sexo,GananciaCapital,PerdidaCapital,HorasSemana,PaisOrigen,Ingreso
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [16]:
# Convert the target variable to binary: 1 for '>50K', 0 for '<=50K'.
# Labels are normalized first (surrounding whitespace and a trailing period
# removed) so that formatting variants of the same class are not silently
# mapped to 0.
# NOTE(review): the trailing-period variant is assumed from how some copies of
# this census dataset are distributed - confirm against data_evaluacion.csv.
income_labels = data['Ingreso'].astype(str).str.strip().str.rstrip('.')

# Fail loudly on anything that is not one of the two expected classes. This
# also catches an accidental re-run of this cell on the already-encoded column,
# which would otherwise turn every label into 0.
unexpected_labels = set(income_labels.unique()) - {'>50K', '<=50K'}
if unexpected_labels:
    raise ValueError(
        f"Unexpected values in 'Ingreso': {sorted(unexpected_labels)}"
    )

data['Ingreso'] = (income_labels == '>50K').astype('int64')

In [17]:
# Encode every categorical column as integer codes.
# One LabelEncoder is fitted per column and kept in `label_encoders`
# (keyed by column name) so the same mapping can be reused later.
categorical_columns = ['Trabajo', 'Educacion', 'EstadoCivil', 'Ocupacion', 'Relacion', 'Raza', 'Sexo', 'PaisOrigen']
label_encoders = {
    column: LabelEncoder().fit(data[column])
    for column in categorical_columns
}

# Replace each categorical column with its encoded values.
for column, le in label_encoders.items():
    data[column] = le.transform(data[column])

In [18]:
# Separate the features (every column except the target) from the target.
y = data.loc[:, 'Ingreso']
X = data.drop(columns=['Ingreso'])

In [19]:
# Split into train and test sets BEFORE scaling, so that no statistic of the
# test rows (mean / standard deviation) leaks into the preprocessing step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features: the scaler learns its parameters from the training
# rows only, and the same fitted transform is then applied to the test rows.
# X itself is left unscaled; X_train / X_test become NumPy arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [30]:
# Define the model: a fully connected network for binary classification
modelo = Sequential()

# Explicit input layer with one entry per feature column of X_train.
# Passing `input_shape=` to the first Dense layer is discouraged by recent
# Keras versions in favour of an Input object.
modelo.add(Input(shape=(X_train.shape[1],)))

# First hidden layer
modelo.add(Dense(64, activation='relu'))

# Second hidden layer
modelo.add(Dense(32, activation='relu'))

# Third hidden layer
modelo.add(Dense(16, activation='relu'))

# Output layer: a single sigmoid unit, i.e. one probability for the positive
# class (a multiclass problem would use a softmax output instead)
modelo.add(Dense(1, activation='sigmoid'))

# Model configuration
modelo.compile(
    loss='binary_crossentropy',  # loss for a binary (0/1) target with a sigmoid output
    optimizer='adam',
    metrics=['accuracy']
)

# Train for 50 epochs in mini-batches of 32 rows; `historial` holds the object
# returned by fit(). Per-epoch progress is printed by Keras' default verbosity.
historial = modelo.fit(X_train, y_train, epochs=50, batch_size=32)

Epoch 1/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 570us/step - accuracy: 0.8087 - loss: 0.4028
Epoch 2/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 571us/step - accuracy: 0.8504 - loss: 0.3192
Epoch 3/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 567us/step - accuracy: 0.8500 - loss: 0.3188
Epoch 4/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 573us/step - accuracy: 0.8537 - loss: 0.3148
Epoch 5/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 588us/step - accuracy: 0.8542 - loss: 0.3130
Epoch 6/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 571us/step - accuracy: 0.8541 - loss: 0.3123
Epoch 7/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 598us/step - accuracy: 0.8545 - loss: 0.3097
Epoch 8/50
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 624us/step - accuracy: 0.8537 - loss: 0.3103
Epoch 9/

In [31]:
# Evaluate on the held-out test split; the result lists the loss followed by
# the metrics passed to compile() (here: accuracy).
modelo.evaluate(x=X_test, y=y_test)

[1m306/306[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466us/step - accuracy: 0.8446 - loss: 0.3425


[0.3397183120250702, 0.8467601537704468]

In [32]:
import pickle

# Persist the trained model together with the fitted scaler and label encoders,
# one pickle file per object, written in the order listed below.
# NOTE(review): unpickling executes code, so these files should only ever be
# loaded from trusted locations.
artifacts = {
    'RNA_model.pkl': modelo,
    'scaler.pkl': scaler,
    'label_encoders.pkl': label_encoders,
}
for file_name, artifact in artifacts.items():
    with open(file_name, 'wb') as out_file:
        pickle.dump(artifact, out_file)