In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [None]:
# Load the dataset. The file is read with header=None, so the column names
# are attached explicitly right after reading.
data = pd.read_csv('data_evaluacion.csv', header=None).set_axis(
    [
        'Edad', 'Trabajo', 'Peso', 'Educacion', 'EduNum',
        'EstadoCivil', 'Ocupacion', 'Relacion', 'Raza', 'Sexo',
        'GananciaCapital', 'PerdidaCapital', 'HorasSemana',
        'PaisOrigen', 'Ingreso',
    ],
    axis=1,
)
data.head()

In [None]:
# Convert the target variable to binary: 1 for '>50K', 0 for anything else.
# Labels are normalised before comparing so that surrounding whitespace or a
# trailing period (e.g. ' >50K', '>50K.') still count as the positive class.
# The dtype guard keeps the cell idempotent: once the column is numeric,
# re-running the cell must not re-map it (that would turn every label into 0).
if not pd.api.types.is_numeric_dtype(data['Ingreso']):
    data['Ingreso'] = (
        data['Ingreso']
        .astype(str)
        .str.strip()
        .str.rstrip('.')
        .eq('>50K')
        .astype(int)
    )

In [None]:
# Separate the target column from the predictors
y = data['Ingreso']
X = data.drop(columns=['Ingreso'])

# Hold out 20% of the rows as the test set (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Feature groups: numeric columns get scaled, categorical ones get encoded later
num = [
    'Edad', 'Peso', 'EduNum',
    'GananciaCapital', 'PerdidaCapital', 'HorasSemana',
]
cat = [
    'Trabajo', 'Educacion', 'EstadoCivil', 'Ocupacion',
    'Relacion', 'Raza', 'Sexo', 'PaisOrigen',
]

# Standardise the numeric features. The scaler is fitted on the training rows
# only and then applied to both splits, so no test statistics leak into it.
scaler = StandardScaler().fit(X_train[num])
X_train[num] = scaler.transform(X_train[num])
X_test[num] = scaler.transform(X_test[num])

In [None]:
# One-hot encode the categorical features. The encoder is fitted on the
# training rows only; categories seen only in the test rows are ignored
# (encoded as all zeros) because of handle_unknown='ignore'.
#
# The `sparse=False` keyword was renamed to `sparse_output` in scikit-learn 1.2
# and removed in 1.4, so passing it fails on current releases. Keeping the
# default (sparse) output and densifying with .toarray() works on every version.
encoder = OneHotEncoder(drop='first', handle_unknown='ignore')
X_train_cat = pd.DataFrame(
    encoder.fit_transform(X_train[cat]).toarray(),
    columns=encoder.get_feature_names_out(cat),
)
X_test_cat = pd.DataFrame(
    encoder.transform(X_test[cat]).toarray(),
    columns=encoder.get_feature_names_out(cat),
)

In [None]:
# Put the scaled numeric columns and the one-hot columns side by side.
# Both parts are re-indexed from 0 first so the rows line up positionally.
X_train = pd.concat(
    [part.reset_index(drop=True) for part in (X_train[num], X_train_cat)],
    axis=1,
)
X_test = pd.concat(
    [part.reset_index(drop=True) for part in (X_test[num], X_test_cat)],
    axis=1,
)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Logistic regression baseline: fit on the training split, predict the test split
model_lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
y_pred_lr = model_lr.predict(X_test)

# Report overall accuracy followed by the per-class metrics
print(
    "Regresión Logística - Accuracy:",
    accuracy_score(y_test, y_pred_lr),
)
print(classification_report(y_test, y_pred_lr))


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Train one SVM per kernel and report its test accuracy and per-class metrics
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    model_svm = SVC(kernel=kernel, random_state=42).fit(X_train, y_train)
    y_pred_svm = model_svm.predict(X_test)

    print(
        f"SVM con kernel='{kernel}' - Accuracy:",
        accuracy_score(y_test, y_pred_svm),
    )
    print(classification_report(y_test, y_pred_svm))


In [None]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.metrics import classification_report, accuracy_score

from sklearn.model_selection import cross_val_score

# Search k = 1..20 for k-NN.
# The number of neighbours is chosen by 5-fold cross-validation on the
# TRAINING split only. Picking k by its accuracy on the test split would tune
# the model on the very data used to report performance and make the final
# score optimistically biased.
k_values = range(1, 21)
best_k = 1
best_score = 0

for k in k_values:
    model_knn = KNeighborsClassifier(n_neighbors=k)
    # Mean accuracy over the 5 validation folds for this k
    score = cross_val_score(
        model_knn, X_train, y_train, cv=5, scoring='accuracy'
    ).mean()

    # Strict '>' keeps the smallest k in case of ties
    if score > best_score:
        best_k = k
        best_score = score

    print(f"k-NN con k={k} - Accuracy (CV):", score)

print(f"Mejor k: {best_k} con Accuracy (CV): {best_score}")

# Refit on the full training split with the selected k and evaluate exactly
# once on the held-out test split.
model_knn = KNeighborsClassifier(n_neighbors=best_k)
model_knn.fit(X_train, y_train)
y_pred_knn = model_knn.predict(X_test)

print(f"k-NN con k={best_k} - Accuracy:", accuracy_score(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn))


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with a fixed seed: fit on the training split, predict the test split
model_dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = model_dt.predict(X_test)

# Report overall accuracy followed by the per-class metrics
print(
    "Árbol de Decisión - Accuracy:",
    accuracy_score(y_test, y_pred_dt),
)
print(classification_report(y_test, y_pred_dt))


In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes: fit on the training split, predict the test split
model_nb = GaussianNB().fit(X_train, y_train)
y_pred_nb = model_nb.predict(X_test)

# Report overall accuracy followed by the per-class metrics
print(
    "Naive Bayes - Accuracy:",
    accuracy_score(y_test, y_pred_nb),
)
print(classification_report(y_test, y_pred_nb))


In [None]:
from keras.models import Sequential
from keras.layers import Dense
# Define a multi-layer perceptron (MLP) for binary classification
mlp_model = Sequential()

# Three hidden ReLU layers; the input width is the number of feature columns
mlp_model.add(Dense(64, activation='relu', input_shape=[X_train.shape[1]]))
mlp_model.add(Dense(32, activation='relu'))
mlp_model.add(Dense(16, activation='relu'))
# Single sigmoid unit -> probability of the positive class.
# A softmax over ONE unit always outputs 1.0, so the network could never
# learn anything; sigmoid is the correct activation for a 0/1 target.
mlp_model.add(Dense(1, activation='sigmoid'))

mlp_model.compile(
    # binary_crossentropy matches the single sigmoid output and 0/1 labels
    # (categorical_crossentropy expects one-hot targets over several units).
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Train model
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are not an independent estimate.
mlp_history = mlp_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Final loss and accuracy on the test split
mlp_model.evaluate(X_test,y_test)