In [2]:
# Importar librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

# Cargar los datos
column_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income'
]

df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
    names=column_names, na_values=' ?', sep=',\s', engine='python'
)

# Eliminar filas con valores faltantes
df = df.dropna()

# Codificar la variable objetivo
df['income'] = LabelEncoder().fit_transform(df['income'])

# Separar X (features) e y (target)
X = df.drop("income", axis=1)
y = df["income"]

# Codificar variables categóricas en X
cat_cols = X.select_dtypes(include=['object']).columns
for col in cat_cols:
    X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# Escalar variables numéricas de X
num_cols = X.select_dtypes(include=['int64', 'float64']).columns
X[num_cols] = StandardScaler().fit_transform(X[num_cols])


In [3]:
# Dividir en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir los parámetros a buscar
params = {
    'C': [1, 10],
    'kernel': ['linear', 'rbf']
}

# Búsqueda de hiperparámetros con GridSearchCV
clf = GridSearchCV(SVC(), param_grid=params, cv=3)
clf.fit(X_train, y_train)

# Predicciones
y_pred = clf.predict(X_test)

# Evaluación
print("Matriz de confusión:")
print(confusion_matrix(y_test, y_pred))
print("\nReporte de clasificación:")
print(classification_report(y_test, y_pred))


Matriz de confusión:
[[4675  267]
 [ 697  874]]

Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.87      0.95      0.91      4942
           1       0.77      0.56      0.64      1571

    accuracy                           0.85      6513
   macro avg       0.82      0.75      0.78      6513
weighted avg       0.85      0.85      0.84      6513

