In [None]:
# 1. Importar librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix
)

# 2. Load the dataset
# The Windows path has to be a raw string: in a normal literal "\U" starts an
# eight-digit Unicode escape (which is a SyntaxError here) and every "\b"
# would silently turn into a backspace character.
# sep=None makes the Python parser engine sniff the delimiter, so the file
# loads whether it is comma- or semicolon-separated.
# NOTE(review): the UCI copy of this dataset is believed to use ';' — confirm.
df = pd.read_csv(
    r'C:\Users\Katherina\Downloads\bank+marketing\bank\bank.csv',
    sep=None,
    engine='python',
)
# 3. Data cleaning
# Keep only the first occurrence of each fully duplicated row.
df = df.drop_duplicates(keep='first')

# Print the column dtypes and non-null counts.
df.info()

# Normalise the inconsistent job label 'admin.' to 'admin'.
job_label_fixes = {'admin.': 'admin'}
df['job'] = df['job'].replace(job_label_fixes)

# Report how many missing values each column contains.
print("Valores nulos por columna:")
nulls_per_column = df.isnull().sum()
print(nulls_per_column)

# 4. Data exploration
# Summary statistics for every column, numeric and non-numeric alike.
summary_stats = df.describe(include='all')
print(summary_stats)

# Age distribution: 30-bin histogram with a KDE curve on top.
plt.figure(figsize=(8, 4))
ages = df['age']
sns.histplot(ages, bins=30, kde=True)
plt.title("Distribución de Edad")
plt.show()

# Bar chart with the number of rows per job category.
plt.figure(figsize=(10, 5))
job_counts = df['job'].value_counts()
job_counts.plot.bar()
plt.title("Distribución por Profesión")
plt.show()

# Annotated heat map of the pairwise correlations between numeric columns.
plt.figure(figsize=(12, 8))
correlations = df.corr(numeric_only=True)
sns.heatmap(correlations, annot=True, cmap="coolwarm")
plt.title("Matriz de Correlación")
plt.show()

# 5. Preprocessing
# Encode the target column as integers: 'yes' -> 1, 'no' -> 0.
df['y'] = df['y'].map({'yes': 1, 'no': 0})

# One-hot encode the remaining categorical columns; drop_first removes one
# level per feature so the dummy columns are not perfectly collinear.
df_encoded = pd.get_dummies(df, drop_first=True)

# Split into feature matrix and target vector.
X = df_encoded.drop(columns='y')
y = df_encoded['y']

# Train/test split (70/30). stratify=y keeps the proportion of 0/1 labels the
# same in both partitions, so the evaluation metrics are not distorted by a
# test set that happens to contain too few (or too many) positive cases.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Standardise the features. The scaler is fitted on the training partition
# only and then applied to the test partition, so no test-set statistics
# leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. Decision tree model
# Build and fit the classifier in one step (fit returns the estimator).
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Per-class precision/recall/F1 followed by the confusion matrix.
print("Árbol de Decisión:\n")
dt_report = classification_report(y_test, y_pred_dt)
print(dt_report)
dt_confusion = confusion_matrix(y_test, y_pred_dt)
print("Matriz de Confusión:\n", dt_confusion)

# 7. SVM model
# Linear-kernel support vector classifier, fitted on the scaled features.
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Per-class precision/recall/F1 followed by the confusion matrix.
print("SVM:\n")
svm_report = classification_report(y_test, y_pred_svm)
print(svm_report)
svm_confusion = confusion_matrix(y_test, y_pred_svm)
print("Matriz de Confusión:\n", svm_confusion)

# 8. Model comparison
# Test-set predictions keyed by the label shown in the 'Modelo' column.
predictions_by_model = {
    'Árbol de Decisión': y_pred_dt,
    'SVM': y_pred_svm,
}

# Metric column name -> scoring function; dict order fixes the column order.
metric_functions = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1 Score': f1_score,
}

# One row per model, one column per metric, all computed against y_test.
comparison_columns = {'Modelo': list(predictions_by_model)}
for metric_name, metric_fn in metric_functions.items():
    comparison_columns[metric_name] = [
        metric_fn(y_test, predicted)
        for predicted in predictions_by_model.values()
    ]
results = pd.DataFrame(comparison_columns)

print(" Comparación de Modelos:")
print(results)


SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (ipython-input-11-4003364482.py, line 21)