# Análisis de Datos — Clasificación Binaria

**Objetivo:** análisis exploratorio y modelo de clasificación binaria usando `scikit-learn`.

**Dataset:** Breast Cancer Wisconsin (Diagnostic), cargado con `sklearn.datasets.load_breast_cancer`.

> Ejecuta las celdas en orden. Completa las conclusiones al final.


In [None]:
# Imports básicos
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt


In [None]:
# Load the dataset; as_frame=True makes the returned bunch carry a pandas
# DataFrame (features plus the 'target' column) under the 'frame' key.
data = load_breast_cancer(as_frame=True)
df = data['frame']
# Last expression of the cell: shows the first five rows in the notebook.
df.head(n=5)

In [None]:
# Train/test split: separate the features from the 'target' column, then hold
# out 20% of the rows for evaluation.
y = df['target']
X = df.drop('target', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
    stratify=y,  # keep the class proportions equal in both partitions
)
# Last expression of the cell: shows both partition shapes in the notebook.
(X_train.shape, X_test.shape)

In [None]:
# Scaling and model.
# The scaler is fitted on the training partition only and then applied to both
# partitions, so no statistics from the test rows reach the model.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Evaluate on the held-out partition.
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)
print('Accuracy:', acc)

In [None]:
# Classification report and confusion matrix for the held-out test set.
class_names = data.target_names
print(classification_report(y_test, y_pred, target_names=class_names))

# Pin the label order so that row/column i of the matrix always corresponds to
# class_names[i]; the tick labels below rely on this.
class_ids = np.arange(len(class_names))
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

fig, ax = plt.subplots()
# The colormap is set explicitly because the text-contrast rule below assumes
# that darker cells mean higher counts.
im = ax.imshow(cm, cmap='Blues')
fig.colorbar(im, ax=ax)
ax.set_title('Matriz de confusión')
ax.set_xlabel('Predicción')
ax.set_ylabel('Real')

# Show class names on both axes instead of bare numeric positions.
ax.set_xticks(class_ids)
ax.set_xticklabels(class_names)
ax.set_yticks(class_ids)
ax.set_yticklabels(class_names)

# Annotate each cell with its count; use white text on the dark (high-count)
# cells and black elsewhere so every number stays readable.
contrast_threshold = cm.max() / 2
for (i, j), val in np.ndenumerate(cm):
    ax.text(
        j,
        i,
        int(val),
        ha='center',
        va='center',
        color='white' if val > contrast_threshold else 'black',
    )
plt.show()

In [None]:
# Conclusions placeholder: replace the message below with your own summary of
# the model's performance and possible improvements.
print(
    'Conclusión: describe el desempeño del modelo y posibles mejoras.'
)