In [None]:
# 1. Load a labeled dataset (Iris)
import pandas as pd
from sklearn.datasets import load_iris

iris = load_iris()
# Feature matrix as a DataFrame, using the column names shipped with the dataset.
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Class labels as a Series; the name becomes the label column of `df`.
y = pd.Series(data=iris.target, name='target')
# Features plus the 'target' column in one frame (X and y share the same row index).
df = X.join(y)
df.head()

## Visualización básica de los datos

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise plot grid built from `df`, with points colored by the 'target' column.
sns.pairplot(data=df, hue='target')
plt.show()

## Entrenamiento de varios modelos de clasificación

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Candidate classifiers, keyed by the display name used in the tables and plots.
models = {
    # NOTE(review): max_iter=200 is presumably there to avoid solver
    # convergence warnings on this data; confirm it is still needed.
    'Logistic Regression': LogisticRegression(max_iter=200),
    # A random forest is stochastic: without a fixed seed its scores change on
    # every kernel restart, so pin it to the same seed used for the train/test
    # split elsewhere in this notebook.
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC()
}

# Per-fold cross-validation scores (5 folds) for each model, keyed by model name.
results = {}
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5)
    results[name] = scores

# One column per model, one row per fold.
pd.DataFrame(results)

## Matriz de confusión para cada modelo

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Integer class ids in the same order as iris.target_names, so the matrix
# always has one row/column per display label even if a class happens to be
# absent from the test fold.
class_ids = list(range(len(iris.target_names)))

for name, model in models.items():
    # Fitting mutates the instance stored in `models`, which is what lets the
    # later cells reuse the trained estimators.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=iris.target_names)
    disp.plot()
    # Put the model name on the figure itself (instead of printing it) so each
    # plot stays identifiable when skimmed or exported on its own.
    disp.ax_.set_title(f'Confusion Matrix for {name}')
    plt.show()

## Análisis de instancias según la matriz de confusión
Ejemplo: mostrar instancias mal clasificadas por el modelo Random Forest.

In [None]:
# Reuse the Random Forest instance stored in `models`; it must already have
# been fitted on X_train by an earlier cell for predict() to work.
rf = models['Random Forest']
y_pred_rf = rf.predict(X_test)

# Boolean mask (indexed like y_test) of the test rows the model got wrong.
# Computed once and reused for every selection below.
wrong = y_pred_rf != y_test

# Build a new frame with .loc + .assign instead of adding columns to a slice of
# X_test: assigning onto the slice triggers pandas' SettingWithCopyWarning.
# y_pred_rf is a plain array, so it is masked positionally.
misclassified = X_test.loc[wrong].assign(
    true_label=y_test.loc[wrong],
    predicted_label=y_pred_rf[wrong.to_numpy()],
)
misclassified

## Visualización y análisis adicional de resultados

In [None]:
# Compare the mean cross-validation score of each model
avg_scores = {label: fold_scores.mean() for label, fold_scores in results.items()}

# Materialize labels and heights as lists, one bar per model in dict order.
model_labels = list(avg_scores.keys())
mean_values = list(avg_scores.values())

plt.bar(model_labels, mean_values)
plt.title('Comparación de Modelos')
plt.ylabel('Precisión promedio (CV)')
plt.show()