## Ejercicio de hiperparametrización con el dataset breast cancer de sklearn

1. Carga el dataset [breast_cancer de `sklearn`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html)
2. Prueba al menos 5 modelos diferentes de clasificación y aplica un GridSearchCV mediante Pipelines. Aplica también un RandomizedSearchCV.
3. Conclusiones. Guarda el modelo final en un archivo con pickle.

In [9]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np

In [5]:
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset object and pull out its `data` (features)
# and `target` (labels) attributes in a single unpacking assignment.
data = load_breast_cancer()
x, y = data.data, data.target

In [7]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [10]:
# 2. Dictionary of models with their parameters.
# Each entry maps a display name to a pair: (unfitted estimator, search grid).
# Grid keys carry the "clf__" prefix because the estimator is installed as the
# "clf" step of the Pipeline built in the tuning loop.
modelos_y_parametros = {
    'DecisionTree': (
        DecisionTreeClassifier(random_state=42),
        dict(
            clf__criterion=['gini', 'entropy'],
            clf__max_depth=[3, 5, 7],
            clf__min_samples_split=[2, 4, 6],
        ),
    ),
    'RandomForest': (
        RandomForestClassifier(random_state=42),
        dict(
            clf__n_estimators=[50, 100],
            clf__max_depth=[3, 5, 7],
            clf__min_samples_split=[2, 4],
        ),
    ),
    'SVM': (
        SVC(random_state=42),
        dict(
            clf__C=[0.1, 1, 10],
            clf__kernel=['linear', 'rbf'],
        ),
    ),
    'KNN': (
        KNeighborsClassifier(),
        dict(
            clf__n_neighbors=[3, 5, 7],
            clf__weights=['uniform', 'distance'],
        ),
    ),
    'LogisticRegression': (
        # max_iter is raised to 1000 for the lbfgs solver used in the grid.
        LogisticRegression(max_iter=1000),
        dict(
            clf__C=[0.01, 0.1, 1, 10],
            clf__penalty=['l2'],
            clf__solver=['lbfgs'],
        ),
    ),
}

In [11]:
# Tune every candidate model twice (exhaustive grid search, then randomized
# search) inside a scaling pipeline, printing the best cross-validation score,
# the best parameters and a classification report on the held-out test set.
for nombre_modelo, (modelo, parametros) in modelos_y_parametros.items():
    print(f"\n🧪 Modelo: {nombre_modelo}")

    # The scaler is a pipeline step, so it is re-fitted together with the
    # classifier on every training split instead of on the full training set.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', modelo)
    ])

    # Grid Search
    print("🔍 GridSearchCV:")
    grid = GridSearchCV(pipeline, parametros, cv=5, n_jobs=-1)
    grid.fit(x_train, y_train)
    print("  Mejor score (Grid):", grid.best_score_)
    print("  Mejores params (Grid):", grid.best_params_)
    y_pred = grid.predict(x_test)
    print(classification_report(y_test, y_pred))

    # Randomized Search
    # Every grid value is a list, so the search space is finite. Asking for
    # more iterations than there are combinations (the LogisticRegression grid
    # has only 4) makes scikit-learn emit a UserWarning, so n_iter is capped
    # at the grid size. The sampled candidates are the same either way.
    n_combinaciones = int(np.prod([len(valores) for valores in parametros.values()]))
    n_iter = min(5, n_combinaciones)

    print("🎲 RandomizedSearchCV:")
    random_search = RandomizedSearchCV(
        pipeline,
        parametros,
        n_iter=n_iter,
        cv=5,
        n_jobs=-1,
        random_state=42,
    )
    random_search.fit(x_train, y_train)
    print("  Mejor score (Random):", random_search.best_score_)
    print("  Mejores params (Random):", random_search.best_params_)
    y_pred_random = random_search.predict(x_test)
    print(classification_report(y_test, y_pred_random))



🧪 Modelo: DecisionTree
🔍 GridSearchCV:
  Mejor score (Grid): 0.9428571428571428
  Mejores params (Grid): {'clf__criterion': 'entropy', 'clf__max_depth': 3, 'clf__min_samples_split': 2}
              precision    recall  f1-score   support

           0       1.00      0.91      0.95        43
           1       0.95      1.00      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.95      0.96       114
weighted avg       0.97      0.96      0.96       114

🎲 RandomizedSearchCV:
  Mejor score (Random): 0.9318681318681319
  Mejores params (Random): {'clf__min_samples_split': 2, 'clf__max_depth': 3, 'clf__criterion': 'gini'}
              precision    recall  f1-score   support

           0       0.95      0.91      0.93        43
           1       0.95      0.97      0.96        71

    accuracy                           0.95       114
   macro avg       0.95      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

