In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from scipy.stats import randint
import numpy as np

# Cargar los datos

data = pd.read_csv('iris-data.csv')

# Visualizar las primeras filas y la estructura de los datos
print(data.head())
print(data.info())

X = data.drop('Species', axis=1)  # Features
y = data['Species']  # Target variable

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar modelo de Árbol de Decisión
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Evaluar modelo de Árbol de Decisión
y_pred_dt = dt_model.predict(X_test)
acc_dt = accuracy_score(y_test, y_pred_dt)
print("Accuracy del Árbol de Decisión:", acc_dt)

# Entrenar modelo KNN
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

# Evaluar modelo KNN
y_pred_knn = knn_model.predict(X_test)
acc_knn = accuracy_score(y_test, y_pred_knn)
print("Accuracy de KNN:", acc_knn)

# Entrenar modelo Red Neuronal
mlp_model = MLPClassifier(random_state=42)
mlp_model.fit(X_train, y_train)

# Evaluar modelo Red Neuronal
y_pred_mlp = mlp_model.predict(X_test)
acc_mlp = accuracy_score(y_test, y_pred_mlp)
print("Accuracy de Red Neuronal:", acc_mlp)

# Crear ensemble usando Boosting
ensemble_boosting = GradientBoostingClassifier(random_state=42)
ensemble_boosting.fit(X_train, y_train)

# Evaluar ensemble usando Boosting
y_pred_boosting = ensemble_boosting.predict(X_test)
acc_boosting = accuracy_score(y_test, y_pred_boosting)
print("Accuracy del ensemble usando Boosting:", acc_boosting)

# Crear ensemble usando VotingClassifier
ensemble_voting = VotingClassifier(estimators=[
    ('dt', dt_model),
    ('knn', knn_model),
    ('mlp', mlp_model)
], voting='hard')
ensemble_voting.fit(X_train, y_train)

# Evaluar ensemble usando VotingClassifier
y_pred_voting = ensemble_voting.predict(X_test)
acc_voting = accuracy_score(y_test, y_pred_voting)
print("Accuracy del ensemble usando VotingClassifier:", acc_voting)

# Definir espacio de búsqueda de parámetros
param_grid_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# Instanciar GridSearchCV
grid_search_dt = GridSearchCV(estimator=DecisionTreeClassifier(random_state=42),
                              param_grid=param_grid_dt,
                              scoring='accuracy',
                              cv=5,
                              verbose=1,
                              n_jobs=-1)
grid_search_dt.fit(X_train, y_train)

# Mejor combinación de parámetros
best_params_dt = grid_search_dt.best_params_
print("Mejor combinación de parámetros para Árbol de Decisión:", best_params_dt)

# Evaluación del mejor modelo
best_dt_model = grid_search_dt.best_estimator_
y_pred_best_dt = best_dt_model.predict(X_test)
acc_best_dt = accuracy_score(y_test, y_pred_best_dt)
print("Accuracy del mejor modelo de Árbol de Decisión:", acc_best_dt)

# Definir espacio de búsqueda de parámetros
param_dist_knn = {
    'n_neighbors': randint(1, 20),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

# Instanciar RandomizedSearchCV
random_search_knn = RandomizedSearchCV(estimator=KNeighborsClassifier(),
                                       param_distributions=param_dist_knn,
                                       n_iter=100,
                                       scoring='accuracy',
                                       cv=5,
                                       verbose=1,
                                       n_jobs=-1,
                                       random_state=42)
random_search_knn.fit(X_train, y_train)

# Mejor combinación de parámetros
best_params_knn = random_search_knn.best_params_
print("Mejor combinación de parámetros para KNN:", best_params_knn)

# Evaluación del mejor modelo
best_knn_model = random_search_knn.best_estimator_
y_pred_best_knn = best_knn_model.predict(X_test)
acc_best_knn = accuracy_score(y_test, y_pred_best_knn)
print("Accuracy del mejor modelo de KNN:", acc_best_knn)

# Definir espacio de búsqueda de parámetros
param_grid_mlp = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

# Instanciar GridSearchCV
grid_search_mlp = GridSearchCV(estimator=MLPClassifier(random_state=42),
                               param_grid=param_grid_mlp,
                               scoring='accuracy',
                               cv=5,
                               verbose=1,
                               n_jobs=-1)
grid_search_mlp.fit(X_train, y_train)

# Mejor combinación de parámetros
best_params_mlp = grid_search_mlp.best_params_
print("Mejor combinación de parámetros para Red Neuronal:", best_params_mlp)

# Evaluación del mejor modelo
best_mlp_model = grid_search_mlp.best_estimator_
y_pred_best_mlp = best_mlp_model.predict(X_test)
acc_best_mlp = accuracy_score(y_test, y_pred_best_mlp)
print("Accuracy del mejor modelo de Red Neuronal:", acc_best_mlp)

   Sepal Length  Sepal Width  Petal Length  Petal Width      Species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Sepal Length  150 non-null    float64
 1   Sepal Width   150 non-null    float64
 2   Petal Length  150 non-null    float64
 3   Petal Width   150 non-null    float64
 4   Species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None
Accuracy del Árbol de Decisión: 1.0
Accuracy de KNN: 1.0




Accuracy de Red Neuronal: 1.0
Accuracy del ensemble usando Boosting: 1.0
Accuracy del ensemble usando VotingClassifier: 1.0
Fitting 5 folds for each of 324 candidates, totalling 1620 fits


540 fits failed out of a total of 1620.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
436 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Protoss\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Protoss\anaconda3\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\Protoss\anaconda3\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\Protoss\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise Inva

Mejor combinación de parámetros para Árbol de Decisión: {'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5}
Accuracy del mejor modelo de Árbol de Decisión: 0.9666666666666667
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Mejor combinación de parámetros para KNN: {'metric': 'manhattan', 'n_neighbors': 14, 'weights': 'uniform'}
Accuracy del mejor modelo de KNN: 1.0
Fitting 5 folds for each of 144 candidates, totalling 720 fits
Mejor combinación de parámetros para Red Neuronal: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'solver': 'adam'}
Accuracy del mejor modelo de Red Neuronal: 1.0


