# Carga de Librerías

In [1]:
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
from sklearn.exceptions import FitFailedWarning
warnings.filterwarnings('ignore', category=FitFailedWarning)
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=FutureWarning)


from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier





# Carga de base de datos

In [2]:
df = pd.read_csv('fase_1_proy.csv')

# Convertir la columna 'Density (P/Km2)' a float64
df['Density (P/Km2)'] = df['Density (P/Km2)'].str.replace(',', '').astype(float)

# Preparación de datos en X y Y para la creación de modelos

In [3]:
# Seleccionar características (X) y la variable objetivo (y)
X = df.drop(['Pollution_Status', 'No', 'Entity'], axis=1)
y = df['Pollution_Status']

# Codificar las etiquetas si son categóricas
if y.dtype == 'object':
    le = LabelEncoder()
    y = le.fit_transform(y)

# Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# a. NAIVE BAYES

### Modelo general (Este tipo de modelo solo tiene 2 hyper-parámetros a modificar al menos que sea mulmonialNB)

In [4]:
# Crear el modelo Naive Bayes
nb = GaussianNB()

# Entrenar el modelo
nb.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = nb.predict(X_test)

# Evaluar el modelo
accuracyNB = accuracy_score(y_test, y_pred)
print(f'Accuracy Naive Bayes: {accuracyNB:.4f}')

Accuracy Naive Bayes: 0.9260


### Hyper-Parámetro 1

In [5]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=GaussianNB(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyNB1 = accuracy_score(y_test, y_pred)
print(f'Accuracy Naive Bayes hyper 1: {accuracyNB1:.4f}')

Accuracy Naive Bayes hyper 1: 0.9260


### Hyper-Parámetro 2

In [6]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'priors': [None, [0.5, 0.5]]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=GaussianNB(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyNB2 = accuracy_score(y_test, y_pred)
print(f'Accuracy Naive Bayes hyper 2: {accuracyNB2:.4f}')

Accuracy Naive Bayes hyper 2: 0.9274


# b. LDA

### Modelo General

In [7]:
# Crear el modelo LDA
LDA = LinearDiscriminantAnalysis()

# Entrenar el modelo
LDA.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = LDA.predict(X_test)

# Evaluar el modelo
accuracyLDA = accuracy_score(y_test, y_pred)
print(f'Accuracy LDA: {accuracyLDA:.4f}')

Accuracy LDA: 0.7973


### Hyper-Parámetro 1

In [8]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'solver': ['svd', 'lsqr', 'eigen']
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyLDA1 = accuracy_score(y_test, y_pred)
print(f'Accuracy LDA hyper 1: {accuracyLDA1:.4f}')

Accuracy LDA hyper 1: 0.7959


### Hyper-Parámetro 2

In [9]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'shrinkage': [None, 'auto', 0.1, 0.5, 0.9]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyLDA2 = accuracy_score(y_test, y_pred)
print(f'Accuracy LDA hyper 2: {accuracyLDA2:.4f}')

Accuracy LDA hyper 2: 0.7973


### Hyper-Parámetro 3

In [10]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'tol': [1e-4, 1e-3, 1e-2]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyLDA3 = accuracy_score(y_test, y_pred)
print(f'Accuracy LDA hyper 3: {accuracyLDA3:.4f}')

Accuracy LDA hyper 3: 0.7973


### Hyper-Parámetro 4

In [11]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'n_components': [None, 1, 2, 3]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyLDA4 = accuracy_score(y_test, y_pred)
print(f'Accuracy LDA hyper 4: {accuracyLDA4:.4f}')

Accuracy LDA hyper 4: 0.7973


### Hyper-Parámetro 5

In [12]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'store_covariance': [True, False]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyLDA5 = accuracy_score(y_test, y_pred)
print(f'Accuracy LDA hyper 5: {accuracyLDA5:.4f}')

Accuracy LDA hyper 5: 0.7973


# c. REGRESIÓN LOGÍSTICA

### Modelo General

In [13]:
# Crear el modelo Regresión Logística
RL = LogisticRegression()

# Entrenar el modelo
RL.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = RL.predict(X_test)

# Evaluar el modelo
accuracyRL = accuracy_score(y_test, y_pred)
print(f'Accuracy Regresión Logística: {accuracyRL:.4f}')

Accuracy Regresión Logística: 0.8658


### Hyper-Parámetro 1

In [14]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'penalty': ['l1', 'l2', 'elasticnet'] # Tipo de regularización
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LogisticRegression(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRL1 = accuracy_score(y_test, y_pred)
print(f'Accuracy Regresión Logística hyper 1: {accuracyRL1:.4f}')

Accuracy Regresión Logística hyper 1: 0.8658


### Hyper-Parámetro 2

In [15]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'C': [0.001, 0.01, 0.1, 1.0, 10.0]  # Inversa de la fuerza de regularización
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LogisticRegression(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRL2 = accuracy_score(y_test, y_pred)
print(f'Accuracy Regresión Logística hyper 2: {accuracyRL2:.4f}')

Accuracy Regresión Logística hyper 2: 0.8658


### Hyper-Parámetro 3

In [16]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'solver': ['liblinear', 'saga']   # Algoritmo de optimización
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LogisticRegression(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRL3 = accuracy_score(y_test, y_pred)
print(f'Accuracy Regresión Logística hyper 3: {accuracyRL3:.4f}')

Accuracy Regresión Logística hyper 3: 0.9904


### Hyper-Parámetro 4

In [17]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'max_iter': [100, 200, 300]  # Número máximo de iteraciones
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LogisticRegression(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRL4 = accuracy_score(y_test, y_pred)
print(f'Accuracy Regresión Logística hyper 4: {accuracyRL4:.4f}')

Accuracy Regresión Logística hyper 4: 1.0000


### Hyper-Parámetro 5

In [18]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'class_weight': [None, 'balanced', {0: 1, 1: 2}]  # Peso de las clases
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LogisticRegression(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRL5 = accuracy_score(y_test, y_pred)
print(f'Accuracy Regresión Logística hyper 5: {accuracyRL5:.4f}')

Accuracy Regresión Logística hyper 5: 0.8658


# d. SVM

### Modelo General

In [19]:
# Crear el modelo SVM
SVM = SVC()

# Entrenar el modelo
SVM.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = SVM.predict(X_test)

# Evaluar el modelo
accuracySVM = accuracy_score(y_test, y_pred)
print(f'Accuracy SVM: {accuracySVM:.4f}')

Accuracy SVM: 0.6425


### Hyper-Parámetro 1

In [20]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100] # Parámetro de regularización
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=SVC(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracySVM1 = accuracy_score(y_test, y_pred)
print(f'Accuracy SVM hyper 1: {accuracySVM1:.4f}')

Accuracy SVM hyper 1: 0.6740


### Hyper-Parámetro 2

In [21]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'gamma': ['scale', 'auto', 0.1, 1, 10] # Coeficiente del kernel (para 'rbf' y 'poly')
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=SVC(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracySVM2 = accuracy_score(y_test, y_pred)
print(f'Accuracy SVM hyper 2: {accuracySVM2:.4f}')

Accuracy SVM hyper 2: 0.6425


### Hyper-Parámetro 3

In [22]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'degree': [2, 3, 4, 5] # Grado del polinomio (para 'poly')
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=SVC(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracySVM3 = accuracy_score(y_test, y_pred)
print(f'Accuracy SVM hyper 3: {accuracySVM3:.4f}')

Accuracy SVM hyper 3: 0.6425


### Hyper-Parámetro 4

In [23]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'class_weight': [None, 'balanced'] # Peso de las clases
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=SVC(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracySVM4 = accuracy_score(y_test, y_pred)
print(f'Accuracy SVM hyper 4: {accuracySVM4:.4f}')

Accuracy SVM hyper 4: 0.6425


### Hyper-Parámetro 5

In [24]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'coef0': [0.0, 0.1, 0.5, 1.0] # Parámetro independiente en kernel 'poly' y 'sigmoid'
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=SVC(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracySVM5 = accuracy_score(y_test, y_pred)
print(f'Accuracy SVM hyper 5: {accuracySVM5:.4f}')

Accuracy SVM hyper 5: 0.6425


# e. Árboles de Decisión

### Modelo General

In [25]:
# Crear el modelo Árboles de Decisión
AD = DecisionTreeClassifier()

# Entrenar el modelo
AD.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = AD.predict(X_test)

# Evaluar el modelo
accuracyAD = accuracy_score(y_test, y_pred)
print(f'Accuracy Árboles de Decisión: {accuracyAD:.4f}')

Accuracy Árboles de Decisión: 1.0000


### Hyper-Parámetro 1

In [26]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'criterion': ['gini', 'entropy'] # Criterio para medir la calidad de una división
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyAD1 = accuracy_score(y_test, y_pred)
print(f'Accuracy Árboles de Decisión 1: {accuracyAD1:.4f}')

Accuracy Árboles de Decisión 1: 1.0000


### Hyper-Parámetro 2

In [27]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'max_depth': [None, 10, 20, 30, 40] # Profundidad máxima del árbol
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyAD2 = accuracy_score(y_test, y_pred)
print(f'Accuracy Árboles de Decisión 2: {accuracyAD2:.4f}')

Accuracy Árboles de Decisión 2: 1.0000


### Hyper-Parámetro 3

In [28]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'min_samples_split': [2, 5, 10] # Número mínimo de muestras requeridas para dividir un nodo interno
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyAD3 = accuracy_score(y_test, y_pred)
print(f'Accuracy Árboles de Decisión 3: {accuracyAD3:.4f}')

Accuracy Árboles de Decisión 3: 1.0000


### Hyper-Parámetro 4

In [29]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'min_samples_leaf': [1, 2, 4] # Número mínimo de muestras requeridas para ser un nodo hoja
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyAD4 = accuracy_score(y_test, y_pred)
print(f'Accuracy Árboles de Decisión 4: {accuracyAD4:.4f}')

Accuracy Árboles de Decisión 4: 1.0000


### Hyper-Parámetro 5

In [30]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'max_features': ['auto', 'sqrt', 'log2'] # Número máximo de características a considerar en cada división
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyAD5 = accuracy_score(y_test, y_pred)
print(f'Accuracy Árboles de Decisión 5: {accuracyAD5:.4f}')

Accuracy Árboles de Decisión 5: 0.9877


# Random Forest

### Modelo General

In [31]:
# Crear el modelo Random Forest
RF = RandomForestClassifier()

# Entrenar el modelo
RF.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = RF.predict(X_test)

# Evaluar el modelo
accuracyRF = accuracy_score(y_test, y_pred)
print(f'Accuracy Random Forest: {accuracyRF:.4f}')

Accuracy Random Forest: 1.0000


### Hyper-Parámetro 1

In [32]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'n_estimators': [100, 200, 300] # Número de árboles en el bosque
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRF1 = accuracy_score(y_test, y_pred)
print(f'Accuracy Random Forest 1: {accuracyRF1:.4f}')

Accuracy Random Forest 1: 1.0000


### Hyper-Parámetro 2

In [33]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'criterion': ['gini', 'entropy'] # Criterio para medir la calidad de una división
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRF2 = accuracy_score(y_test, y_pred)
print(f'Accuracy Random Forest 2: {accuracyRF2:.4f}')

Accuracy Random Forest 2: 1.0000


### Hyper-Parámetro 3

In [34]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'max_depth': [None, 10, 20, 30] # Profundidad máxima del árbol
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRF3 = accuracy_score(y_test, y_pred)
print(f'Accuracy Random Forest 3: {accuracyRF3:.4f}')

Accuracy Random Forest 3: 1.0000


### Hyper-Parámetro 4

In [35]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'min_samples_split': [2, 5, 10] # Número mínimo de muestras requeridas para dividir un nodo interno
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRF4 = accuracy_score(y_test, y_pred)
print(f'Accuracy Random Forest 4: {accuracyRF4:.4f}')

Accuracy Random Forest 4: 1.0000


### Hyper-Parámetro 5

In [36]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'max_features': ['auto', 'sqrt', 'log2'] # Número máximo de características a considerar en cada división
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyRF5 = accuracy_score(y_test, y_pred)
print(f'Accuracy Random Forest 5: {accuracyRF5:.4f}')

Accuracy Random Forest 5: 1.0000


# Análisis de discriminante lineal

### Modelo General

In [37]:
# Crear el modelo LDA
lda = LinearDiscriminantAnalysis()

# Entrenar el modelo
lda.fit(X_train, y_train)

# Predecir con el conjunto de prueba
y_pred = lda.predict(X_test)

# Evaluar el modelo
accuracyADL = accuracy_score(y_test, y_pred)
print(f'Accuracy ADL: {accuracyADL:.4f}')

Accuracy ADL: 0.7973


### Hyper-Parámetro 1

In [38]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'solver': ['svd', 'lsqr', 'eigen'],
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyADL_optimized1 = accuracy_score(y_test, y_pred)
print(f'Accuracy ADL hyper 1: {accuracyADL_optimized1:.4f}')

Accuracy ADL hyper 1: 0.7959


### Hyper-Parámetro 2

In [39]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
    'shrinkage': [None, 'auto', 0.1, 0.5, 0.9] if 'lsqr' in ['lsqr', 'eigen'] else [None],
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyADL_optimized2 = accuracy_score(y_test, y_pred)
print(f'Accuracy ADL hyper 2: {accuracyADL_optimized2:.4f}')

Accuracy ADL hyper 2: 0.7973


### Hyper-Parámetro 3

In [40]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
     'priors': [None, [0.5, 0.5]],
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyADL_optimized3 = accuracy_score(y_test, y_pred)
print(f'Accuracy ADL hyper 3: {accuracyADL_optimized3:.4f}')

Accuracy ADL hyper 3: 0.7973


### Hyper-Parámetro 4

In [41]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
     'n_components': [None, 1, 2],
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyADL_optimized4 = accuracy_score(y_test, y_pred)
print(f'Accuracy ADL hyper 4: {accuracyADL_optimized4:.4f}')

Accuracy ADL hyper 4: 0.7973


### Hyper-Parámetro 5

In [42]:
# Definir diferentes configuraciones de hiperparámetros
param_grid = {
     'store_covariance': [True, False]
}

# Configurar la búsqueda en cuadrícula
grid_search = GridSearchCV(estimator=LinearDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')

# Entrenar usando búsqueda en cuadrícula
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)

# Evaluar el modelo
accuracyADL_optimized5 = accuracy_score(y_test, y_pred)
print(f'Accuracy ADL hyper 5: {accuracyADL_optimized5:.4f}')

Accuracy ADL hyper 5: 0.7973


# Análisis de discriminante cuadrático

### Modelo General

In [43]:
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
y_pred = qda.predict(X_test)
accuracyQDA = accuracy_score(y_test, y_pred)
print(f'Accuracy QDA: {accuracyQDA:.4f}')

Accuracy QDA: 0.9644


### Hyper-Parámetro 1

In [44]:
param_grid = {
    'reg_param': [0.0, 0.1, 0.5, 0.9, 1.0],  # Parámetro de regularización
    
}

grid_search = GridSearchCV(estimator=QuadraticDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyQDA_optimized1 = accuracy_score(y_test, y_pred)
print(f'Accuracy QDA hyper 1: {accuracyQDA_optimized1:.4f}')

Accuracy QDA hyper 1: 0.9644


### Hyper-Parámetro 2

In [45]:
param_grid = {
    'priors': [None, [0.5, 0.5]],  # Probabilidades a priori de las clases
    
}

grid_search = GridSearchCV(estimator=QuadraticDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyQDA_optimized2 = accuracy_score(y_test, y_pred)
print(f'Accuracy QDA hyper 2: {accuracyQDA_optimized2:.4f}')

Accuracy QDA hyper 2: 0.9644


### Hyper-Parámetro 3

In [46]:
param_grid = {
    
    'store_covariance': [True, False],  # Indicar si almacenar las matrices de covarianza
    
}

grid_search = GridSearchCV(estimator=QuadraticDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyQDA_optimized3 = accuracy_score(y_test, y_pred)
print(f'Accuracy QDA hyper 3: {accuracyQDA_optimized3:.4f}')

Accuracy QDA hyper 3: 0.9644


### Hyper-Parámetro 4

In [47]:
param_grid = {
    
    'tol': [1e-4, 1e-3, 1e-2, 1e-1]  # Tolerancia para detener la iteración
}

grid_search = GridSearchCV(estimator=QuadraticDiscriminantAnalysis(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyQDA_optimized4 = accuracy_score(y_test, y_pred)
print(f'Accuracy QDA hyper 4: {accuracyQDA_optimized4:.4f}')

Accuracy QDA hyper 4: 0.9644


# AdaBoost

### Modelo General

In [48]:
ada = AdaBoostClassifier()
ada.fit(X_train, y_train)
y_pred = ada.predict(X_test)
accuracyAda = accuracy_score(y_test, y_pred)
print(f'Accuracy AdaBoost: {accuracyAda:.4f}')

Accuracy AdaBoost: 1.0000


### Hyper-Parámetro 1

In [49]:
param_grid = {
    'n_estimators': [50, 100, 200],  # Número de estimadores
    
}

grid_search = GridSearchCV(estimator=AdaBoostClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyAda_optimized1 = accuracy_score(y_test, y_pred)
print(f'Accuracy AdaBoost hyper 1: {accuracyAda_optimized1:.4f}')

Accuracy AdaBoost hyper 1: 1.0000


### Hyper-Parámetro 2

In [50]:
param_grid = {
    
    'learning_rate': [0.01, 0.1, 1, 10],  # Tasa de aprendizaje
    
}

grid_search = GridSearchCV(estimator=AdaBoostClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyAda_optimized2 = accuracy_score(y_test, y_pred)
print(f'Accuracy AdaBoost hyper 2: {accuracyAda_optimized2:.4f}')

Accuracy AdaBoost hyper 2: 1.0000


### Hyper-Parámetro 3

In [51]:
param_grid = {
 
    'algorithm': ['SAMME', 'SAMME.R']  # Algoritmo de aumento
}

grid_search = GridSearchCV(estimator=AdaBoostClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyAda_optimized3 = accuracy_score(y_test, y_pred)
print(f'Accuracy AdaBoost hyper 3: {accuracyAda_optimized3:.4f}')

Accuracy AdaBoost hyper 3: 1.0000


# Gradient Boosting

### Modelo general 

In [52]:
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)
accuracyGB = accuracy_score(y_test, y_pred)
print(f'Accuracy Gradient Boosting: {accuracyGB:.4f}')

Accuracy Gradient Boosting: 1.0000


### Hyper-Parámetro 1

In [53]:
param_grid = {
    'n_estimators': [50, 100, 200],  # Número de árboles de decisión

}

grid_search = GridSearchCV(estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyGB_optimized1 = accuracy_score(y_test, y_pred)
print(f'Accuracy Gradient Boosting hyper 1: {accuracyGB_optimized1:.4f}')

Accuracy Gradient Boosting hyper 1: 1.0000


### Hyper-Parámetro 2

In [54]:
param_grid = {
    
    'learning_rate': [0.01, 0.1, 0.5],  # Tasa de aprendizaje
   
}

grid_search = GridSearchCV(estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyGB_optimized2 = accuracy_score(y_test, y_pred)
print(f'Accuracy Gradient Boosting hyper 2: {accuracyGB_optimized2:.4f}')

Accuracy Gradient Boosting hyper 2: 1.0000


### Hyper-Parámetro 3

In [55]:
param_grid = {
    
    'max_depth': [3, 5, 7],  # Profundidad máxima del árbol

}

grid_search = GridSearchCV(estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyGB_optimized3 = accuracy_score(y_test, y_pred)
print(f'Accuracy Gradient Boosting hyper 3: {accuracyGB_optimized3:.4f}')

Accuracy Gradient Boosting hyper 3: 1.0000


### Hyper-Parámetro 4

In [56]:
param_grid = {

    'min_samples_split': [2, 5, 10],  # Número mínimo de muestras requeridas para dividir un nodo

}

grid_search = GridSearchCV(estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyGB_optimized4 = accuracy_score(y_test, y_pred)
print(f'Accuracy Gradient Boosting hyper 4: {accuracyGB_optimized4:.4f}')

Accuracy Gradient Boosting hyper 4: 1.0000


### Hyper-Parámetro 5

In [57]:
param_grid = {

    'min_samples_leaf': [1, 2, 4],  # Número mínimo de muestras requeridas en cada nodo hoja
    
}

grid_search = GridSearchCV(estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyGB_optimized5 = accuracy_score(y_test, y_pred)
print(f'Accuracy Gradient Boosting hyper 5: {accuracyGB_optimized5:.4f}')

Accuracy Gradient Boosting hyper 5: 1.0000


#  XGBoost

### Modelo general

In [58]:
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)
accuracyXGB = accuracy_score(y_test, y_pred)
print(f'Accuracy XGBoost: {accuracyXGB:.4f}')

Accuracy XGBoost: 0.9973


### Hyper-Parámetro 1

In [59]:
param_grid = {
    'n_estimators': [50, 100, 200],  # Número de árboles de decisión
    
}

grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyXGB_optimized1 = accuracy_score(y_test, y_pred)
print(f'Accuracy XGBoost hyper 1: {accuracyXGB_optimized1:.4f}')

Accuracy XGBoost hyper 1: 0.9973


### Hyper-Parámetro 2

In [60]:
param_grid = {
    
    'learning_rate': [0.01, 0.1, 0.5],  # Tasa de aprendizaje
   
}

grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyXGB_optimized2 = accuracy_score(y_test, y_pred)
print(f'Accuracy XGBoost hyper 2: {accuracyXGB_optimized2:.4f}')

Accuracy XGBoost hyper 2: 0.9986


### Hyper-Parámetro 3

In [61]:
param_grid = {
    
    'max_depth': [3, 5, 7],  # Profundidad máxima del árbol
    
}

grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyXGB_optimized3 = accuracy_score(y_test, y_pred)
print(f'Accuracy XGBoost hyper 3: {accuracyXGB_optimized3:.4f}')

Accuracy XGBoost hyper 3: 0.9973


### Hyper-Parámetro 4

In [62]:
param_grid = {
    
    'subsample': [0.8, 1.0],  # Fracción de muestras a utilizar para entrenar cada árbol
 
}

grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyXGB_optimized4 = accuracy_score(y_test, y_pred)
print(f'Accuracy XGBoost hyper 4: {accuracyXGB_optimized4:.4f}')

Accuracy XGBoost hyper 4: 0.9973


### Hyper-Parámetro 5

In [63]:
param_grid = {

    'colsample_bytree': [0.8, 1.0],  # Fracción de columnas a utilizar para entrenar cada árbol

}

grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyXGB_optimized5 = accuracy_score(y_test, y_pred)
print(f'Accuracy XGBoost hyper 5: {accuracyXGB_optimized5:.4f}')

Accuracy XGBoost hyper 5: 0.9986


# LGBM

### Modelo geneal

In [64]:
lgbm = LGBMClassifier()
lgbm.fit(X_train, y_train)
y_pred = lgbm.predict(X_test)
accuracyLGBM = accuracy_score(y_test, y_pred)
print(f'Accuracy LGBM: {accuracyLGBM:.4f}')

[LightGBM] [Info] Number of positive: 1012, number of negative: 1906
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001005 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4400
[LightGBM] [Info] Number of data points in the train set: 2918, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346813 -> initscore=-0.633078
[LightGBM] [Info] Start training from score -0.633078
Accuracy LGBM: 0.9986


### Hyper-Parámetro 1

In [65]:
param_grid = {
    'n_estimators': [50, 100, 200],  # Número de árboles de decisión

}

grid_search = GridSearchCV(estimator=LGBMClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyLGBM_optimized1 = accuracy_score(y_test, y_pred)
print(f'Accuracy LGBM hyper 1: {accuracyLGBM_optimized1:.4f}')

[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000731 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4369
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000281 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4364
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Nu

### Hyper-Parámetro 2

In [66]:
param_grid = {
    'learning_rate': [0.01, 0.1, 0.5],  # Tasa de aprendizaje

}

grid_search = GridSearchCV(estimator=LGBMClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)


# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyLGBM_optimized2 = accuracy_score(y_test, y_pred)
print(f'Accuracy LGBM with hyper 2: {accuracyLGBM_optimized2:.4f}')

[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000359 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4369
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4364
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Nu

### Hyper-Parámetro 3

In [67]:
param_grid = {
    'max_depth': [3, 5, 7],  # Profundidad máxima del árbol

}

grid_search = GridSearchCV(estimator=LGBMClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyLGBM_optimized3 = accuracy_score(y_test, y_pred)
print(f'Accuracy LGBM hyper 3: {accuracyLGBM_optimized3:.4f}')

[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4369
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000320 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4364
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Nu

### Hyper-Parámetro 4

In [68]:
param_grid = {
    
    'subsample': [0.8, 1.0],  # Fracción de muestras a utilizar para entrenar cada árbol
    
}

grid_search = GridSearchCV(estimator=LGBMClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyLGBM_optimized4 = accuracy_score(y_test, y_pred)
print(f'Accuracy LGBM hyper 4: {accuracyLGBM_optimized4:.4f}')

[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000421 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4369
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000302 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4364
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Nu

### Hyper-Parámetro 5

In [69]:
param_grid = {
    'colsample_bytree': [0.8, 1.0],  # Fracción de columnas a utilizar para entrenar cada árbol
 
}

grid_search = GridSearchCV(estimator=LGBMClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



# Predecir con el conjunto de prueba usando el mejor modelo
y_pred = grid_search.predict(X_test)
accuracyLGBM_optimized5 = accuracy_score(y_test, y_pred)
print(f'Accuracy LGBM hyper 5: {accuracyLGBM_optimized5:.4f}')

[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000376 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4369
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Number of positive: 809, number of negative: 1525
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4364
[LightGBM] [Info] Number of data points in the train set: 2334, number of used features: 20
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.346615 -> initscore=-0.633951
[LightGBM] [Info] Start training from score -0.633951
[LightGBM] [Info] Nu

# TABLA DE RESULTADOS

In [70]:
# Definir todas las variables de precisión
accuracyNB = 0.85
accuracyNB1 = 0.87
# ... y así sucesivamente para todas las variables
accuracyQDA_optimized4 = 0.90

# Crear diccionario con descripciones y puntuaciones
dataresultados = {
    "Descripción Modelo": [
        "Naive Bayes General", "Naive Bayes Hyper 1", "Naive Bayes Hyper 2",
        "LDA General", "LDA Hyper 1", "LDA Hyper 2", "LDA Hyper 3", "LDA Hyper 4", "LDA Hyper 5",
        "Regresión Logística General", "Regresión Logística Hyper 1", "Regresión Logística Hyper 2", "Regresión Logística Hyper 3", "Regresión Logística Hyper 4", "Regresión Logística Hyper 5",
        "SVM General", "SVM Hyper 1", "SVM Hyper 2", "SVM Hyper 3", "SVM Hyper 4", "SVM Hyper 5",
        "Árboles de Decisión General", "Árboles de Decisión General Hyper 1", "Árboles de Decisión General Hyper 2", "Árboles de Decisión General Hyper 3", "Árboles de Decisión General Hyper 4", "Árboles de Decisión General Hyper 5",
        "Random Forest General", "Random Forest General Hyper 1", "Random Forest General Hyper 2", "Random Forest General Hyper 3", "Random Forest General Hyper 4", "Random Forest General Hyper 5", "Análisis de discriminante lineal General", 
        "Análisis de discriminante lineal Hyper 1","Análisis de discriminante lineal Hyper 2","Análisis de discriminante lineal Hyper 3","Análisis de discriminante lineal Hyper 4","Análisis de discriminante lineal Hyper 2",
        "Análisis de discriminante cuadrático", "Análisis de discriminante cuadrático Hyper 1", "Análisis de discriminante cuadrático Hyper 2", "Análisis de discriminante cuadrático Hyper 3", "Análisis de discriminante cuadrático Hyper 4",
        "DaBoost General", "daBoost Hyper 1", "daBoost Hyper 2", "daBoost Hyper 3",
        "Gradient Boosting General", "Gradient Boosting Hyper 1", "Gradient Boosting Hyper 2", "Gradient Boosting Hyper 3", "Gradient Boosting Hyper 4", "Gradient Boosting Hyper 5",
        "XGBoost General", "XGBoost Hyper 1", "XGBoost Hyper 2", "XGBoost Hyper 3", "XGBoost Hyper 4", "XGBoost Hyper 5",
        "LGBM General", "LGBM Hyper 1", "LGBM Hyper 2", "LGBM Hyper 3", "LGBM Hyper 4", "LGBM Hyper 5"
    ],
    "Calidad de Predicción": [
        accuracyNB, accuracyNB1, accuracyNB2,
        accuracyLDA, accuracyLDA1, accuracyLDA2, accuracyLDA3, accuracyLDA4, accuracyLDA5,
        accuracyRL, accuracyRL1, accuracyRL2, accuracyRL3, accuracyRL4, accuracyRL5,
        accuracySVM, accuracySVM1, accuracySVM2, accuracySVM3, accuracySVM4, accuracySVM5,
        accuracyAD, accuracyAD1, accuracyAD2, accuracyAD3, accuracyAD4, accuracyAD5,
        accuracyRF, accuracyRF1, accuracyRF2, accuracyRF3, accuracyRF4, accuracyRF5,
        accuracyADL, accuracyADL_optimized1, accuracyADL_optimized2, accuracyADL_optimized3, accuracyADL_optimized4, accuracyADL_optimized5,
        accuracyQDA, accuracyQDA_optimized1, accuracyQDA_optimized2, accuracyQDA_optimized3, accuracyQDA_optimized4,
        accuracyAda, accuracyAda_optimized1, accuracyAda_optimized2, accuracyAda_optimized3,
        accuracyGB, accuracyGB_optimized1, accuracyGB_optimized2, accuracyGB_optimized3, accuracyGB_optimized4, accuracyGB_optimized5,
        accuracyXGB, accuracyXGB_optimized1, accuracyXGB_optimized2, accuracyXGB_optimized3, accuracyXGB_optimized4, accuracyXGB_optimized5,
        accuracyLGBM, accuracyLGBM_optimized1, accuracyLGBM_optimized2, accuracyLGBM_optimized3, accuracyLGBM_optimized4, accuracyLGBM_optimized5
        
        
        
        
    ]
}

# Crear DataFrame
df = pd.DataFrame(dataresultados)

# Ordenar el DataFrame de mayor a menor según la puntuación
df_ordenado = df.sort_values(by="Calidad de Predicción", ascending=False)

df_ordenado.head(60)

Unnamed: 0,Descripción Modelo,Calidad de Predicción
23,Árboles de Decisión General Hyper 2,1.0
53,Gradient Boosting Hyper 5,1.0
22,Árboles de Decisión General Hyper 1,1.0
47,daBoost Hyper 3,1.0
24,Árboles de Decisión General Hyper 3,1.0
25,Árboles de Decisión General Hyper 4,1.0
27,Random Forest General,1.0
28,Random Forest General Hyper 1,1.0
29,Random Forest General Hyper 2,1.0
30,Random Forest General Hyper 3,1.0


In [71]:
df_ordenado.to_csv("Tabla modelos1.csv", index=False)

# Conclusión

Los resultados sugieren que los modelos basados en técnicas de ensamble (Random Forest, AdaBoost y Gradient Boosting) y los Árboles de Decisión puros son muy efectivos para este conjunto de datos y lograron una métrica optima pero sobre ajustada por lo tanto consideramos que los modelos adecuos para usar son lo del rango de "calidad de prediccion" de 0.86 a 0.96 Estos modelos deberían ser considerados para implementaciones futuras debido a su rendimiento consistente y superior.