# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lightgbm import LGBMClassifier
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier


# Stacking ensemble classifier (kept under its historical name ``RandomForest``)
class RandomForest:
    """Stacking classifier built from XGBoost, LightGBM and ExtraTrees learners.

    ``fit`` wires the three base learners into a scikit-learn
    ``StackingClassifier`` whose final estimator is a
    ``RandomForestClassifier``.  The class also implements ``get_params`` and
    ``set_params`` so it can be used as a tunable pipeline step.

    Args:
        n_estimators: Number of trees / boosting rounds; shared by the three
            base learners and by the final random forest.
        criterion: Split criterion, forwarded to ExtraTrees only.
        max_depth: Maximum tree depth, forwarded to all three base learners.
        min_samples_split: Forwarded to ExtraTrees only.
        min_samples_leaf: Forwarded to ExtraTrees only.
        max_features: Forwarded to ExtraTrees only.
        bootstrap: Forwarded to ExtraTrees only.
        learning_rate: Forwarded to XGBoost and LightGBM.
        subsample: Forwarded to XGBoost and LightGBM.
        colsample_bytree: Forwarded to XGBoost and LightGBM.
        num_leaves: Forwarded to LightGBM only.
        gamma: Forwarded to XGBoost only.
        objective: Forwarded to XGBoost only.
        max_leaf_nodes: Forwarded to XGBoost and LightGBM.
        min_impurity_decrease: Forwarded to XGBoost only.
        min_weight_fraction_leaf: Forwarded to XGBoost only.
        min_child_samples: Forwarded to XGBoost and LightGBM.
        feature_fraction: Forwarded to XGBoost and LightGBM.
        metric: Forwarded to XGBoost only.

    Attributes:
        random_state: Fixed to 42 and passed to every learner.
        stacking_model: The ``StackingClassifier``; ``None`` until ``fit``.
        models: Fitted base learners of the last ``fit`` (empty before it).
        predictions: Empty list created in ``__init__``; not used by this class.
    """

    # Constructor arguments, in the order ``get_params`` reports them.
    _PARAM_NAMES = (
        'n_estimators', 'criterion', 'max_depth', 'min_samples_split',
        'min_samples_leaf', 'max_features', 'bootstrap', 'learning_rate',
        'subsample', 'colsample_bytree', 'num_leaves', 'gamma', 'objective',
        'max_leaf_nodes', 'min_impurity_decrease', 'min_weight_fraction_leaf',
        'min_child_samples', 'feature_fraction', 'metric',
    )

    def __init__(self, n_estimators, criterion, max_depth, min_samples_split, min_samples_leaf, 
                 max_features, bootstrap, learning_rate, subsample, colsample_bytree, num_leaves, gamma,
                 objective='binary:logistic', max_leaf_nodes=None, min_impurity_decrease=0.0,
                 min_weight_fraction_leaf=0.0, min_child_samples=20, feature_fraction=0.6, metric='logloss'):
        self.n_estimators = n_estimators
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.random_state = 42
        self.stacking_model = None
        self.predictions = []
        self.learning_rate = learning_rate
        self.subsample = subsample
        self.colsample_bytree = colsample_bytree
        self.num_leaves = num_leaves
        self.gamma = gamma
        self.objective = objective
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.min_child_samples = min_child_samples
        self.feature_fraction = feature_fraction
        self.metric = metric
        self.models = []  # Fitted base learners; filled by fit()

    def fit(self, X, y):
        """Build and fit the stacking ensemble on ``X`` / ``y``.

        Returns:
            ``self``, so calls can be chained.
        """
        # Keywords handed to both gradient-boosting learners.
        # NOTE(review): max_leaf_nodes, min_impurity_decrease,
        # min_weight_fraction_leaf, min_child_samples, feature_fraction and
        # metric look like scikit-learn / LightGBM names rather than XGBoost
        # ones (and max_leaf_nodes does not look like a LightGBM name either).
        # They are forwarded exactly as before; confirm the libraries actually
        # consume them.
        boosting_kwargs = dict(
            n_estimators=self.n_estimators, max_depth=self.max_depth,
            random_state=self.random_state, learning_rate=self.learning_rate,
            subsample=self.subsample, colsample_bytree=self.colsample_bytree,
            max_leaf_nodes=self.max_leaf_nodes,
            min_child_samples=self.min_child_samples,
            feature_fraction=self.feature_fraction,
        )

        # Level-1 (base) learners
        estimators = [
            ('xgb', XGBClassifier(gamma=self.gamma, objective=self.objective,
                                  min_impurity_decrease=self.min_impurity_decrease,
                                  min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                                  metric=self.metric, **boosting_kwargs)),

            ('lgbm', LGBMClassifier(num_leaves=self.num_leaves, **boosting_kwargs)),

            ('extra_trees', ExtraTreesClassifier(n_estimators=self.n_estimators, criterion=self.criterion,
                                                 max_depth=self.max_depth, min_samples_split=self.min_samples_split,
                                                 min_samples_leaf=self.min_samples_leaf, max_features=self.max_features,
                                                 bootstrap=self.bootstrap, random_state=self.random_state)),
        ]

        # Level-2 (final) learner that combines the base predictions
        rf_base = RandomForestClassifier(n_estimators=self.n_estimators, random_state=self.random_state)

        self.stacking_model = StackingClassifier(estimators=estimators, final_estimator=rf_base)
        self.stacking_model.fit(X, y)

        # Keep the fitted base learners so feature_importances() has
        # something to aggregate (this list used to stay empty forever).
        self.models = list(self.stacking_model.estimators_)
        return self

    def predict(self, X):
        """Return the stacking model's predictions for ``X`` (requires ``fit``)."""
        return self.stacking_model.predict(X)

    def feature_importances(self, X, y):
        """Refit on ``X`` / ``y`` and return the mean base-learner importances.

        The model is always refitted first, so the result describes the
        columns of the ``X`` passed here.

        Returns:
            1-D array with one value per column of ``X``.
        """
        self.fit(X, y)
        return self._mean_importances(X.shape[1])

    def _mean_importances(self, n_features):
        """Average the normalised ``feature_importances_`` of ``self.models``.

        Each learner's vector is rescaled to sum to 1 before averaging so that
        learners reporting importances on different scales weigh the same.
        Returns zeros when there are no fitted learners (the previous code
        divided by zero and produced NaN).
        """
        total = np.zeros(n_features)
        if not self.models:
            return total
        for model in self.models:
            raw = np.asarray(model.feature_importances_, dtype=float)
            scale = raw.sum()
            if scale > 0:
                total += raw / scale
        return total / len(self.models)

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is ignored)."""
        return {name: getattr(self, name) for name in self._PARAM_NAMES}

    def set_params(self, **params):
        """Set each given name as an attribute and return ``self``."""
        for param, value in params.items():
            setattr(self, param, value)
        return self

# Wrapper that splits the data, tunes PCA and trains/evaluates RandomForest

class RandomForestClassifierWrapper:
    """Train and evaluate ``RandomForest`` behind a grid-searched PCA step.

    The constructor holds out 20% of the data (``random_state=42``).
    ``train`` picks the PCA size by cross-validation, ``evaluate`` scores the
    fitted model on the held-out part.

    Args:
        X: Feature matrix (2-D, one column per feature).
        y: Target labels.
        feature_names: Optional names of the columns of ``X``; when given,
            ``train`` also ranks the original features by importance.
        **kwargs: Forwarded unchanged to ``RandomForest``.

    Attributes:
        X_train, X_test: The split features; replaced by their PCA
            projections once ``train`` has run.
        y_train, y_test: The split labels.
        rf: The ``RandomForest`` instance (the tuned, fitted one after ``train``).
        pca: The fitted PCA of the best pipeline; ``None`` before ``train``.
        selected_features: ``(name, importance)`` pairs sorted by decreasing
            importance, or ``None`` when not computed.
    """

    # PCA sizes tried by the grid search (capped to the feature count in train()).
    N_COMPONENTS_GRID = (10, 20, 30, 42)

    def __init__(self, X, y, feature_names=None, **kwargs):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        # Keep the untransformed split so train() can be called more than once
        # without applying PCA on top of already-projected data.
        self._X_train_raw = self.X_train
        self._X_test_raw = self.X_test
        self.feature_names = feature_names
        self.kwargs = kwargs
        self.rf = RandomForest(**kwargs)
        self.selected_features = None
        self.pca = None

    @classmethod
    def _candidate_components(cls, n_features):
        """Return the grid values that fit in ``n_features`` (never empty)."""
        usable = [k for k in cls.N_COMPONENTS_GRID if k <= n_features]
        return usable or [n_features]

    @staticmethod
    def _project_onto_features(components, component_importances):
        """Map per-component importances back to the original features.

        Each component's importance is shared among the original features in
        proportion to its squared loading.  Rows of ``components`` are
        expected to be unit-length principal axes, in which case the total
        importance is preserved.  This is an approximation, not an exact
        attribution.

        Args:
            components: Array-like of shape (n_components, n_features).
            component_importances: Array-like of length n_components.

        Returns:
            1-D array of length n_features.
        """
        squared_loadings = np.square(np.asarray(components, dtype=float))
        weights = np.asarray(component_importances, dtype=float)
        return squared_loadings.T @ weights

    def train(self):
        """Grid-search the PCA size, keep the best pipeline and rank features.

        Raises:
            ValueError: If ``feature_names`` does not have one entry per
                column of the training data (checked before any fitting).
        """
        X_train_raw, X_test_raw = self._X_train_raw, self._X_test_raw
        n_features = np.shape(X_train_raw)[1]
        if self.feature_names is not None and len(self.feature_names) != n_features:
            raise ValueError(
                f"feature_names has {len(self.feature_names)} entries but X has {n_features} columns"
            )

        # Pipeline: PCA followed by the stacking classifier
        pipeline = Pipeline([
            ('pca', PCA()),
            ('rf', self.rf)
        ])

        # Only try component counts the data can actually provide
        param_grid = {
            'pca__n_components': self._candidate_components(n_features)
        }

        # Cross-validated grid search
        grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='precision')
        grid_search.fit(X_train_raw, self.y_train)

        # The best pipeline has already been refitted on the whole training
        # set by GridSearchCV, so reuse its fitted PCA and classifier instead
        # of fitting a second PCA and training the stack again.
        best_pipeline = grid_search.best_estimator_
        pca = best_pipeline.named_steps['pca']
        self.rf = best_pipeline.named_steps['rf']
        self.pca = pca

        best_n_components = pca.n_components
        print("\n**********************Mejor número de componentes principales (n_components):", best_n_components)

        # Project both splits with the PCA the classifier was trained on
        self.X_train = pca.transform(X_train_raw)
        self.X_test = pca.transform(X_test_raw)

        # Show the principal axes found by PCA
        selected_features = pca.components_
        print("Características seleccionadas por PCA:")
        print(selected_features)

        # Rank the ORIGINAL features when their names are known.  The
        # classifier only sees principal components, so its importances are
        # per component and must be mapped back before pairing with names.
        if self.feature_names is not None:
            component_importances = self.rf.feature_importances(self.X_train, self.y_train)
            importances = self._project_onto_features(pca.components_, component_importances)
            feature_importance = sorted(zip(self.feature_names, importances), key=lambda x: x[1], reverse=True)
            self.selected_features = feature_importance
            print("Características seleccionadas según su importancia para la precisión:")
            print(self.selected_features)

    def evaluate(self):
        """Score the model on the held-out split.

        Call ``train`` first.

        Returns:
            Tuple ``(accuracy, precision, recall, f1)``; precision, recall and
            F1 are macro-averaged.
        """
        y_pred = self.rf.predict(self.X_test)
        accuracy = accuracy_score(self.y_test, y_pred)
        precision = precision_score(self.y_test, y_pred, average='macro')
        recall = recall_score(self.y_test, y_pred, average='macro')
        f1 = f1_score(self.y_test, y_pred, average='macro')
        return accuracy, precision, recall, f1

    def print_selected_features(self):
        """Print the ranked features, or a notice when none were computed."""
        if self.selected_features is not None:
            print("Características seleccionadas según su importancia para la precisión:")
            for i, (feature, importance) in enumerate(self.selected_features):
                print(f"Característica {i+1}: {feature} - Importancia: {importance}")
        else:
            print("No se han seleccionado características.")

# Load the dataset and separate the features from the target column
df = pd.read_excel("C:\\Users\\klgt1\\Downloads\\dataset_BALANCEADO.xlsx")
X = df.drop('CONDUCTA', axis=1)
y = df['CONDUCTA']
feature_names = X.columns.tolist()

# Load the hyper-parameters from the Excel sheet.
# The drive letter was missing from this path (unlike the dataset path above),
# so it only resolved when the working directory happened to be on C:.
parametros_df = pd.read_excel("C:\\Users\\klgt1\\Downloads\\ParametrosOptimización.xlsx")
print(parametros_df)
# Use the first row as the parameter set.  Blank cells are read as NaN;
# pass them on as None so the estimators fall back to their defaults.
parametros = {
    nombre: (None if pd.isna(valor) else valor)
    for nombre, valor in parametros_df.iloc[0].to_dict().items()
}

# Train the classifier with this parameter set
wrapper = RandomForestClassifierWrapper(X, y, feature_names=feature_names, **parametros)
wrapper.train()
wrapper.print_selected_features()

# Collect the metrics.  The F1 value is bound to ``f1`` rather than
# ``f1_score`` so the sklearn function that evaluate() calls is not shadowed.
accuracy, precision, recall, f1 = wrapper.evaluate()

# Print the metrics
print("Métricas:")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


# Confusion matrix on the held-out split.  After train(), wrapper.X_test
# holds the PCA-projected test features the classifier expects.
class_labels = np.unique(y)
y_test = wrapper.y_test
y_pred = wrapper.rf.predict(wrapper.X_test)
# Pass the labels explicitly so matrix rows/columns match the tick labels
# even if a class is absent from the test split.
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()
