# In [6]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from boruta import BorutaPy
# Bind the attribute names `int`, `float` and `bool` on the numpy module to
# concrete numpy scalar types.
# NOTE(review): presumably a compatibility shim for a dependency (likely
# BorutaPy) that still references these aliases, which recent NumPy releases
# no longer provide — confirm. Be aware that `np.int` is bound to the 32-bit
# `np.int32` here, so any code reading `np.int` gets a fixed-width type.
np.int = np.int32
np.float = np.float64
np.bool = np.bool_

def select_features(X, y, iterations=10, n_rfe_features=5, corr_threshold=0.95):
    """Run six chained feature-selection pipelines and report surviving columns.

    Every iteration executes, in this order, the pipelines
    'Boruta + RFE + Correlation', 'RFE + Boruta + Correlation',
    'RFE + Correlation', 'Boruta + Correlation', 'Boruta + Lasso' and
    'Boruta + RFE + Lasso'. Each stage of a pipeline only sees the columns
    kept by the previous stage.

    The reported indices always refer to columns of the original ``X``; the
    positions each stage returns (which are relative to the reduced matrix it
    was given) are mapped back before being stored.

    Args:
        X: 2-D feature matrix (samples x features); converted with
            ``np.asarray``.
        y: 1-D target vector aligned with the rows of ``X``.
        iterations: Number of times the whole set of pipelines is repeated.
            The random forests are created without a fixed seed, so repeated
            runs may select different columns.
        n_rfe_features: Number of columns RFE keeps. Clamped to the number of
            columns actually available at that stage.
        corr_threshold: A column is dropped by the correlation filter when
            its absolute Pearson correlation with any later column exceeds
            this value.

    Returns:
        A list with ``6 * iterations`` entries, each a ``(label, columns)``
        tuple. ``columns`` holds indices into the original ``X``: a plain list
        for pipelines ending in the correlation filter, a NumPy integer array
        for pipelines ending in Lasso. ``columns`` is empty when some stage
        rejected every column.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    def boruta_selection(X, y):
        """Return positions of the columns BorutaPy marks in ``support_``."""
        forest = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)
        boruta = BorutaPy(forest, n_estimators='auto', verbose=0, random_state=42)
        boruta.fit(X, y)
        return np.flatnonzero(boruta.support_)

    def rfe_selection(X, y):
        """Return positions of the columns kept by recursive feature elimination."""
        model = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5)
        # Never ask RFE for more columns than the current matrix has.
        rfe = RFE(model, n_features_to_select=min(n_rfe_features, X.shape[1]))
        rfe.fit(X, y)
        return np.flatnonzero(rfe.support_)

    def correlation_selection(X, y=None):
        """Return positions of columns not highly correlated with a later column.

        ``y`` is accepted (and ignored) so every stage has the same signature.
        """
        n_columns = X.shape[1]
        if n_columns < 2:
            # np.corrcoef collapses to a scalar for a single column; there is
            # nothing to compare against, so keep whatever is there.
            return np.arange(n_columns)
        # Use the magnitude: a correlation of -0.99 is as redundant as +0.99.
        corr = np.abs(np.corrcoef(X, rowvar=False))
        later_columns = np.triu(np.ones(corr.shape, dtype=bool), k=1)
        # NaN entries (e.g. from constant columns) compare False and are kept.
        redundant = ((corr > corr_threshold) & later_columns).any(axis=1)
        return np.flatnonzero(~redundant)

    def lasso_selection(X, y):
        """Return positions of columns with a non-zero LassoCV coefficient."""
        X_scaled = StandardScaler().fit_transform(X)
        lasso = LassoCV(cv=5, random_state=42).fit(X_scaled, y)
        return np.flatnonzero(lasso.coef_ != 0)

    pipelines = (
        ('Boruta + RFE + Correlation', (boruta_selection, rfe_selection, correlation_selection)),
        ('RFE + Boruta + Correlation', (rfe_selection, boruta_selection, correlation_selection)),
        ('RFE + Correlation', (rfe_selection, correlation_selection)),
        ('Boruta + Correlation', (boruta_selection, correlation_selection)),
        ('Boruta + Lasso', (boruta_selection, lasso_selection)),
        ('Boruta + RFE + Lasso', (boruta_selection, rfe_selection, lasso_selection)),
    )

    selected_features_list = []

    for _ in range(iterations):
        for label, stages in pipelines:
            # `columns` tracks which ORIGINAL columns are still alive.
            columns = np.arange(X.shape[1])
            for stage in stages:
                if columns.size == 0:
                    # Nothing left to select from; later estimators would
                    # raise on a zero-column matrix.
                    break
                kept = stage(X[:, columns], y)
                # Translate positions within the reduced matrix back to
                # original column indices.
                columns = columns[kept]
            if stages[-1] is correlation_selection:
                # Correlation-terminated pipelines have always produced plain
                # lists; keep that type for existing callers.
                columns = columns.tolist()
            selected_features_list.append((label, columns))

    return selected_features_list

# Read the feature matrix and the label vector from space-delimited text
# files (point these paths at your own dataset as needed).
X, y = (
    np.loadtxt(path, delimiter=' ')
    for path in ("../data/x_train.txt", "../data/y_train.txt")
)

selected_features = select_features(X, y, iterations=10)

# Report each (pipeline label, selected columns) pair, followed by a
# 30-dash separator line.
for method, features in selected_features:
    print(
        f"Method: {method}",
        f"Selected Features: {features}",
        "-" * 30,
        sep="\n",
    )
