# Unfinished FeatureSelector

In [None]:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import chi2, f_classif, f_regression, mutual_info_classif, mutual_info_regression
from sklearn.utils.validation import check_is_fitted
from statsmodels.stats.multitest import multipletests

class FeatureSelector(BaseEstimator, TransformerMixin):
    """Keep the columns whose univariate test p-value survives FDR correction.

    Each listed feature is tested against ``y`` with the statistical test it
    is assigned to. The p-values of all tests are pooled, corrected together
    with the Benjamini-Hochberg procedure, and the features whose corrected
    p-value is strictly below ``alpha`` are kept.

    Parameters
    ----------
    tests_to_features : dict
        Maps a test name to the column labels to evaluate with that test.
        Supported names are ``'chi2'``, ``'f_classif'`` and
        ``'f_regression'``. Unrecognised names are skipped.
        ``'mutual_info_classif'`` and ``'mutual_info_regression'`` are
        rejected with a ``ValueError`` because they produce no p-values.
    alpha : float, default=0.05
        Significance level used for the FDR correction and as the selection
        threshold on the corrected p-values.

    Attributes
    ----------
    selected_features_ : list of int
        Positional column indices kept by ``transform``; set by ``fit``.
    """

    # Tests whose sklearn implementation returns a ``(scores, p_values)`` pair.
    _PVALUE_TESTS = {
        'chi2': chi2,
        'f_classif': f_classif,
        'f_regression': f_regression,
    }

    # Recognised names whose sklearn implementation returns scores only, so
    # they cannot take part in a p-value based correction.
    _SCORE_ONLY_TESTS = frozenset({'mutual_info_classif', 'mutual_info_regression'})

    def __init__(self, tests_to_features, alpha=0.05):
        # Only store constructor parameters here; fitted state (trailing
        # underscore) is created in ``fit`` per the scikit-learn convention.
        self.tests_to_features = tests_to_features
        self.alpha = alpha

    def fit(self, X, y=None):
        """Run the configured tests and record the significant columns.

        Parameters
        ----------
        X : DataFrame-like
            Must expose ``columns.get_loc`` and ``iloc``; accessed by the
            labels given in ``tests_to_features``.
        y : array-like, optional
            Target passed unchanged to every test function.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If a mutual-information test is requested, since it yields no
            p-values to correct.
        """
        p_values = []
        feature_indices = []

        for test, features in self.tests_to_features.items():
            if test in self._SCORE_ONLY_TESTS:
                raise ValueError(
                    f"'{test}' returns scores only and no p-values, so it "
                    "cannot be used with FDR-based selection."
                )

            test_func = self._PVALUE_TESTS.get(test)
            if test_func is None:
                # Unrecognised test names are skipped.
                continue

            indices = [X.columns.get_loc(feature) for feature in features]
            if not indices:
                # Nothing to test; the sklearn functions reject zero columns.
                continue

            _, pvals = test_func(X.iloc[:, indices], y)
            p_values.extend(pvals)
            feature_indices.extend(indices)

        p_values = np.asarray(p_values, dtype=float)

        # Non-finite p-values (e.g. from constant columns) are left out of
        # the correction so they cannot contaminate the other corrected
        # values; such features are never selected.
        testable = np.isfinite(p_values)
        significant = np.zeros(p_values.shape[0], dtype=bool)
        if testable.any():
            _, pvals_corrected, _, _ = multipletests(
                p_values[testable], alpha=self.alpha, method='fdr_bh'
            )
            significant[testable] = pvals_corrected < self.alpha

        # ``dict.fromkeys`` removes repeats (a feature listed under several
        # tests) while preserving first-seen order.
        self.selected_features_ = list(dict.fromkeys(
            index for index, keep in zip(feature_indices, significant) if keep
        ))

        return self

    def transform(self, X):
        """Return ``X`` restricted to the columns selected during ``fit``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit``.
        """
        check_is_fitted(self, 'selected_features_')
        return X.iloc[:, self.selected_features_]

# Example usage:
# selector = FeatureSelector({'chi2': ['feature1', 'feature2'], 'f_classif': ['feature3', 'feature4']}, alpha=0.05)
# X_new = selector.fit_transform(X, y)