# In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, SelectPercentile, chi2, f_classif, mutual_info_classif, VarianceThreshold

class FeatureSelector:
    """Apply scikit-learn feature-selection techniques to a DataFrame.

    The selector works on a private copy of the dataset passed to the
    constructor. Every selection method stores its result in
    ``selected_dataset`` and also returns it. Results keep the row index
    and column dtypes of the source dataset; for supervised methods the
    target column is appended as the last column.
    """

    def __init__(self, dataset: pd.DataFrame):
        """Store a copy of *dataset* so the caller's frame is never mutated."""
        self.dataset = dataset.copy()
        self.selected_dataset = self.dataset.copy()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _split_target(self, target: str):
        """Return ``(X, y)`` where ``X`` is the dataset without *target*.

        Raises:
            KeyError: if *target* is ``None`` or not a column of the dataset.
        """
        if target is None or target not in self.dataset.columns:
            raise KeyError(f"target column {target!r} not found in dataset")
        return self.dataset.drop(columns=[target]), self.dataset[target]

    @staticmethod
    def _resolve_k(k, n_features: int):
        """Validate *k* and clamp it to the number of available features.

        Returns ``'all'`` when *k* is ``'all'`` or is at least *n_features*,
        otherwise a plain non-negative ``int``.

        Raises:
            ValueError: if *k* is neither ``'all'`` nor a non-negative integer.
        """
        if isinstance(k, str):
            if k == 'all':
                return 'all'
            raise ValueError(f"k must be a non-negative integer or 'all', got {k!r}")
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)):
            raise ValueError(f"k must be a non-negative integer or 'all', got {k!r}")
        if k < 0:
            raise ValueError(f"k must be a non-negative integer or 'all', got {k!r}")
        # Asking for more features than exist means "keep everything".
        return 'all' if k >= n_features else int(k)

    def _store(self, features: pd.DataFrame, y, target: str):
        """Copy *features*, append the target column (if any) and record it."""
        result = features.copy()
        if target is not None:
            # Positional assignment: ``y`` comes from the same rows as
            # ``features``, so no index alignment is needed.
            result[target] = y.to_numpy()
        self.selected_dataset = result
        return result

    def _fit_and_store(self, selector, X: pd.DataFrame, y, target: str):
        """Fit *selector* on ``(X, y)`` and keep the columns it supports."""
        selector.fit(X, y)
        # Index the original frame with the support mask instead of wrapping
        # the transformed ndarray, so the row index and dtypes are preserved.
        return self._store(X.loc[:, selector.get_support()], y, target)

    # ------------------------------------------------------------------
    # Selection methods
    # ------------------------------------------------------------------
    def variance_threshold(self, config: dict, target: str = None):
        """Drop features whose variance does not exceed ``config['threshold']``.

        Args:
            config: may contain ``'threshold'`` (default ``0``).
            target: optional target column. When given it is excluded from
                the variance check and re-appended to the result; when
                omitted every column of the dataset is checked.

        Returns:
            The filtered DataFrame (also stored in ``selected_dataset``).

        Raises:
            KeyError: if *target* is given but is not a dataset column.
        """
        threshold = config.get('threshold', 0)
        if target is None:
            X, y = self.dataset, None
        else:
            X, y = self._split_target(target)
        selector = VarianceThreshold(threshold=threshold)
        selector.fit(X)
        return self._store(X.loc[:, selector.get_support()], y, target)

    def select_k_best(self, config: dict, target: str):
        """Keep the ``config['k']`` best features under ``config['score_func']``.

        Args:
            config: may contain ``'k'`` (default ``10``, or ``'all'``) and
                ``'score_func'`` (default ``f_classif``). A ``k`` larger than
                the number of features keeps all of them.
            target: name of the target column.

        Returns:
            The selected features plus the target column.

        Raises:
            KeyError: if *target* is not a dataset column.
            ValueError: if ``k`` is not a non-negative integer or ``'all'``.
        """
        X, y = self._split_target(target)
        k = self._resolve_k(config.get('k', 10), X.shape[1])
        score_func = config.get('score_func', f_classif)
        return self._fit_and_store(SelectKBest(score_func=score_func, k=k), X, y, target)

    def select_percentile(self, config: dict, target: str):
        """Keep the top ``config['percentile']`` percent of features.

        Args:
            config: may contain ``'percentile'`` (default ``10``) and
                ``'score_func'`` (default ``f_classif``).
            target: name of the target column.

        Returns:
            The selected features plus the target column.

        Raises:
            KeyError: if *target* is not a dataset column.
        """
        X, y = self._split_target(target)
        percentile = config.get('percentile', 10)
        score_func = config.get('score_func', f_classif)
        selector = SelectPercentile(score_func=score_func, percentile=percentile)
        return self._fit_and_store(selector, X, y, target)

    def mutual_info(self, config: dict, target: str):
        """Keep the ``config['k']`` features with the highest mutual information.

        Args:
            config: may contain ``'k'`` (default ``10``, or ``'all'``) and
                ``'random_state'`` (default ``None``), which is forwarded to
                ``mutual_info_classif`` for reproducible scores.
            target: name of the target column.

        Returns:
            The selected features, ordered by ascending score, plus the
            target column.

        Raises:
            KeyError: if *target* is not a dataset column.
            ValueError: if ``k`` is not a non-negative integer or ``'all'``.
        """
        X, y = self._split_target(target)
        k = self._resolve_k(config.get('k', 10), X.shape[1])
        scores = mutual_info_classif(X, y, random_state=config.get('random_state'))
        ranked = np.argsort(scores)
        if k != 'all':
            # Slice from an explicit start so that k == 0 yields no features
            # (``ranked[-0:]`` would return every feature).
            ranked = ranked[len(ranked) - k:]
        return self._store(X.iloc[:, ranked], y, target)

    def chi_square(self, config: dict, target: str):
        """Keep the ``config['k']`` best features by the chi-squared statistic.

        Args:
            config: may contain ``'k'`` (default ``10``, or ``'all'``).
            target: name of the target column.

        Returns:
            The selected features plus the target column.

        Raises:
            KeyError: if *target* is not a dataset column.
            ValueError: if ``k`` is not a non-negative integer or ``'all'``.
        """
        X, y = self._split_target(target)
        k = self._resolve_k(config.get('k', 10), X.shape[1])
        return self._fit_and_store(SelectKBest(score_func=chi2, k=k), X, y, target)

    def anova_f(self, config: dict, target: str):
        """Keep the ``config['k']`` best features by the ANOVA F-value.

        Args:
            config: may contain ``'k'`` (default ``10``, or ``'all'``).
            target: name of the target column.

        Returns:
            The selected features plus the target column.

        Raises:
            KeyError: if *target* is not a dataset column.
            ValueError: if ``k`` is not a non-negative integer or ``'all'``.
        """
        X, y = self._split_target(target)
        k = self._resolve_k(config.get('k', 10), X.shape[1])
        return self._fit_and_store(SelectKBest(score_func=f_classif, k=k), X, y, target)

    def run_method(self, method_name: str, config: dict, target: str = None):
        """Dispatch to a selection method by its display name.

        Args:
            method_name: one of ``'VarianceThreshold'``, ``'SelectKBest'``,
                ``'SelectPercentile'``, ``'Mutual Information'``,
                ``'Chi-Square'`` or ``'ANOVA F-value'``.
            config: method-specific options; ``None`` is treated as ``{}``.
            target: name of the target column. Required by every method
                except ``'VarianceThreshold'``, where it is optional.

        Returns:
            The DataFrame produced by the chosen method.

        Raises:
            ValueError: if *method_name* is not supported.
            KeyError: if the chosen method needs *target* and it is missing.
        """
        method_map = {
            'VarianceThreshold': self.variance_threshold,
            'SelectKBest': self.select_k_best,
            'SelectPercentile': self.select_percentile,
            'Mutual Information': self.mutual_info,
            'Chi-Square': self.chi_square,
            'ANOVA F-value': self.anova_f,
        }
        if method_name not in method_map:
            raise ValueError(f"Method {method_name} not supported")
        if config is None:
            config = {}
        return method_map[method_name](config=config, target=target)
