[Reference](https://towardsdev.com/the-most-used-feature-selection-methods-c117273759f8)


# Univariate Selection Method


In [1]:
import seaborn as sns
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif,mutual_info_regression,f_classif
from sklearn.preprocessing import LabelEncoder
df_iris = sns.load_dataset('iris')
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [2]:
#extract X features and y target
X = df_iris[["sepal_length","sepal_width","petal_length","petal_width"]].values
y = df_iris[["species"]].values
#SelectKBest can't work with str classes, it should be label encoded.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

  y = column_or_1d(y, warn=True)


In [3]:
selection_params = {"score_func":f_classif,
                   "k":2}
selection = SelectKBest(**selection_params).fit(X,y_encoded)
X_new = selection.transform(X)
mask = selection.get_support()
df_iris_features = df_iris.iloc[:,:-1]
features = df_iris_features.iloc[:,mask].columns
print("Scores: {}".format(selection.scores_))
print("P Values: {}".format(selection.pvalues_))
print("Number Of Features Seen in Fit: {}".format(selection.n_features_in_))
print("Feature Names: {}".format(features))

Scores: [ 119.26450218   49.16004009 1180.16118225  960.0071468 ]
P Values: [1.66966919e-31 4.49201713e-17 2.85677661e-91 4.16944584e-85]
Number Of Features Seen in Fit: 4
Feature Names: Index(['petal_length', 'petal_width'], dtype='object')


# Recursive Feature Elimination RFE


In [4]:
from sklearn.datasets import fetch_california_housing
# Load data (will download the data if it's the first time loading)
d = fetch_california_housing(as_frame=True)
# Create a dataframe
df_house = d['data'].join(d['target'])
X = df_house.iloc[:,:8].values
y =  df_house[["MedHouseVal"]].values

In [5]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
estimator = LinearRegression()
selection_params = {"estimator":estimator,
                   "n_features_to_select":2,
                   "step":1,
                   "verbose":0}
selector = RFE(**selection_params).fit(X,y)
print("Estimator: {}".format(selector.estimator_))
print("Selected N Features: {}".format(selector.n_features_))
print("N Features In: {}".format(selector.n_features_in_))
print("Ranking: {}".format(selector.ranking_))
print("Mask: {}".format(selector.support_))

Estimator: LinearRegression()
Selected N Features: 2
N Features In: 8
Ranking: [2 5 4 3 7 6 1 1]
Mask: [False False False False False False  True  True]


# Select From Model

In [6]:
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
estimator = Lasso()
selection_params = {"estimator":estimator,
                   "threshold":0.5,
                   "max_features":2,
                   }
selector = SelectFromModel(**selection_params).fit(X,y)
print("Estimator: {}".format(selector.estimator_.coef_))
print("Threshold: {}".format(selector.threshold_))

Estimator: [ 1.45469232e-01  5.81496884e-03  0.00000000e+00 -0.00000000e+00
 -6.37292607e-06 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00]
Threshold: 0.5


# Variance Threshold

In [7]:
from sklearn.feature_selection import VarianceThreshold
selector = VarianceThreshold().fit(X,y)
print("Variances: {}".format(selector.variances_))

Variances: [3.60914769e+00 5.10000000e+01 6.12123614e+00 2.24580619e-01
 3.56790000e+04 1.07864799e+02 4.56207160e+00 4.01394488e+00]


# Sequential Feature Selection (SFS)


In [10]:
from sklearn.feature_selection import SequentialFeatureSelector
# SFS uses a greedy algorithm to find best subset of features. It can go both ways, forward or backward. In case of forward, it starts with zero feature and finds the one which maximize a cross-validates score. It keeps adding new features with same procedure until it reaches maximum feature volume which defined initially by user. Backward direction follows the opposite procedure, it starts with all features and remove them. The difference of SFS is that it does not need feature importance. 

# Exhaustive Feature Selection


In [9]:
from mlxtend.feature_selection import ExhaustiveFeatureSelector
# Simple, it tries all subset combinations of features. Very exhaustive.,