In [1]:
import numpy as np
import pandas as pd
import scipy.stats as ss

In [2]:
## Constructure the dataframe with 4 columns.
#  ‘D’ is the label
df = pd.DataFrame({'A':ss.norm.rvs(size=10), 
                   'B':ss.norm.rvs(size=10), 
                   'C':ss.norm.rvs(size=10), 
                   'D':np.random.randint(low=0, high=2, size=10)})
df

Unnamed: 0,A,B,C,D
0,0.10767,-0.872661,-1.156716,1
1,-0.311089,0.243511,0.663942,0
2,0.928557,-1.091504,2.238937,0
3,-1.965702,-0.534545,-1.706319,0
4,-0.224745,-0.186885,-0.044241,0
5,-1.415967,0.324085,1.413737,0
6,1.137793,0.10066,1.550108,0
7,0.929584,-0.263292,0.049247,1
8,-0.786499,1.737097,0.07166,1
9,0.39793,0.639269,0.529168,0


In [3]:
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [4]:
## define the features and label
X = df.loc[:, ['A','B','C']]
Y = df.loc[:, 'D']

In [5]:
## import the feature selection package
#  SelectKBest过滤思想类，RFE包裹思想类，SelectFromModel嵌入思想类
from sklearn.feature_selection import SelectKBest, RFE, SelectFromModel

In [6]:
skb = SelectKBest(k=2)
skb.fit(X,Y)

SelectKBest(k=2, score_func=<function f_classif at 0x1a24fbf158>)

In [7]:
skb.transform(X)

array([[-0.87266115, -1.15671589],
       [ 0.24351114,  0.66394214],
       [-1.09150427,  2.23893746],
       [-0.53454543, -1.70631883],
       [-0.18688524, -0.04424111],
       [ 0.3240854 ,  1.41373676],
       [ 0.10065995,  1.5501078 ],
       [-0.26329175,  0.04924709],
       [ 1.73709742,  0.07165986],
       [ 0.63926935,  0.52916838]])

In [8]:
rfe = RFE(estimator=SVR(kernel='linear'), n_features_to_select=2, step=1)
rfe.fit_transform(X, Y)

array([[-0.87266115, -1.15671589],
       [ 0.24351114,  0.66394214],
       [-1.09150427,  2.23893746],
       [-0.53454543, -1.70631883],
       [-0.18688524, -0.04424111],
       [ 0.3240854 ,  1.41373676],
       [ 0.10065995,  1.5501078 ],
       [-0.26329175,  0.04924709],
       [ 1.73709742,  0.07165986],
       [ 0.63926935,  0.52916838]])

In [10]:
sfm = SelectFromModel(estimator=DecisionTreeRegressor(), threshold=0.1)
sfm.fit_transform(X,Y)

array([[-0.87266115, -1.15671589],
       [ 0.24351114,  0.66394214],
       [-1.09150427,  2.23893746],
       [-0.53454543, -1.70631883],
       [-0.18688524, -0.04424111],
       [ 0.3240854 ,  1.41373676],
       [ 0.10065995,  1.5501078 ],
       [-0.26329175,  0.04924709],
       [ 1.73709742,  0.07165986],
       [ 0.63926935,  0.52916838]])