# Recursive Feature Elimination -- wrapper Page

RFE works by recursively removing attributes and building a model on the attributes that remain (that is, it is a backward elimination method). It uses model accuracy to identify which attributes (and combinations of attributes) contribute the most to predicting the target attribute.

The following example uses RFE with the logistic regression algorithm to select the top four features. The choice of algorithm does not matter too much as long as it is skillful and consistent:


In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import pandas
import numpy
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Relative path of the Pima Indians diabetes CSV file.
url = "data/pima-indians-diabetes.data.csv"
# Column labels handed to read_csv through `names=`; the last one, 'class',
# is the column later used as the target.
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
dataframe = pandas.read_csv(url, names=names)

In [2]:
# Preview the first five rows to sanity-check the load.
dataframe.head(n=5)

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# NumPy-style slicing such as dataframe[:, 0:8] does not work on the DataFrame
# itself, so take its values as a numpy.ndarray first.
array = dataframe.values
type(array)   # displayed as numpy.ndarray
array[:, :8]  # every row, first eight columns

numpy.ndarray

array([[  6.   , 148.   ,  72.   , ...,  33.6  ,   0.627,  50.   ],
       [  1.   ,  85.   ,  66.   , ...,  26.6  ,   0.351,  31.   ],
       [  8.   , 183.   ,  64.   , ...,  23.3  ,   0.672,  32.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,  26.2  ,   0.245,  30.   ],
       [  1.   , 126.   ,  60.   , ...,  30.1  ,   0.349,  47.   ],
       [  1.   ,  93.   ,  70.   , ...,  30.4  ,   0.315,  23.   ]])

In [27]:
# Split the array column-wise: the first eight columns are the inputs and
# the column at index 8 is the target.
X, Y = array[:, :8], array[:, 8]

In [29]:
# Feature extraction: run recursive feature elimination with a logistic
# regression estimator, keeping four features.
model = LogisticRegression()
# n_features_to_select is passed by keyword because recent scikit-learn
# releases no longer accept it as a positional argument.
rfe = RFE(model, n_features_to_select=4)
fit = rfe.fit(X, Y)
# In the recorded run the support mask marks four columns which, read against
# `names`, are preg, plas, mass and pedi; those same columns have ranking 1.
print("Num Features: %d" % fit.n_features_)
print("Selected Features: %s" % fit.support_)
print("Feature Ranking: %s" % fit.ranking_)

Num Features: 4
Selected Features: [ True  True False False False  True  True False]
Feature Ranking: [1 1 2 4 5 1 1 3]




In [34]:
# First five rows with all eight input columns.
print(X[:5])
# Reduce X to the columns the fitted RFE selected.
features = fit.transform(X)
features.shape
# The same five rows after the reduction.
print(features[:5])

[[6.000e+00 1.480e+02 7.200e+01 3.500e+01 0.000e+00 3.360e+01 6.270e-01
  5.000e+01]
 [1.000e+00 8.500e+01 6.600e+01 2.900e+01 0.000e+00 2.660e+01 3.510e-01
  3.100e+01]
 [8.000e+00 1.830e+02 6.400e+01 0.000e+00 0.000e+00 2.330e+01 6.720e-01
  3.200e+01]
 [1.000e+00 8.900e+01 6.600e+01 2.300e+01 9.400e+01 2.810e+01 1.670e-01
  2.100e+01]
 [0.000e+00 1.370e+02 4.000e+01 3.500e+01 1.680e+02 4.310e+01 2.288e+00
  3.300e+01]]


(768, 4)

[[6.000e+00 1.480e+02 3.360e+01 6.270e-01]
 [1.000e+00 8.500e+01 2.660e+01 3.510e-01]
 [8.000e+00 1.830e+02 2.330e+01 6.720e-01]
 [1.000e+00 8.900e+01 2.810e+01 1.670e-01]
 [0.000e+00 1.370e+02 4.310e+01 2.288e+00]]
