### Backward selection en sklearn

In [None]:
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import SequentialFeatureSelector

In [None]:
# Load the passenger table from the CSV file in the working directory.
titanic = pd.read_csv(filepath_or_buffer="titanic.csv")

In [None]:
# Rename map: each original column label -> lowercase label with "/" turned into "_".
col_names = dict(
    (label, label.lower().replace("/", "_"))
    for label in titanic.columns.values
)

In [None]:
# Apply the normalized column labels to the frame in place.
titanic.rename(mapper=col_names, axis="columns", inplace=True)

In [None]:
# Preview the first five rows after renaming.
titanic.head(n=5)

Unnamed: 0,survived,pclass,name,sex,age,siblings_spouses aboard,parents_children aboard,fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


In [None]:
# Encode the `sex` column numerically: 'male' -> 0, 'female' -> 1.
#
# The encoded column is assigned back to the frame rather than calling
# `titanic.sex.mask(..., inplace=True)`: an in-place call on a column pulled
# out of the frame is chained assignment, which pandas warns about and which
# leaves `titanic` unchanged when Copy-on-Write is enabled.
#
# Values other than 'male'/'female' (including already-encoded 0/1 and NaN)
# pass through untouched, so re-running the cell is harmless.
sex_codes = {"male": 0, "female": 1}
titanic["sex"] = titanic["sex"].map(lambda value: sex_codes.get(value, value))

In [None]:
# Feature matrix: everything except the target (`survived`) and the `name` column.
X = titanic.drop(columns=['survived', 'name'])
X.head(n=5)

Unnamed: 0,pclass,sex,age,siblings_spouses aboard,parents_children aboard,fare
0,3,0,22.0,1,0,7.25
1,1,1,38.0,1,0,71.2833
2,3,1,26.0,0,0,7.925
3,1,1,35.0,1,0,53.1
4,3,0,35.0,0,0,8.05


In [None]:
# Target vector: the `survived` column.
y = titanic["survived"]

In [None]:
# Classification tree used as the estimator inside the feature selectors.
# Depth and leaf size are capped; random_state is pinned to a constant.
dt = DecisionTreeClassifier(
    random_state=111,
    min_samples_leaf=5,
    max_depth=5,
)

In [None]:
# Backward sequential feature selection.
#   estimator            -> model used to evaluate feature subsets (here, the classification tree)
#   n_features_to_select -> how many features to keep
#   direction            -> "backward" or "forward"
#   scoring              -> performance metric used to choose between subsets
#   cv                   -> cross-validation setting passed to the selector
selector_backward = SequentialFeatureSelector(
    estimator=dt,
    direction="backward",
    n_features_to_select=4,
    scoring='precision',
    cv=5,
)
selector_backward.fit(X, y)

SequentialFeatureSelector(direction='backward',
                          estimator=DecisionTreeClassifier(max_depth=5,
                                                           min_samples_leaf=5,
                                                           random_state=111),
                          n_features_to_select=4, scoring='precision')

In [None]:
# Boolean mask over the columns of X: True where the feature was kept.
selector_backward.get_support()

array([ True,  True, False,  True, False,  True])

Variables seleccionadas

In [None]:
# Pair each feature name with its entry in the backward selector's mask.
pd.DataFrame(
    data={
        'column': X.columns.to_numpy(),
        'selected': selector_backward.support_,
    }
)

Unnamed: 0,column,selected
0,pclass,True
1,sex,True
2,age,False
3,siblings_spouses aboard,True
4,parents_children aboard,False
5,fare,True


### Forward selection en sklearn

In [None]:
# Forward sequential feature selection with the same estimator, target
# feature count, scoring metric and cv setting as the backward run.
selector_forward = SequentialFeatureSelector(
    estimator=dt,
    direction="forward",
    n_features_to_select=4,
    scoring='precision',
    cv=5,
)
selector_forward.fit(X, y)

SequentialFeatureSelector(estimator=DecisionTreeClassifier(max_depth=5,
                                                           min_samples_leaf=5,
                                                           random_state=111),
                          n_features_to_select=4, scoring='precision')

In [None]:
# Boolean mask over the columns of X: True where the feature was kept.
selector_forward.get_support()

array([ True,  True, False,  True, False,  True])

In [None]:
# Pair each feature name with its entry in the forward selector's mask.
pd.DataFrame(
    data={
        'column': X.columns.to_numpy(),
        'selected': selector_forward.support_,
    }
)

Unnamed: 0,column,selected
0,pclass,True
1,sex,True
2,age,False
3,siblings_spouses aboard,True
4,parents_children aboard,False
5,fare,True
