# Feature selection.

We use feature selection after wrangling for two reasons.

1. Obtain a set of good features that represents the current dataset.
2. Obtain the set of *not good* features that should be refined in the next wrangling step.

This happens in three steps.

1. A first preselection step removes obviously bad features.
2. A second preselection step removes features that have the same predictive capabilities, in order to prevent the final feature selection step from selecting redundant features.
3. A real feature selection step to make the final decision.

The following methods are implemented in this notebook.

1. A (baseline) random sampling based approach — done.
2. CHCGA — a genetic algorithm based approach — done.
3. SFFS — a forward selection based approach — done.
4. AdaBoost with decision stump approach — TODO.

Both (1) and (2) allow us to set a maximum running time.

In [1]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from typing import Optional, List, Tuple, Callable
from tqdm.notebook import tqdm
from avatar.language import WranglingLanguage
from avatar.analysis import *

Load dataset.

In [2]:
# Read the raw Titanic demo data and store the `Survived` column as a
# categorical, then display the frame.
titanic = (
    pd.read_csv("../../data/raw/demo/titanic.csv")
    .astype({"Survived": "category"})
)
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


Find transformed columns. Don't use replacement.

In [3]:
# Build the wrangling language and let it expand the raw frame. In the recorded
# output the original columns are kept and transformed columns are appended
# (e.g. OneHot, ModeImputation, WordToNumber).
# NOTE(review): `target="Survived"` presumably keeps the label out of the
# transformations -- confirm in avatar.language.
language = WranglingLanguage()
expanded = language.expand(titanic, target="Survived")
expanded

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,OneHot()(Parch)_5,OneHot()(Parch)_6,OneHot()(Embarked)_C,OneHot()(Embarked)_Q,OneHot()(Embarked)_S,"NaN(Pernot, Mr. Rene)(Name)_Name","NaN(Somerton, Mr. Francis William)(Name)_Name",WordToNumber()(Ticket)_Ticket,ModeImputation()(Cabin)_Cabin,ModeImputation()(Embarked)_Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,...,0,0,0,0,1,"Braund, Mr. Owen Harris","Braund, Mr. Owen Harris",,B96 B98,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,...,0,0,1,0,0,"Cumings, Mrs. John Bradley (Florence Briggs Th...","Cumings, Mrs. John Bradley (Florence Briggs Th...",,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,...,0,0,0,0,1,"Heikkinen, Miss. Laina","Heikkinen, Miss. Laina",,B96 B98,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,...,0,0,0,0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)","Futrelle, Mrs. Jacques Heath (Lily May Peel)",113803.0,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,...,0,0,0,0,1,"Allen, Mr. William Henry","Allen, Mr. William Henry",373450.0,B96 B98,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,...,0,0,0,0,1,"Montvila, Rev. Juozas","Montvila, Rev. Juozas",211536.0,B96 B98,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,...,0,0,0,0,1,"Graham, Miss. Margaret Edith","Graham, Miss. Margaret Edith",112053.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,...,0,0,0,0,1,"Johnston, Miss. Catherine Helen ""Carrie""","Johnston, Miss. Catherine Helen ""Carrie""",,B96 B98,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,...,0,0,1,0,0,"Behr, Mr. Karl Howell","Behr, Mr. Karl Howell",111369.0,C148,C


## Pruning

Remove some features that are not appropriate and don't need more wrangling.

In [4]:
from avatar.selection import *


# Chain the pruning filters and apply them to the expanded frame.
# NOTE(review): judging by their names, ConstantFilter drops constant columns
# and IdenticalFilter drops duplicated columns -- confirm in avatar.selection.
pruner = StackedFilter(
    [
        ConstantFilter(),
        IdenticalFilter(),
    ]
)
pruned = pruner.select(expanded)
pruned

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,OneHot()(Parch)_5,OneHot()(Parch)_6,OneHot()(Embarked)_C,OneHot()(Embarked)_Q,OneHot()(Embarked)_S,"NaN(Pernot, Mr. Rene)(Name)_Name","NaN(Somerton, Mr. Francis William)(Name)_Name",WordToNumber()(Ticket)_Ticket,ModeImputation()(Cabin)_Cabin,ModeImputation()(Embarked)_Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,...,0,0,0,0,1,"Braund, Mr. Owen Harris","Braund, Mr. Owen Harris",,B96 B98,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,...,0,0,1,0,0,"Cumings, Mrs. John Bradley (Florence Briggs Th...","Cumings, Mrs. John Bradley (Florence Briggs Th...",,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,...,0,0,0,0,1,"Heikkinen, Miss. Laina","Heikkinen, Miss. Laina",,B96 B98,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,...,0,0,0,0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)","Futrelle, Mrs. Jacques Heath (Lily May Peel)",113803.0,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,...,0,0,0,0,1,"Allen, Mr. William Henry","Allen, Mr. William Henry",373450.0,B96 B98,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,...,0,0,0,0,1,"Montvila, Rev. Juozas","Montvila, Rev. Juozas",211536.0,B96 B98,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,...,0,0,0,0,1,"Graham, Miss. Margaret Edith","Graham, Miss. Margaret Edith",112053.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,...,0,0,0,0,1,"Johnston, Miss. Catherine Helen ""Carrie""","Johnston, Miss. Catherine Helen ""Carrie""",,B96 B98,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,...,0,0,1,0,0,"Behr, Mr. Karl Howell","Behr, Mr. Karl Howell",111369.0,C148,C


## Preselection

Preselect features that will never be appropriate. These can still be wrangled.

* Remove columns with too many missing values.
* Columns consisting of unique, categorical features are removed.

In [5]:
from avatar.selection import *
    
# Stack the three preselection filters and apply them to the expanded frame.
# NOTE(review): this filters `expanded`, not the `pruned` frame produced by the
# pruning cell, so the pruning result is not carried forward -- confirm that
# this is intended.
preselector = StackedFilter([BijectiveFilter(),
                             UniqueFilter(),
                             MissingFilter()])
preselected = preselector.select(expanded)
preselected

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,...,OneHot()(Parch)_3,OneHot()(Parch)_4,OneHot()(Parch)_5,OneHot()(Parch)_6,OneHot()(Embarked)_C,OneHot()(Embarked)_Q,OneHot()(Embarked)_S,WordToNumber()(Ticket)_Ticket,ModeImputation()(Cabin)_Cabin,ModeImputation()(Embarked)_Embarked
0,1,0,3,male,22.0,1,0,A/5 21171,7.2500,S,...,0,0,0,0,0,0,1,,B96 B98,S
1,2,1,1,female,38.0,1,0,PC 17599,71.2833,C,...,0,0,0,0,1,0,0,,C85,C
2,3,1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,S,...,0,0,0,0,0,0,1,,B96 B98,S
3,4,1,1,female,35.0,1,0,113803,53.1000,S,...,0,0,0,0,0,0,1,113803.0,C123,S
4,5,0,3,male,35.0,0,0,373450,8.0500,S,...,0,0,0,0,0,0,1,373450.0,B96 B98,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,27.0,0,0,211536,13.0000,S,...,0,0,0,0,0,0,1,211536.0,B96 B98,S
887,888,1,1,female,19.0,0,0,112053,30.0000,S,...,0,0,0,0,0,0,1,112053.0,B42,S
888,889,0,3,female,,1,2,W./C. 6607,23.4500,S,...,0,0,0,0,0,0,1,,B96 B98,S
889,890,1,1,male,26.0,0,0,111369,30.0000,C,...,0,0,0,0,1,0,0,111369.0,C148,C


We sample a subset of the data with at least one row containing no NaNs.

In [6]:
# Sample from the preselected frame with the weighted column sampler and
# display the result.
# NOTE(review): no seed is set anywhere in this notebook, so the sample may
# differ between runs -- confirm whether WeightedColumnSampler is deterministic.
sampler = WeightedColumnSampler(preselected)
sampled = sampler.sample()
sampled

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Ticket,Fare,Split(. )(Name)_0,Split(. )(Name)_1,...,OneHot()(Embarked)_Q,OneHot()(Embarked)_S,ModeImputation()(Cabin)_Cabin,ModeImputation()(Embarked)_Embarked,Age,Embarked,Split( )(Name)_3,ExtractNumber()(Ticket)_0,"ExtractWord([Mrs, Mr, Master, Dr, Rev, Miss])(Name)_0",WordToNumber()(Ticket)_Ticket
0,1,0,3,male,1,0,A/5 21171,7.2500,"Braund, Mr",Owen Harris,...,0,1,B96 B98,S,22.0,S,Harris,5.0,Mr,
1,2,1,1,female,1,0,PC 17599,71.2833,"Cumings, Mrs",John Bradley (Florence Briggs Thayer),...,0,0,C85,C,38.0,C,Bradley,17599.0,Mrs,
2,3,1,3,female,0,0,STON/O2. 3101282,7.9250,"Heikkinen, Miss",Laina,...,0,1,B96 B98,S,26.0,S,,2.0,Miss,
3,4,1,1,female,1,0,113803,53.1000,"Futrelle, Mrs",Jacques Heath (Lily May Peel),...,0,1,C123,S,35.0,S,Heath,113803.0,Mrs,113803.0
4,5,0,3,male,0,0,373450,8.0500,"Allen, Mr",William Henry,...,0,1,B96 B98,S,35.0,S,Henry,373450.0,Mr,373450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,0,0,211536,13.0000,"Montvila, Rev",Juozas,...,0,1,B96 B98,S,27.0,S,,211536.0,Rev,211536.0
887,888,1,1,female,0,0,112053,30.0000,"Graham, Miss",Margaret Edith,...,0,1,B42,S,19.0,S,Edith,112053.0,Miss,112053.0
888,889,0,3,female,1,2,W./C. 6607,23.4500,"Johnston, Miss","Catherine Helen ""Carrie""",...,0,1,B96 B98,S,,S,Helen,6607.0,Miss,
889,890,1,1,male,0,0,111369,30.0000,"Behr, Mr",Karl Howell,...,0,0,C148,C,26.0,C,Howell,111369.0,Mr,111369.0


Next, we look for features with the same predictive power using a wrapper approach. A decision stump is learned for each feature individually and the predictions of these stumps are compared. Features that make the same predictions are pruned.

In [7]:
from avatar.selection import IterativeFilter


# Filter the sampled frame with IterativeFilter, using `Survived` as target.
# This rebinds `preselected`: from here on the name refers to the filtered
# sample, not to the output of the StackedFilter preselection cell.
preselected = IterativeFilter().select(sampled, target="Survived")
preselected

Unnamed: 0,PassengerId,Survived,Pclass,Sex,SibSp,Parch,Ticket,Fare,Split(. )(Name)_0,Split(. )(Name)_1,...,OneHot()(Embarked)_Q,OneHot()(Embarked)_S,ModeImputation()(Cabin)_Cabin,ModeImputation()(Embarked)_Embarked,Age,Embarked,Split( )(Name)_3,ExtractNumber()(Ticket)_0,"ExtractWord([Mrs, Mr, Master, Dr, Rev, Miss])(Name)_0",WordToNumber()(Ticket)_Ticket
0,1,0,3,male,1,0,A/5 21171,7.2500,"Braund, Mr",Owen Harris,...,0,1,B96 B98,S,22.0,S,Harris,5.0,Mr,
1,2,1,1,female,1,0,PC 17599,71.2833,"Cumings, Mrs",John Bradley (Florence Briggs Thayer),...,0,0,C85,C,38.0,C,Bradley,17599.0,Mrs,
2,3,1,3,female,0,0,STON/O2. 3101282,7.9250,"Heikkinen, Miss",Laina,...,0,1,B96 B98,S,26.0,S,,2.0,Miss,
3,4,1,1,female,1,0,113803,53.1000,"Futrelle, Mrs",Jacques Heath (Lily May Peel),...,0,1,C123,S,35.0,S,Heath,113803.0,Mrs,113803.0
4,5,0,3,male,0,0,373450,8.0500,"Allen, Mr",William Henry,...,0,1,B96 B98,S,35.0,S,Henry,373450.0,Mr,373450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,0,0,211536,13.0000,"Montvila, Rev",Juozas,...,0,1,B96 B98,S,27.0,S,,211536.0,Rev,211536.0
887,888,1,1,female,0,0,112053,30.0000,"Graham, Miss",Margaret Edith,...,0,1,B42,S,19.0,S,Edith,112053.0,Miss,112053.0
888,889,0,3,female,1,2,W./C. 6607,23.4500,"Johnston, Miss","Catherine Helen ""Carrie""",...,0,1,B96 B98,S,,S,Helen,6607.0,Miss,
889,890,1,1,male,0,0,111369,30.0000,"Behr, Mr",Karl Howell,...,0,0,C148,C,26.0,C,Howell,111369.0,Mr,111369.0


### Feature Selector: Evaluator

Wrapping evaluation in a class saves the time of converting data for MERCS and allows us to reuse the same split in every iteration.

In [8]:
from avatar.analysis import FeatureEvaluator

# All-zero integer mask with one entry per column of `preselected`; it is
# passed to `importances(...)` in the next cell. The mask is deterministic, so
# it is built directly instead of drawing a random 0/1 vector and immediately
# overwriting it (which only consumed global NumPy RNG state).
# NOTE(review): what a 0 entry means (column kept vs. excluded) is defined by
# FeatureEvaluator -- confirm in avatar.analysis.
mask = np.zeros(len(preselected.columns), dtype=int)

# Evaluator using the "shap" importance method with 10 folds.
evaluator_shap = FeatureEvaluator(folds=10, method="shap")
evaluator_shap.fit(preselected, target="Survived")

# Same setup with the evaluator's default importance method, for comparison.
evaluator_fimp = FeatureEvaluator(folds=10)
evaluator_fimp.fit(preselected, target="Survived")

In [9]:
# Side-by-side table of both importance measures, one row per column of
# `preselected`. Dict entries are evaluated in order, so the SHAP evaluator
# still runs before the default one.
df = pd.DataFrame(
    {
        'cols': preselected.columns,
        'shap': evaluator_shap.importances(mask),
        'fimp': evaluator_fimp.importances(mask),
    }
)

Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.


In [10]:
# Ten columns with the highest `fimp` importance.
df.sort_values(by='fimp', ascending=False).head(10)

Unnamed: 0,cols,shap,fimp
3,Sex,0.31034,0.269746
6,Ticket,0.137362,0.142191
38,Age,0.079903,0.082172
7,Fare,0.072893,0.067056
10,"Split(, )(Name)_1",0.02232,0.065075
40,Split( )(Name)_3,0.025935,0.045788
8,Split(. )(Name)_0,0.022812,0.03662
9,Split(. )(Name)_1,0.019689,0.032718
15,Split(./)(Ticket)_0,0.027968,0.031791
0,PassengerId,0.039235,0.031737


In [11]:
# Ten columns with the highest `shap` importance.
df.sort_values(by='shap', ascending=False).head(10)

Unnamed: 0,cols,shap,fimp
3,Sex,0.31034,0.269746
6,Ticket,0.137362,0.142191
38,Age,0.079903,0.082172
7,Fare,0.072893,0.067056
43,WordToNumber()(Ticket)_Ticket,0.043182,0.02061
0,PassengerId,0.039235,0.031737
16,Split(/)(Ticket)_0,0.033958,0.02436
15,Split(./)(Ticket)_0,0.027968,0.031791
40,Split( )(Name)_3,0.025935,0.045788
41,ExtractNumber()(Ticket)_0,0.022836,0.018119


## Feature selection


Next, we can take a look at actual feature selection. Three wrapper methods are implemented.

* Randomly sampling columns, training a model and getting the feature relevances.
* A genetic approach, which is similar but should combine features slightly better. We perform a small experiment on whether to fix the genome size.
* Classic sequential and backwards sequential feature selection.

The idea is similar; the feature selection algorithms return sets of feature importances and the associated scores.

### Mask-Generation: Random

Randomly sample subsets of features, evaluate and get feature relevances.

In [12]:
from avatar.selection import SamplingSelector


# Random-sampling baseline: 100 iterations, scored with the evaluator that was
# built with method="shap".
ss = SamplingSelector(iterations=100, evaluator=evaluator_shap)
ss.fit(preselected, target="Survived")

Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

In [13]:
# Scores produced by the sampling selector; the recorded output holds 44
# values, matching the second dimension of `ss._fimps`.
ss.scores()

array([2.52191871e-02, 0.00000000e+00, 4.56997677e-02, 1.57076047e-01,
       1.25973649e-02, 4.74921873e-03, 3.03684996e-02, 5.92219620e-02,
       3.54136841e-02, 1.99385080e-02, 1.53676562e-01, 2.62729696e-02,
       1.80317119e-02, 3.47306008e-02, 2.24779799e-02, 3.82649647e-02,
       5.08733390e-02, 5.15331979e-03, 2.70975874e-02, 2.39078315e-03,
       5.60061198e-03, 3.26693693e-04, 3.94712344e-04, 6.67811393e-04,
       1.07551429e-04, 6.71703604e-06, 3.14830960e-03, 1.86317804e-03,
       1.42506057e-03, 4.29374310e-05, 9.17640935e-05, 6.38265819e-04,
       2.82837971e-05, 2.59181621e-03, 1.28890953e-03, 5.71790581e-03,
       1.45290625e-02, 5.19116252e-03, 4.02275645e-02, 8.43287531e-03,
       3.70952354e-02, 3.06768259e-02, 3.44689281e-02, 3.61837597e-02])

In [14]:
# Shape of the selector's stored importances: (100, 44) in the recorded run,
# presumably iterations x columns -- TODO confirm.
# NOTE(review): this reads a private attribute of SamplingSelector.
ss._fimps.shape

(100, 44)

In [15]:
# Column names in the order returned by the sampling selector (the recorded
# output lists 'Sex' first).
ss.ordered()

Index(['Sex', 'Split(, )(Name)_1', 'Fare', 'Split(/)(Ticket)_0', 'Pclass',
       'Age', 'Split(./)(Ticket)_0', 'Split( )(Name)_3',
       'WordToNumber()(Ticket)_Ticket', 'Split(. )(Name)_0',
       'Split( )(Name)_1',
       'ExtractWord([Mrs, Mr, Master, Dr, Rev, Miss])(Name)_0',
       'ExtractNumber()(Ticket)_0', 'Ticket', 'OneHot()(Pclass)_3',
       'Split(-)(Name)_0', 'PassengerId', 'Split( )(Name)_2',
       'Split(. )(Name)_1', 'Split(')(Name)_0',
       'ModeImputation()(Cabin)_Cabin', 'SibSp', 'Embarked',
       'OneHot()(Embarked)_S', 'OneHot()(SibSp)_1',
       'ModeImputation()(Embarked)_Embarked', 'OneHot()(Pclass)_2', 'Parch',
       'OneHot()(Parch)_0', 'OneHot()(Embarked)_C', 'OneHot()(SibSp)_0',
       'OneHot()(Parch)_1', 'OneHot()(Parch)_2', 'OneHot()(Embarked)_Q',
       'OneHot()(SibSp)_4', 'OneHot()(Parch)_5', 'OneHot()(SibSp)_3',
       'OneHot()(SibSp)_2', 'OneHot()(SibSp)_5', 'OneHot()(Parch)_4',
       'OneHot()(Parch)_3', 'OneHot()(Parch)_6', 'OneHot()(Sib

### Mask Generation: Genetic

The CHC Genetic Algorithm for feature selection. Uses

* Cross-generational elitist selection
* Heterogeneous recombination
* and Cataclysmic mutation

for maintaining diversity and avoiding stagnation.

After the final population is obtained, combine importances from this population.

In [16]:
from avatar.selection import CHCGASelector, Population, Individual
    

# CHC genetic-algorithm selector: 100 iterations, scored with the evaluator
# that was built with method="shap".
gas = CHCGASelector(iterations=100, evaluator=evaluator_shap)
gas.fit(preselected, target="Survived")

Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

In [17]:
# Scores produced by the genetic selector; the recorded output holds 44 values,
# several of them exactly zero.
gas.scores()

array([0.00000000e+00, 0.00000000e+00, 6.55971457e-02, 2.77363384e-01,
       2.74148632e-02, 4.71307993e-03, 5.15096244e-02, 7.42476843e-02,
       2.89363787e-02, 0.00000000e+00, 0.00000000e+00, 5.03501724e-02,
       3.94281121e-02, 2.06487251e-02, 0.00000000e+00, 6.75102986e-02,
       2.90388081e-02, 4.56538500e-04, 5.46694573e-02, 3.78920211e-03,
       5.79483209e-03, 8.37101372e-04, 1.85309209e-04, 7.19169271e-06,
       5.28386115e-05, 0.00000000e+00, 4.36739911e-03, 1.45359944e-03,
       8.67899286e-04, 0.00000000e+00, 2.17468086e-05, 4.97777717e-04,
       0.00000000e+00, 2.21713267e-03, 2.94809961e-03, 4.94244487e-03,
       1.93231452e-02, 2.06129760e-03, 0.00000000e+00, 2.09339143e-02,
       0.00000000e+00, 7.62774653e-02, 6.15373303e-02, 0.00000000e+00])

In [18]:
# Column names in the order returned by the genetic selector.
# NOTE(review): the recorded output includes the target 'Survived' in this
# ordering -- confirm whether the target should be excluded.
gas.ordered()

Index(['Sex', 'ExtractNumber()(Ticket)_0', 'Fare', 'Split(./)(Ticket)_0',
       'Pclass', 'ExtractWord([Mrs, Mr, Master, Dr, Rev, Miss])(Name)_0',
       'OneHot()(Pclass)_3', 'Ticket', 'Split(-)(Name)_0', 'Split(')(Name)_0',
       'Split(/)(Ticket)_0', 'Split(. )(Name)_0', 'SibSp', 'Embarked',
       'Split( )(Name)_1', 'ModeImputation()(Cabin)_Cabin',
       'OneHot()(SibSp)_1', 'OneHot()(Embarked)_S', 'Parch',
       'OneHot()(Parch)_0', 'OneHot()(SibSp)_0', 'OneHot()(Embarked)_Q',
       'OneHot()(Embarked)_C', 'ModeImputation()(Embarked)_Embarked',
       'OneHot()(Parch)_1', 'OneHot()(Parch)_2', 'OneHot()(SibSp)_2',
       'OneHot()(Parch)_5', 'OneHot()(Pclass)_2', 'OneHot()(SibSp)_3',
       'OneHot()(SibSp)_5', 'OneHot()(Parch)_4', 'OneHot()(SibSp)_4',
       'Survived', 'WordToNumber()(Ticket)_Ticket', 'Split(. )(Name)_1',
       'Split(, )(Name)_1', 'Split( )(Name)_2', 'OneHot()(SibSp)_8',
       'OneHot()(Parch)_3', 'OneHot()(Parch)_6', 'Age', 'Split( )(Name)_3',
       'P

### Mask Generation: SFFS

Sequential Forward Floating Selection. We don't use the adaptive version as there will often be many columns and that is too slow.

In [19]:
from avatar.selection import SFFSelector
    

# Sequential forward floating selection: 40 iterations, scored with the
# evaluator that was built with method="shap".
# NOTE(review): in the recorded run, fit prints a raw array and an
# "Adding <column>" line per step between the SHAP messages -- consider
# silencing or capturing this output before sharing the notebook.
sffs = SFFSelector(iterations=40, evaluator=evaluator_shap)
sffs.fit(preselected, target="Survived")

Setting feature_perturbation = "tree_path_dependent" because no background data was given.


[-1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1.]


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding Split( )(Name)_1


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding SibSp


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_5


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding Sex


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_4


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding Parch


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding ModeImputation()(Cabin)_Cabin


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_2


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_1


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Pclass)_2


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_0


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_4


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding SibSp


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_5


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Pclass)_2


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_8


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding Sex


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_1


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_0


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_6


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding ExtractWord([Mrs, Mr, Master, Dr, Rev, Miss])(Name)_0


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Embarked)_Q


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Embarked)_S


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Embarked)_C


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding ModeImputation()(Embarked)_Embarked


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding Embarked


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(SibSp)_0


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Pclass)_2


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Pclass)_2


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Embarked)_S


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_1


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Pclass)_3


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding Pclass


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

Adding OneHot()(Parch)_0


Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.
Setting feature_perturbation = "tree_path_dependent" because no background data was given.

In [20]:
# Show the scores held by the SFFS selector; leaving the call as the bare last
# expression makes the notebook render the returned array as the cell output.
# NOTE(review): presumably one score per candidate feature column — confirm
# against the selector implementation.
sffs.scores()

array([0.00000000e+00, 0.00000000e+00, 1.18660181e-02, 3.52325887e-01,
       9.62458745e-02, 4.96433618e-03, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 3.05239264e-01, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 2.65923836e-02, 1.65280062e-02, 1.58297735e-02,
       2.24929854e-02, 4.62948555e-03, 1.69429842e-03, 4.59160319e-03,
       1.02865706e-05, 2.53355097e-04, 2.61529186e-03, 3.32415013e-03,
       0.00000000e+00, 1.36243606e-03, 7.03555063e-03, 8.29368720e-03,
       8.51184908e-04, 8.75293678e-03, 2.71022778e-03, 6.05561432e-03,
       8.42842049e-03, 1.42911292e-02, 0.00000000e+00, 1.01939456e-02,
       0.00000000e+00, 0.00000000e+00, 6.28218678e-02, 0.00000000e+00])

In [21]:
# Show the feature names in the order produced by the SFFS selector; the bare
# last expression renders the returned Index as the cell output.
# NOTE(review): presumably sorted from most to least relevant, consistent with
# scores() — confirm against the selector implementation.
sffs.ordered()

Index(['Sex', 'Split( )(Name)_1', 'SibSp',
       'ExtractWord([Mrs, Mr, Master, Dr, Rev, Miss])(Name)_0',
       'OneHot()(Pclass)_2', 'OneHot()(SibSp)_1', 'OneHot()(Pclass)_3',
       'OneHot()(SibSp)_0', 'ModeImputation()(Embarked)_Embarked', 'Pclass',
       'Embarked', 'OneHot()(Embarked)_C', 'ModeImputation()(Cabin)_Cabin',
       'OneHot()(Parch)_5', 'OneHot()(Parch)_4', 'OneHot()(Embarked)_S',
       'Parch', 'OneHot()(SibSp)_2', 'OneHot()(SibSp)_4', 'OneHot()(Parch)_1',
       'OneHot()(Embarked)_Q', 'OneHot()(Parch)_0', 'OneHot()(SibSp)_3',
       'OneHot()(Parch)_3', 'OneHot()(Parch)_6', 'OneHot()(SibSp)_8',
       'OneHot()(SibSp)_5', 'Split(, )(Name)_1', 'Ticket', 'Fare', 'Survived',
       'Split(. )(Name)_0', 'Split(. )(Name)_1',
       'WordToNumber()(Ticket)_Ticket', 'Split(-)(Name)_0', 'Split(')(Name)_0',
       'Split( )(Name)_2', 'Split(./)(Ticket)_0', 'Split(/)(Ticket)_0',
       'OneHot()(Parch)_2', 'Age', 'Split( )(Name)_3',
       'ExtractNumber()(Ticket)_0', 'P