# Titanic Dataset

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestClassifier
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import Lasso  
from sklearn.linear_model import Ridge       
from sklearn.linear_model import ElasticNet

**Importing the dataset where Feature selection was made**

In [2]:
df = pd.read_csv("/kaggle/input/titanic-fs/titanic_fs_dataset (1).csv")
print(df)

     Pclass        Age  Parch     Fare  Sex_male  Embarked_S  target
0         3  22.000000      0   7.2500      True        True       0
1         1  38.000000      0  71.2833     False       False       1
2         3  26.000000      0   7.9250     False        True       1
3         1  35.000000      0  53.1000     False        True       1
4         3  35.000000      0   8.0500      True        True       0
..      ...        ...    ...      ...       ...         ...     ...
886       2  27.000000      0  13.0000      True        True       0
887       1  19.000000      0  30.0000     False        True       1
888       3  29.699118      2  23.4500     False        True       0
889       1  26.000000      0  30.0000      True       False       1
890       3  32.000000      0   7.7500      True       False       0

[891 rows x 7 columns]


In [3]:
X = df.drop('target', axis = 1)
y = df['target']

# Wrapper Methods

**Forward Selection**

In [4]:


sfs1 = SFS(RandomForestClassifier(n_jobs=4, random_state=42),
           k_features=5,
           forward=True,
           floating=False,
           verbose=2,
           scoring='accuracy', 
           cv=3)

sfs1 = sfs1.fit(X, y)

print('Best accuracy score: %.4f' % sfs1.k_score_)
print('Best subset (indices):', sfs1.k_feature_idx_)
print('Best subset (names):', sfs1.k_feature_names_)

x_forward = X[list(sfs1.k_feature_names_)]



[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    5.3s finished

[2025-08-27 11:00:32] Features: 1/5 -- score: 0.7867564534231201[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    4.6s finished

[2025-08-27 11:00:37] Features: 2/5 -- score: 0.787878787878788[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    3.6s finished

[2025-08-27 11:00:40] Features: 3/5 -- score: 0.7968574635241302[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.7s finished

[2025-08-27 11:00:43] Features: 4/5 -- score: 0.8013468013468014

Best accuracy score: 0.7834
Best subset (indices): (0, 1, 2, 4, 5)
Best subset (names): ('Pclass', 'Age', 'Parch', 'Sex_male', 'Embarked_S')


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.9s finished

[2025-08-27 11:00:45] Features: 5/5 -- score: 0.7833894500561168

**Backward Selection**

In [5]:


sfs1 = SFS(RandomForestClassifier(n_jobs=4, random_state=42),
           k_features=5,
           forward=False,
           floating=False,
           verbose=2,
           scoring='accuracy',
           cv=3)

sfs1 = sfs1.fit(X, y)

print('Best precision score: %.4f' % sfs1.k_score_)
print('Best subset (indices):', sfs1.k_feature_idx_)
print('Best subset (names):', sfs1.k_feature_names_)
x_backward = X[list(sfs1.k_feature_names_)]


Best precision score: 0.8081
Best subset (indices): (0, 1, 2, 3, 4)
Best subset (names): ('Pclass', 'Age', 'Parch', 'Fare', 'Sex_male')


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    5.8s finished

[2025-08-27 11:00:52] Features: 5/5 -- score: 0.8080808080808081

**Exhaustive Search**

In [6]:
# Initialize the model
clf = RandomForestClassifier(n_jobs=4, random_state=42)

# Run Exhaustive Feature Selection
efs = EFS(clf,
          min_features=2,           
          max_features=5,            
          scoring='precision',        
          print_progress=True, 
          cv=3,                     
          n_jobs=1)                  

efs = efs.fit(X, y)

print('Best precision score: %.4f' % efs.best_score_)
print('Best subset (indices):', efs.best_idx_)
print('Best subset (names):', efs.best_feature_names_)

X_exhaustive = X[list(efs.best_feature_names_)]



Features: 56/56

Best precision score: 0.8848
Best subset (indices): (0, 4, 5)
Best subset (names): ('Pclass', 'Sex_male', 'Embarked_S')


**Recursive Feature Elimination**

In [7]:

# Step 1: Create the model
clf = RandomForestClassifier(n_jobs=4, random_state=42)

# Step 2: Initialize RFECV
rfecv = RFECV(estimator=clf,
              step=1,
              cv=StratifiedKFold(3),
              scoring='precision',
              n_jobs=4,
              verbose=2)

# Step 3: Fit the selector
rfecv.fit(X, y)

print("Optimal number of features: %d" % rfecv.n_features_)
print("Selected feature indices:", rfecv.support_.nonzero()[0])
print("Selected feature names:", X.columns[rfecv.support_].tolist())
X_selected_rs = X.loc[:, rfecv.support_]


Fitting estimator with 6 features.
Fitting estimator with 5 features.
Optimal number of features: 4
Selected feature indices: [0 1 3 4]
Selected feature names: ['Pclass', 'Age', 'Fare', 'Sex_male']


# Embedded Methods

**Lasso L1 Regularization**

In [8]:
lasso = Lasso(alpha=0.01)
lasso.fit(X, y)

selected_lasso = X.columns[(lasso.coef_ != 0)]
print("LASSO selected features:", list(selected_lasso))


LASSO selected features: ['Pclass', 'Age', 'Parch', 'Fare', 'Sex_male', 'Embarked_S']


**L2 Ridge Regularization**

In [9]:
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=0.01)
ridge.fit(X, y)

selected_ridge = X.columns[ridge.coef_ > 0]

print("RIDGE selected features: ", list(selected_ridge))


RIDGE selected features:  ['Fare']


**L1/L2 Regularization Elastic Net**

In [10]:
elastic_net = ElasticNet(alpha=0.02, l1_ratio=0.5, random_state=42)

elastic_net.fit(X, y)

selected_elastic = X.columns[elastic_net.coef_ != 0]

print("Elastic Net selected features:", list(selected_elastic))


Elastic Net selected features: ['Pclass', 'Age', 'Parch', 'Fare', 'Sex_male', 'Embarked_S']
