### Feature Selection
#### What it is
- Keeping only the most informative features and discarding noisy or redundant ones.

#### Why it is needed
- Reduce overfitting
- Faster training
- Better generalization

In [None]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np

In [None]:
# Example dataset: the breast cancer classification data bundled with
# scikit-learn. as_frame=True requests pandas objects and return_X_y=True
# yields the (features, target) pair directly instead of a Bunch.

X, y = load_breast_cancer(as_frame=True, return_X_y=True)

In [None]:
# Method 1: Variance Threshold
# Unsupervised filter: low-variance features are removed using a cut-off of
# 0.01; the target `y` is not consulted.
# NOTE(review): variance depends on each feature's scale and X is used here
# without scaling, so the cut-off is not comparable across features.

from sklearn.feature_selection import VarianceThreshold

selector = VarianceThreshold(threshold=0.01)
selector.fit(X)  # learn the per-feature variances
X_selected = selector.transform(X)  # keep only the features that pass the cut-off


In [11]:
# Method 2: Correlation-based removal
# For every pair of features whose absolute correlation exceeds 0.9, the
# feature that appears later in the column order is dropped.

corr_matrix = X.corr().abs()

# Keep only the strict upper triangle (k=1 skips the diagonal) so each pair is
# inspected once and a feature is never compared with itself. The masked-out
# cells become NaN, which never satisfies the comparison below.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))

# A column is flagged when any earlier feature correlates with it above 0.9.
to_drop = upper.columns[(upper > 0.9).any(axis=0).to_numpy()].tolist()
X_reduced = X.drop(labels=to_drop, axis="columns")


In [None]:
# Method 3: SelectKBest (Statistical)
# Scores each feature against the target with the chi-squared statistic and
# keeps the 10 highest-scoring features.
# Note: this rebinds `selector`, which Method 1 also assigns.

from sklearn.feature_selection import chi2, SelectKBest

selector = SelectKBest(chi2, k=10)

# chi2 rejects negative feature values, hence the absolute value.
# NOTE(review): taking the absolute value alters any feature that really is
# negative; rescaling to a non-negative range may be preferable - confirm
# whether X can contain negative values.
X_new = selector.fit_transform(X.abs(), y)


In [None]:
# Method 4: Recursive Feature Elimination (RFE)
# Wrapper method: the estimator is fitted repeatedly and the weakest features
# are pruned until 10 remain.

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# max_iter is raised to 5000 - presumably so the solver converges on the
# unscaled features; confirm if the data is scaled upstream.
model = LogisticRegression(max_iter=5000)
rfe = RFE(estimator=model, n_features_to_select=10)
rfe.fit(X, y)
X_rfe = rfe.transform(X)  # only the 10 retained features
