In [None]:
# Filter methods
# 1. Correlation-based selection
# Pairwise correlation between the columns of X.
correlation_matrix = X.corr()
# "CRIM" is the column of interest here. Take its column of the matrix and
# keep magnitudes only, so strong negative correlations count as well.
cor_target = correlation_matrix["CRIM"].abs()
# Keep the entries whose absolute correlation with "CRIM" exceeds 0.5.
# NOTE(review): "CRIM" is correlated with itself, so it will normally show up
# in the result as well -- confirm whether that is intended.
relevant_features = cor_target.loc[cor_target > 0.5]




# 2. Chi-Square Test
# Explanation:
# The chi-square test is used for categorical data. It tests whether there is
# a significant relationship between each feature and the target variable.
# Features with low chi-square scores are typically dropped.


# chi2 is the scoring function handed to SelectKBest below, so it has to be
# imported together with SelectKBest.
from sklearn.feature_selection import SelectKBest, chi2

# Score every feature against y with the chi-square statistic and keep the
# two highest-scoring ones.
# NOTE(review): chi2 only accepts non-negative feature values -- confirm that
# X satisfies this before running.
chi2_selector = SelectKBest(chi2, k=2)
X_new = chi2_selector.fit_transform(X, y)

# get_support() yields a boolean mask over the input columns; index the
# column labels with it to recover the names of the selected features.
selected_features = X.columns[chi2_selector.get_support()]




# 3. Decision Trees / Random Forest Feature Importance
# Explanation:
# Tree-based models like Random Forests can automatically compute feature
# importance based on how much each feature contributes to reducing the
# impurity (e.g., Gini or entropy) in the decision trees.
from sklearn.ensemble import RandomForestClassifier

# Train the Random Forest model with its default hyper-parameters.
# NOTE(review): no random_state is set, so the ranking may vary between runs.
model = RandomForestClassifier()
model.fit(X, y)

# One importance score per column of X.
importances = model.feature_importances_

# argsort() orders ascending, so reverse it to list the most important
# feature first. Every feature is printed (ranked), not only a selected subset.
sorted_indices = importances.argsort()
print("Selected features:", X.columns[sorted_indices[::-1]])

In [None]:
# 4. Dimensionality reduction (PCA)
# PCA is a dimensionality-reduction technique that re-expresses the data in a
# new coordinate system; here the data is projected onto 2 dimensions.
from sklearn.decomposition import PCA

# Keep only the first two principal components.
pca = PCA(n_components=2)
# Fit on X and project it in a single step.
X_pca = pca.fit_transform(X)
# Wrap the projected array in a DataFrame with one named column per component.
component_labels = ["PC1", "PC2"]
X_pca_df = pd.DataFrame(data=X_pca, columns=component_labels)



# 5. Mutual Information
# Mutual Information (MI) measures the dependency between two variables (in
# this case, each feature and the target variable). It quantifies how much
# information the target variable provides about each feature. Higher MI
# values mean a stronger relationship between the feature and the target.
from sklearn.feature_selection import mutual_info_classif

# One MI estimate per column of X.
mi = mutual_info_classif(X, y)
# Keep the columns whose estimated MI with the target is above 0.1.
informative_mask = mi > 0.1
selected_features = X.columns[informative_mask]



# 6. Lasso (L1-regularised linear regression) used as a feature selector
# Lasso is the estimator fitted below, so it must be imported here.
from sklearn.linear_model import Lasso
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2; this import only works on older versions. Decide on a replacement
# dataset before upgrading.
from sklearn.datasets import load_boston
import pandas as pd

# Load dataset: features as a DataFrame with named columns, target as a Series.
data = load_boston()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Apply Lasso regression with regularisation strength alpha=0.1.
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)

# Features whose fitted coefficient is non-zero are treated as selected.
selected_features = X.columns[lasso.coef_ != 0]
print("Selected features:", selected_features)




# 7. Wrapper Methods
# a. Recursive Feature Elimination (RFE)
# Explanation:
# RFE recursively removes the least important features based on a chosen
# model. It uses the model to assess feature importance and eliminates the
# features with the least contribution.
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Define the model that RFE uses to rank features.
# NOTE(review): LogisticRegression is a classifier and needs discrete class
# labels in y -- confirm y is not a continuous target at this point.
model = LogisticRegression(max_iter=200)

# Apply RFE (select top 2 features).
rfe = RFE(model, n_features_to_select=2)
X_rfe = rfe.fit_transform(X, y)

# support_ is a boolean mask over the input columns marking the kept features.
selected_features = X.columns[rfe.support_]