## Feature Selection:

In [3]:
#Reading and displaying the heart dataset from a CSV file.
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
df=pd.read_csv("heart_dataset.csv")
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


## Recursive Feature Selection:

In [5]:
# Recursive Feature Elimination
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Separate features (X) and target (y)
X = df.drop('target', axis=1)  # Assuming 'target' is the column name of the target variable
y = df['target']

# Initialize the logistic regression model
estimator = LogisticRegression()

# Perform Recursive Feature Elimination (RFE) with logistic regression
# Here, we'll select the top 5 features
rfe = RFE(estimator, n_features_to_select=5)
X_selected = rfe.fit_transform(X, y)

# Get the selected feature names
selected_features_rfe = X.columns[rfe.support_].tolist()
print("\nSelected Features:")
print(selected_features_rfe)




Selected Features:
['sex', 'exang', 'slope', 'ca', 'thal']


## Pearson Correlation Feature Selection:

In [6]:
#Pearson Correlation Coefficient Feature Selection
from sklearn.feature_selection import SelectKBest, f_classif
import pandas as pd
# Select the top 5 features based on Pearson correlation coefficient
selector = SelectKBest(score_func=f_classif, k=8)
X_selected = selector.fit_transform(X, y)

# Get the selected feature names
selected_features_pearson = X.columns[selector.get_support()].tolist()
print("Selected Features:")
print(selected_features_pearson)


Selected Features:
['sex', 'cp', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']


## Embedded Feature Selection:

In [7]:
#Embedded Feature Selection
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Initialize Random Forest Classifier
# Adjust n_estimators and other hyperparameters as needed
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model to perform embedded feature selection
rf.fit(X, y)

# Get feature importance scores
feature_importances = rf.feature_importances_

# Get the indices of the top k most important features
k = 5  # Number of selected features
top_indices = feature_importances.argsort()[-k:][::-1]

# Get the names of the selected features
selected_features_rf = X.columns[top_indices].tolist()
print("Selected Features:")
print(selected_features_rf)


Selected Features:
['cp', 'thalach', 'ca', 'oldpeak', 'thal']


## Forward Feature Selection:

In [9]:
# Forward feature selection
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd

# Separate features (X) and target (y)
X = df.drop('target', axis=1)  # Assuming 'target' is the column name of the target variable
y = df['target']

# Initialize the logistic regression model
estimator = LogisticRegression()

# Perform forward feature selection
selector = SequentialFeatureSelector(estimator, n_features_to_select=5, direction='forward', scoring='accuracy', cv=5)
selector.fit(X, y)

# Get the selected feature names
selected_features_forward = X.columns[selector.support_].tolist()
print("\nSelected Features (Forward Selection):")
print(selected_features_forward)



Selected Features (Forward Selection):
['sex', 'cp', 'trestbps', 'oldpeak', 'ca']


In [8]:
#Concatenated above three feature selection
# Convert selected features from each method to sets
rfe_selected_features = set(selected_features_rfe)
pearson_selected_features = set(selected_features_pearson)
random_forest_selected_features = set(selected_features_rf)

# Find the intersected features
intersected_features = rfe_selected_features.intersection(pearson_selected_features, random_forest_selected_features)

print("Intersected Features:")
print(intersected_features)


Intersected Features:
{'ca', 'thal'}
