In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2,f_classif,mutual_info_classif
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

# **Feature Selection**

In [None]:
# Read the diabetes feature-selection dataset from the current working directory.
df = pd.read_csv(filepath_or_buffer='diabetes_feature_selection_dataset.csv')

In [None]:
# Show the loaded DataFrame as this cell's output.
df

Unnamed: 0,Age,BMI,Glucose,BloodPressure,HospitalVisits,ExerciseLevel,HasDiabetes
0,25,22,90,80,1,2,0
1,40,30,150,85,5,0,1
2,30,28,120,82,3,1,1
3,22,20,85,75,1,3,0
4,35,27,140,88,4,1,1
5,28,25,110,79,2,2,0
6,50,31,160,90,6,0,1
7,45,29,145,87,5,1,1
8,33,26,130,84,3,2,1
9,26,23,100,78,1,3,0


In [None]:
# Target vector: the HasDiabetes column.
y = df['HasDiabetes']
# Feature matrix: every column except the target.
X = df.drop(columns='HasDiabetes')

# ***i). Scoring function***

### **f_classif**

In [None]:
# Univariate selection: score every feature against the target with f_classif
# and keep the two highest-scoring columns.
selector = SelectKBest(k=2, score_func=f_classif).fit(X, y)

# get_support() is a boolean mask over the columns of X; use it to recover names.
selected_features = X.columns[selector.get_support()]
print("Selected Features: ", selected_features)

Selected Features:  Index(['BMI', 'Glucose'], dtype='object')


### **chi2**

In [None]:
# Univariate selection with the chi-squared statistic, keeping the top two features.
# NOTE(review): chi2 expects non-negative feature values — holds for the rows
# displayed above; confirm for the full dataset.
selector = SelectKBest(k=2, score_func=chi2).fit(X, y)

# Translate the boolean support mask back into column names.
selected_features = X.columns[selector.get_support()]
print("Selected Features: ", selected_features)

Selected Features:  Index(['Age', 'Glucose'], dtype='object')


### **mutual_info_classif**

In [None]:
# mutual_info_classif adds small random noise to continuous features, so an
# unseeded call can rank features differently on every run. Pin the seed so the
# selection below is reproducible after Restart & Run All.
def seeded_mutual_info(features, target):
    """Score features with mutual_info_classif using a fixed random_state.

    Parameters
    ----------
    features : array-like of shape (n_samples, n_features)
        Feature matrix passed through by SelectKBest.
    target : array-like of shape (n_samples,)
        Class labels passed through by SelectKBest.

    Returns
    -------
    ndarray of shape (n_features,)
        Estimated mutual information between each feature and the target.
    """
    return mutual_info_classif(features, target, random_state=42)


selector = SelectKBest(score_func=seeded_mutual_info, k=2)
selector.fit(X, y)

# get_support() is a boolean mask over the columns of X; use it to recover names.
selected_features = X.columns[selector.get_support()]
print("Selected Features: ", selected_features)

Selected Features:  Index(['Glucose', 'HospitalVisits'], dtype='object')


# ***ii). RFE - Recursive Feature Elimination***

In [None]:
# RFE eliminates features by the magnitude of the estimator's coefficients, and
# coefficient magnitudes are only comparable when the features share a scale.
# The raw columns do not (e.g. Glucose is ~85-160 while ExerciseLevel is 0-3),
# so standardize them first; otherwise the ranking reflects units, not relevance.
X_scaled = StandardScaler().fit_transform(X)

selector = RFE(estimator=LogisticRegression(), n_features_to_select=3, step=1)
selector.fit(X_scaled, y)

# The support mask is positional, so it still lines up with X's column names.
selected_features = X.columns[selector.get_support()]
print("Selected Features: ", selected_features)

# Coefficients of the final model, refitted on the three retained
# (standardized) features only.
print("Coefficients: ", selector.estimator_.coef_)

Selected Features:  Index(['BMI', 'Glucose', 'BloodPressure'], dtype='object')
Coefficients:  [[0.14869111 0.50204227 0.14990037]]


# ***iii). SelectFromModel***

In [None]:
# Random forests are stochastic (bootstrap samples and random feature subsets),
# so an unseeded forest yields different importances — and therefore possibly
# a different selection — on every run. Pin random_state for reproducibility.
forest = RandomForestClassifier(random_state=42)

# Keep every feature whose importance is at least the mean importance.
selector = SelectFromModel(estimator=forest, threshold='mean')
selector.fit(X, y)

# get_support() is a boolean mask over the columns of X; use it to recover names.
selected_features = X.columns[selector.get_support()]
print("Selected Features: ", selected_features)

Selected Features:  Index(['BMI', 'Glucose', 'BloodPressure', 'HospitalVisits'], dtype='object')
