In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import chi2, SelectKBest


In [2]:
data = {
    "Gender": ["Male", "Female", "Male", "Male", "Female"],
    "Marital_Status": ["Yes", "No", "Yes", "No", "Yes"],
    "Education_Level": ["Graduate", "Non-Graduate", "Graduate", "Graduate", "Non-Graduate"],
    "Applicant_Income": [5000, 3000, 4000, 6000, 3500],
    "Prop_Area": ["Urban", "Rural", "Urban", "Rural", "Urban"],
    "Fav_Color": ["Red", "Blue", "Green", "Red", "Blue"],
    "Loan_Status": ["Approved", "Rejected", "Approved", "Approved", "Rejected"]
}

df = pd.DataFrame(data)


In [3]:
le = LabelEncoder()

for column in df.columns:
    df[column] = le.fit_transform(df[column])


In [4]:
x = df.drop("Loan_Status", axis=1)
y = df["Loan_Status"]


In [6]:
chi_scores = chi2(x, y)
chi_results = pd.DataFrame({
    "Feature": x.columns,
    "Chi2 Score": chi_scores[0],
    "p-value": chi_scores[1]
})
print(chi_results.sort_values(by="Chi2 Score", ascending=False))


            Feature  Chi2 Score   p-value
3  Applicant_Income    3.750000  0.052808
5         Fav_Color    3.333333  0.067889
2   Education_Level    3.000000  0.083265
0            Gender    2.000000  0.157299
1    Marital_Status    0.055556  0.813664
4         Prop_Area    0.055556  0.813664


In [7]:
selector = SelectKBest(score_func=chi2, k=3)
x_new = selector.fit_transform(x, y)
selected_features = x.columns[selector.get_support()]
print("Selected Features:", selected_features)


Selected Features: Index(['Education_Level', 'Applicant_Income', 'Fav_Color'], dtype='object')


In [8]:
import pandas as pd
from sklearn.datasets import load_breast_cancer

In [9]:
#Load the dataset
dt=load_breast_cancer()
a=pd.DataFrame(dt.data,columns=dt.feature_names)
print(a.head())

   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst radius  worst texture  worst perimeter  \
0           

In [10]:
print(a.shape)

(569, 30)


In [22]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SequentialFeatureSelector
b=dt.target
a_train,a_test,b_train,b_test=train_test_split(a,b,test_size=0.2,random_state=42)
model=LogisticRegression(max_iter=500)

In [23]:
#Forward Selection
sfs=SequentialFeatureSelector(model,n_features_to_select=5,direction='forward')
sfs.fit(a_train,b_train)
s_features=a_train.columns[sfs.get_support()]
print("Selected Features:",s_features)

Selected Features: Index(['mean radius', 'mean area', 'worst texture', 'worst perimeter',
       'worst compactness'],
      dtype='object')


In [24]:
#Forward Selection
sfs=SequentialFeatureSelector(model,n_features_to_select=5,direction='backward')
sfs.fit(a_train,b_train)
s_features=a_train.columns[sfs.get_support()]
print("Selected Features:",s_features)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=500).
You might also want to sca

Selected Features: Index(['mean radius', 'worst radius', 'worst texture', 'worst perimeter',
       'worst concavity'],
      dtype='object')


In [25]:
from sklearn.feature_selection import RFE
model=LogisticRegression(max_iter=100)
rfe=RFE(model,n_features_to_select=5)
rfe.fit(a_train,b_train)
s_features=a_train.columns[rfe.support_]
print("Selected Features:",s_features)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to sca

Selected Features: Index(['mean concavity', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry'],
      dtype='object')


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to sca