In [1]:
import cv2
import pandas as pd
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
import statsmodels.api as sm

In [2]:
GLCM = pd.read_csv('/kaggle/input/graylevel/GLCM (1).csv')
GLRLM = pd.read_csv('/kaggle/input/graylevel/GLRLM (1).csv')
GLSZM = pd.read_csv('/kaggle/input/graylevel/GLSZM (1).csv')

In [3]:
XC = GLCM.iloc[:, 1:11]
XRL = GLRLM.iloc[:, 1:11]
XSZ = GLSZM.iloc[:, 1:11]
y = GLRLM.iloc[:, 0]
target_mapping = {label: idx for idx, label in enumerate(y.unique())}
y = y.map(target_mapping)

In [4]:
XC_train, XC_test, yC_train, yC_test = train_test_split(XC,y, test_size=0.3, random_state=100)
XRL_train, XRL_test, yRL_train, yRL_test = train_test_split(XRL,y, test_size=0.3, random_state=100)
XSZ_train, XSZ_test, ySZ_train, ySZ_test = train_test_split(XSZ,y, test_size=0.3, random_state=100)

In [5]:
def forward_selection(data, target, significance_level=0.05):
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features

In [6]:
print(forward_selection(XC_train,yC_train))
print(forward_selection(XRL_train,yRL_train))
print(forward_selection(XSZ_train,ySZ_train))

['Energy']
['original_glrlm_GrayLevelVariance', 'original_glrlm_RunLengthNonUniformity', 'original_glrlm_LongRunEmphasis', 'original_glrlm_ShortRunEmphasis', 'original_glrlm_RunEntropy']
['original_glszm_SmallAreaEmphasis', 'original_glszm_LargeAreaHighGrayLevelEmphasis', 'original_glszm_GrayLevelVariance', 'original_glszm_GrayLevelNonUniformity', 'original_glszm_SmallAreaLowGrayLevelEmphasis', 'original_glszm_LowGrayLevelZoneEmphasis']


In [7]:
forward_feature_selection_GLCM = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                                     k_features= 'best',
                                                     forward=True,
                                                     floating=False,
                                                     verbose=2,
                                                     scoring= "accuracy",
                                                     cv= 5).fit(XC_train, yC_train)


[2024-12-25 15:43:21] Features: 1/3 -- score: 0.3148742924838752
[2024-12-25 15:43:25] Features: 2/3 -- score: 0.40582247378351105
[2024-12-25 15:43:27] Features: 3/3 -- score: 0.4854723355710588

In [8]:
forward_feature_selection_GLCM.k_feature_names_

('Contrast', 'Energy', 'Correlation')

In [9]:
forward_feature_selection_GLRLM = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                                     k_features= 'best',
                                                     forward=True,
                                                     floating=False,
                                                     verbose=2,
                                                     scoring= "accuracy",
                                                     cv= 5).fit(XRL_train, yRL_train)


[2024-12-25 15:43:45] Features: 1/10 -- score: 0.327997893905489
[2024-12-25 15:44:02] Features: 2/10 -- score: 0.4395375367469615
[2024-12-25 15:44:16] Features: 3/10 -- score: 0.4779957000570401
[2024-12-25 15:44:30] Features: 4/10 -- score: 0.5248563029265938
[2024-12-25 15:44:41] Features: 5/10 -- score: 0.5492255714975209
[2024-12-25 15:44:50] Features: 6/10 -- score: 0.5539335702689658
[2024-12-25 15:44:58] Features: 7/10 -- score: 0.558624018252819
[2024-12-25 15:45:03] Features: 8/10 -- score: 0.5520380852090738
[2024-12-25 15:45:07] Features: 9/10 -- score: 0.5483041551489622
[2024-12-25 15:45:09] Features: 10/10 -- score: 0.5501601509367733

In [10]:
forward_feature_selection_GLRLM.k_feature_names_

('original_glrlm_GrayLevelVariance',
 'original_glrlm_LongRunEmphasis',
 'original_glrlm_LongRunHighGrayLevelEmphasis',
 'original_glrlm_LowGrayLevelRunEmphasis',
 'original_glrlm_RunEntropy',
 'original_glrlm_RunLengthNonUniformity',
 'original_glrlm_ShortRunEmphasis')

In [11]:
forward_feature_selection_GLSZM = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                                     k_features= 'best',
                                                     forward=True,
                                                     floating=False,
                                                     verbose=2,
                                                     scoring= "accuracy",
                                                     cv= 5).fit(XSZ_train, ySZ_train)


[2024-12-25 15:45:27] Features: 1/10 -- score: 0.31865648720986356
[2024-12-25 15:45:44] Features: 2/10 -- score: 0.40676582861655924
[2024-12-25 15:45:58] Features: 3/10 -- score: 0.46020797683296044
[2024-12-25 15:46:12] Features: 4/10 -- score: 0.4995480672195165
[2024-12-25 15:46:23] Features: 5/10 -- score: 0.519235663200386
[2024-12-25 15:46:32] Features: 6/10 -- score: 0.5323461015313062
[2024-12-25 15:46:40] Features: 7/10 -- score: 0.531411522092054
[2024-12-25 15:46:46] Features: 8/10 -- score: 0.5342240358036067
[2024-12-25 15:46:50] Features: 9/10 -- score: 0.5417357728928086
[2024-12-25 15:46:52] Features: 10/10 -- score: 0.5314159097889517

In [12]:
forward_feature_selection_GLSZM.k_feature_names_

('original_glszm_GrayLevelNonUniformity',
 'original_glszm_GrayLevelNonUniformityNormalized',
 'original_glszm_GrayLevelVariance',
 'original_glszm_HighGrayLevelZoneEmphasis',
 'original_glszm_LargeAreaHighGrayLevelEmphasis',
 'original_glszm_SmallAreaEmphasis',
 'original_glszm_SmallAreaLowGrayLevelEmphasis',
 'original_glszm_ZoneEntropy',
 'original_glszm_ZonePercentage')

In [13]:
bakward_feature_selector = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                                    k_features='best',
                                                    forward=False,
                                                    floating=True,
                                                    verbose=2,
                                                    scoring= "accuracy",
                                                    cv= 5).fit(XRL_train, yRL_train)


[2024-12-25 15:47:13] Features: 9/1 -- score: 0.5567416962836207
[2024-12-25 15:47:30] Features: 8/1 -- score: 0.5567153701022333
[2024-12-25 15:47:49] Features: 7/1 -- score: 0.5538984686937827
[2024-12-25 15:48:08] Features: 6/1 -- score: 0.5520600236935633
[2024-12-25 15:48:32] Features: 6/1 -- score: 0.559527883813786
[2024-12-25 15:48:51] Features: 5/1 -- score: 0.5539160194813741
[2024-12-25 15:49:10] Features: 4/1 -- score: 0.5398666140143038
[2024-12-25 15:49:37] Features: 4/1 -- score: 0.5464042823921723
[2024-12-25 15:49:56] Features: 3/1 -- score: 0.5333157825457417
[2024-12-25 15:50:14] Features: 2/1 -- score: 0.4451976657452503
[2024-12-25 15:50:32] Features: 1/1 -- score: 0.2923961212759423

In [14]:
bakward_feature_selector.k_feature_names_

('original_glrlm_LongRunEmphasis',
 'original_glrlm_RunEntropy',
 'original_glrlm_RunLengthNonUniformity',
 'original_glrlm_ShortRunEmphasis',
 'original_glrlm_ShortRunHighGrayLevelEmphasis',
 'original_glrlm_ShortRunLowGrayLevelEmphasis')

In [15]:
backward_feature_selector.k_score_

NameError: name 'backward_feature_selector' is not defined