In [1]:
#==== Imports ====#
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as ts
from scipy.io import arff
import pandas as pd
import numpy as np
#=================#

#==== Functions ====#
def data_conversion(data_column):
    for i in range(len(data_column)):
        if data_column[i] == b'N':
            data_column[i] = 0
        else:
            data_column[i] = 1
    return data_column

def rfe_algo(data, n_k=10):
    model = LogisticRegression(max_iter=10000000000) 
    model = RFE(model, n_k)
    rfe = model.fit(data[0], data[1])
    selections = rfe.support_
    return rfe, selections
#===================#

In [2]:
#==== Main Algorithm ====#
if __name__=='__main__':
    #File
    filename = 'CM1.arff.txt'
    data = arff.loadarff(filename)
    loaddata = pd.DataFrame(data[0])

    #Software metrics and decisions
    SM = np.array(loaddata[['LOC_BLANK','BRANCH_COUNT','CALL_PAIRS','LOC_CODE_AND_COMMENT']]) #Software metrics
    L = data_conversion(np.array(loaddata['Defective'])) #label
    print(f'SM: {SM.shape}')
    print(f'L: {L.shape}')
    SM_train, SM_test, L_train, L_test = ts(SM,L,test_size = 0.1)
    L_train=L_train.astype(int)

    #Algorithm Tests
    data = [SM_train,L_train]
    print(data)
    test, selections = rfe_algo(data)
    #print("Num Features: %s" % (test.n_features_))
    print("Selected Features: %s" % (selections))
    #print("Feature Ranking: %s" % (test.ranking_))
    #=========================#

Selected Features: [ True  True  True  True]


