In [7]:
#==== Imports ====#
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as ts
from scipy.io import arff
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
#=================#

#==== Functions ====#
def data_conversion(data_column):
    """Recode a label column in place: b'N' becomes 0, anything else becomes 1.

    Positions 0 .. len(data_column) - 1 are overwritten one by one, and the
    same container object that was passed in is returned.
    """
    position = 0
    total = len(data_column)
    while position < total:
        is_negative = data_column[position] == b'N'
        data_column[position] = 0 if is_negative else 1
        position += 1
    return data_column

def rfe_algo(data):
    """Fit a StandardScaler -> RFE(LogisticRegression) pipeline and return it.

    Parameters
    ----------
    data : sequence
        ``data[0]`` is passed to ``fit`` as the feature matrix and ``data[1]``
        as the target vector.

    Returns
    -------
    The fitted pipeline (the return value of ``pipe.fit``). The RFE results
    are attributes of its final step, e.g.
    ``result.named_steps['rfe'].support_``.
    """
    model = LogisticRegression(solver='lbfgs', max_iter=10000000000)
    # n_features_to_select is passed by keyword: the positional form
    # RFE(model, 3) was deprecated and then removed in newer scikit-learn
    # releases, where it raises TypeError. The keyword form works on both.
    pipe = make_pipeline(StandardScaler(), RFE(model, n_features_to_select=3))
    return pipe.fit(data[0], data[1])
#===================#

In [4]:
#==== Main Algorithm ====#
#File
filename = 'KC4.arff.txt'
data = arff.loadarff(filename)
loaddata = pd.DataFrame(data[0])

#Software metrics and decisions
SM = np.array(loaddata[['LOC_BLANK','BRANCH_COUNT','CALL_PAIRS','LOC_CODE_AND_COMMENT']]) #Software metrics
# Cast to int once, before splitting, so that L_train AND L_test are both
# integer arrays (previously only L_train was cast).
L = data_conversion(np.array(loaddata['Defective'])).astype(int) #label
# Fixed random_state so the split, and therefore the selected features,
# are reproducible under Restart & Run All.
SM_train, SM_test, L_train, L_test = ts(SM, L, test_size=0.1, random_state=42)

# Standard Scaler
# NOTE: the pipeline built in rfe_algo applies its own StandardScaler as well,
# so this manual scaling is redundant for the fit below; scalar / SM_scaled
# are kept in case other cells use them.
scalar = StandardScaler().fit(SM_train)
SM_scaled = scalar.transform(SM_train)

#Algorithm Tests
data = [SM_scaled,L_train]
test = rfe_algo(data)
# rfe_algo returns the fitted Pipeline; n_features_, support_ and ranking_
# are attributes of its RFE step, not of the Pipeline itself.
selector = test.named_steps['rfe']
print("Num Features: %s" % (selector.n_features_))
print("Selected Features: %s" % (selector.support_))
print("Feature Ranking: %s" % (selector.ranking_))
#=========================#

Num Features: 3
Selected Features: [False  True  True  True]
Feature Ranking: [2 1 1 1]


