In [7]:
#==== Imports ====#
from scipy.io import arff
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
#=================#

#==== Functions ====#
def cfs_algo(data, n_k=10):
    """Keep the ``n_k`` best-scoring features according to an ANOVA F-test.

    Despite the name, this does not implement correlation-based feature
    selection: it fits scikit-learn's ``SelectKBest`` with ``f_classif`` as
    the scoring function.

    Parameters
    ----------
    data : sequence of length 2
        ``data[0]`` is the feature matrix and ``data[1]`` the class labels;
        both are passed unchanged to ``SelectKBest.fit``.
    n_k : int, default 10
        Number of features to keep (forwarded to ``SelectKBest`` as ``k``).

    Returns
    -------
    tuple
        ``(cfs, selections)``: the fitted ``SelectKBest`` instance and a list
        of Python bools, one per feature column, that is ``True`` where the
        feature was selected.
    """
    cfs = SelectKBest(score_func=f_classif, k=n_k)
    cfs.fit(data[0], data[1])
    # get_support() already yields the per-feature boolean mask, so it does
    # not have to be rebuilt from the selected indices with a membership test
    # per column. It also removes the need to index data[0][0], which is only
    # a row when the feature matrix supports positional row access.
    selections = cfs.get_support().tolist()
    return cfs, selections

def data_conversion(data):
    """Recode a label sequence to 0/1 in place and return it.

    Each element equal to ``b'N'`` is replaced by ``0``; every other element
    is replaced by ``1``. The passed-in container itself is modified and is
    also the return value.
    """
    for position, label in enumerate(data):
        data[position] = 0 if label == b'N' else 1
    return data
#===================#

In [8]:
#==== Main Algorithm ====#
if __name__ == '__main__':
    # Read the ARFF dataset; raw[0] holds the records that go into the frame.
    raw = arff.loadarff('KC4.arff.txt')
    data = pd.DataFrame(raw[0])

    # Every column except the last is used as a software metric.
    SM = np.array(data.iloc[:, :-1])
    # The last column is the label; it is copied into a fresh array so that
    # data_conversion's in-place 0/1 recoding does not touch the DataFrame.
    L = data_conversion(np.array(data.iloc[:, -1])).astype(int)
    print(f'SM: {SM.shape}')
    print(f'L: {L.shape}')

    # Top-5 selector; `fit` and `features` stay available for the scratch
    # inspection cell further down.
    fs = SelectKBest(score_func=f_classif, k=5)
    fit = fs.fit(SM, L)
    features = fit.transform(SM)

    # Compact float formatting for any arrays printed from here on.
    np.set_printoptions(precision=3, suppress=True)
    print(cfs_algo([SM, L]))
#=========================#

SM: [[  0.   7.   5. ...   1.   0. 100.]
 [  0. 125.  11. ...   1.   0. 489.]
 [  0.  23.   2. ...   1.   0.  62.]
 ...
 [  0.   3.   9. ...   1.   0.  59.]
 [  0.   1.   1. ...   1.   0. 204.]
 [  0.  11.   9. ...   1.   0.  76.]]
L: [0 1 1 1 0 1 1 0 1 1 0 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 1 1 0 1 0 1 0 1 0 0
 1 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 0 1 1 1 0 1 1 0 1 1 0 1 1 0 0 0 1 0 0 1 1
 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 1
 0 0 0 0 0 1 0 1 1 0 1 0 0 1]
(SelectKBest(), [False, True, True, False, False, False, True, True, False, False, True, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, True, True, False, False, False, False, False, False, False, False])


 37 38] are constant.
  f = msb / msw
 37 38] are constant.
  f = msb / msw


In [9]:
#==== Temporary Tests ====#
#print(fit.get_support(True))
#print(fit.scores_)
#print(features[0:5,:])
#=========================#