# Imports

In [21]:
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Preprocessing

In [22]:
#========== IMPORTS =============#
# Allows jupyter notebook to be imported
import jupyter_import
from data_preproc.Preprocess import preprocess
#================================#

# Feature Selection

In [23]:
from data_preproc.CFS import cfs_algo
from data_preproc.RFE import rfe_algo
from data_preproc.RR import ridge_algo

# Algorithms

## Base Predictors

In [11]:
from pred_mdls.base.Complement_Naive_Bayes import complement_naive_bayes_model
from pred_mdls.base.Decision_Tree import decision_tree_model
from pred_mdls.base.Logistic_Regression import logistic_regression_model
from pred_mdls.base.Multi_Layer_Perceptron import multi_layer_perceptron_model
from pred_mdls.base.Naive_Bayes import naive_bayes_model

## Ensemble Predictors

In [12]:
from pred_mdls.ensemble.Random_Forest import random_forest_model
from pred_mdls.ensemble.Rotation_Forest import rotation_forest_model
from pred_mdls.ensemble.Voting import voting_model

In [24]:
def data_conversion(data):
    for i in range(len(data)):
        if data[i] == b'N':
            data[i] = 0
        else:
            data[i] = 1
    return data

def read_data(filename):
    data = arff.loadarff(filename)
    loaddata = pd.DataFrame(data[0])
    return loaddata

def process_data(loaddata,features):
    # Features are selected based on CFS
    software_metrics = np.array(loaddata[features])
    labels = np.array(loaddata['Defective'])
    return software_metrics,labels

def train_data(software_metrics,labels):
    X_train, X_test, y_train, y_test = train_test_split(software_metrics, labels, test_size = 0.1)
    y_train = y_train.astype('str')
    y_test = y_test.astype('str')
    return X_train, X_test, y_train, y_test

def feature_selection(fs,X):
    dfscores = pd.DataFrame(fs.scores_)
    dfcolumns = pd.DataFrame(X.columns)
    feature_scores = pd.concat([dfcolumns,dfscores.fillna(0)],axis=1)
    feature_scores.columns = ['Feature','Score']
    best_ten_features = feature_scores.nlargest(10,'Score')
    return best_ten_features

# def feature_selection_algo(X,Y):
#     array = []
#     data = [X,Y]
#     cfs = cfs_algo(data)
#     ridge = ridge_algo(data)
#     print(f'Ridge score: {ridge.get_support()}')
    # rfe = rfe_algo(data)
    # print(f'RFE score: {rfe.score}')

def main(filename):
    # Read the file
    loaddata = read_data('datasets/'+filename)

    software_metrics = loaddata.iloc[:,:-1] #Software metrics
    labels = loaddata.iloc[:,-1] #Labels

    X_train, X_test, y_train, y_test = train_data(software_metrics,labels)

    # ===== Feature Selection ====== #

    # ==== CFS ==== #
    data = [X_train,y_train]
    cfs = cfs_algo(data)
    cfs_best_ten_features = feature_selection(cfs,software_metrics)
    print(cfs_best_ten_features)
    # ============= #

    # ===== Ridge ===== #
    software_metrics = np.array(loaddata.iloc[:,:-1]) #Software metrics
    labels = data_conversion(np.array(loaddata.iloc[:,-1])) #label 

    X_train, X_test, y_train, y_test = train_data(software_metrics,labels)

    data = [X_train,y_train]
    ridge = ridge_algo(data)
    # print(ridge.intercept_)
    # print(ridge.score(software_metrics,labels))
    # features = best_ten_features['Feature'].values
    # ===================== #

    # ===== RFE ======== #
    rfe = rfe_algo(data)
    print(rfe.ranking_)

    # software_metrics,labels = process_data(loaddata,features)

    # X_train, X_test, y_train, y_test = train_data(software_metrics,labels)

    # Preprocessing
    preprocessed_data = preprocess(filename)

    # Algorithms
    # return preprocessed_data

filename = 'KC4.arff.txt'
main(filename)

                            Feature      Score
27             MAINTENANCE_SEVERITY  33.127413
2                        CALL_PAIRS  22.088178
31  NORMALIZED_CYLOMATIC_COMPLEXITY  15.380972
11                   DESIGN_DENSITY  11.439184
30                       NODE_COUNT   7.730448
12                       EDGE_COUNT   7.669434
1                      BRANCH_COUNT   7.149434
6             CYCLOMATIC_COMPLEXITY   7.149434
10                DESIGN_COMPLEXITY   6.595051
13             ESSENTIAL_COMPLEXITY   1.114354


 37 38] are constant.
  f = msb / msw


AttributeError: 'Pipeline' object has no attribute 'ranking_'