# Imports

In [22]:
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Suppress Warnings
import warnings
warnings.filterwarnings('ignore')

# Preprocessing

In [23]:
#========== IMPORTS =============#
# Allows jupyter notebook to be imported
import jupyter_import
from data_preproc.Preprocess import preprocess, Normalize
#================================#

# Feature Selection

In [24]:
from data_preproc.CFS import cfs_algo
from data_preproc.RFE import rfe_algo
from data_preproc.RR import ridge_algo

# Algorithms

## Base Predictors

In [25]:
from pred_mdls.base.Complement_Naive_Bayes import complement_naive_bayes_model
from pred_mdls.base.Decision_Tree import decision_tree_model
from pred_mdls.base.Logistic_Regression import logistic_regression_model
from pred_mdls.base.Multi_Layer_Perceptron import multi_layer_perceptron_model
from pred_mdls.base.Naive_Bayes import naive_bayes_model

## Ensemble Predictors

In [26]:
from pred_mdls.ensemble.Random_Forest import random_forest_model
from pred_mdls.ensemble.Rotation_Forest import rotation_forest_model
from pred_mdls.ensemble.Voting import voting_model

# Evaluation Metrics

In [27]:
from pf_eval.AUC_ROC import auc_roc_model
from pf_eval.F1_Score import f1_model
from pf_eval.CSV import write_results

In [1]:
def data_conversion(data):
    for i in range(len(data)):
        if data[i] == b'N':
            data[i] = 0
        else:
            data[i] = 1
    return data

def read_data(filename):
    data = arff.loadarff(filename)
    loaddata = pd.DataFrame(data[0])
    return loaddata

def process_data(loaddata,features):
    # Features are selected based on CFS
    software_metrics = np.array(loaddata[features])
    labels = np.array(loaddata['Defective'])
    return software_metrics,labels

def train_data(software_metrics,labels):
    X_train, X_test, y_train, y_test = train_test_split(software_metrics, labels, test_size = 0.1)
    y_train = y_train.astype('str')
    y_test = y_test.astype('str')
    return X_train, X_test, y_train, y_test

def evaluate_data(model,model_name,X_test,y_test):
    predictions = model.predict(X_test)
    auc_score = round(auc_roc_model(model,X_test,y_test),2)
    f1_score = round(f1_model(model,X_test,y_test),2)
    print(f"Model Name: {model_name}")
    print(f'Accuracy: {round(metrics.accuracy_score(y_test, predictions)*100,2)}%')
    print(f'AUC Score: {auc_score}')
    print(f'F1-score: {f1_score}')
    return auc_score,f1_score


    

def main(filename):
    # Read the file
    loaddata = read_data(filename)
    loaddata = Normalize(loaddata)
    #software_metrics = loaddata.iloc[:,:-1] #Software metrics
    #labels = loaddata.iloc[:,-1] #Labels
    SM = np.array(loaddata.iloc[:,:-1]) #Software metrics
    L = data_conversion(np.array(loaddata.iloc[:,-1])).astype(int) #Labels
    data = [SM,L]
       
    # ===== Feature Selection ====== #

    # ==== CFS ==== #
    cfs, cfs_selections = cfs_algo(data,10)
    # ============= #

    # ===== RFE ======== #
    rfe, rfe_selections = rfe_algo(data,10)
    # ================== #
    
    # ========= Preprocessing ============= #
    pp = preprocess(loaddata)
    pp_cfs = preprocess(loaddata, cfs_selections)
    pp_rfe = preprocess(loaddata, rfe_selections)

    pp_arr = [pp,pp_cfs,pp_rfe]
    pp_name = ['No filters','CFS Feature Selection','RFE Feature Selection']
    args = [1000]
    # ====================================== #

    result = []
    auc_arr = []
    f1_arr = []
    header = []

    for i,pp in enumerate(pp_arr):
        print(pp_name[i])
        data = [pp[0][0],pp[0][2]]
    
        # ======== Model Creation =========== #
        # Base Predictors
        cnb = complement_naive_bayes_model(data,args)
        dt = decision_tree_model(data,args)
        lr = logistic_regression_model(data,args)
        mlp = multi_layer_perceptron_model(data,args)
        nb = naive_bayes_model(data,args)

        # Ensemble Predictors
        rf = random_forest_model(data,args)
        rof = rotation_forest_model(data,args)
        vt = voting_model(data,args)
        # ==================================== #

        models = [cnb,dt,lr,mlp,nb,rf,rof,vt] 
        model_name = ['Complement Naive Bayes','Decision Tree','Logistic regression',
                        'Multi Layer Perceptron','Naive Bayes','Random Forest','Rotation Forest','Voting']
        for i in range(len(models)):
            print('*'*50)
            auc_score,f1_score = evaluate_data(models[i],model_name[i],pp[0][1],pp[0][3])
            auc_arr.append(auc_score)
            f1_arr.append(f1_score)
        print('='*100)
    header.append('Model Name')
    result.append(model_name)
    header.append('AUC')
    result.append(auc_arr)
    header.append('F1 Score')
    result.append(f1_arr)
    return result, header

def translate(result):
    count = 1
    res = []
    while count <= 3:
        for i in range(len(result[0])):
            res.append([result[0][i]], result[1][((i+1)*count)-1],result[2][((i+1)*count)-1])
        count += 1
    return res
            
            
if __name__=='__main__':
    filename = 'datasets/KC4.arff.txt'
    result, header = main(filename)
    result = translate(result)
    
    print(header)
    print(result)

IndentationError: expected an indented block (<ipython-input-1-8fbbe3edbec3>, line 113)