In [1]:
#==== Imports ====#
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
#=================#

#==== Functions ====#
def decision_tree_model(data,args):
    """Fit a fresh ``DecisionTreeClassifier`` and return it.

    Parameters
    ----------
    data : indexable
        ``data[0]`` is passed to ``fit`` as the training inputs and
        ``data[1]`` as the training targets.
    args : any
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    DecisionTreeClassifier
        The classifier instance after ``fit`` has been called on it.
    """
    classifier = DecisionTreeClassifier()
    features, targets = data[0], data[1]
    classifier.fit(features, targets)
    return classifier
#===================#

In [2]:
#==== Functions ====#
def read_data(filename):
    """Load an ARFF file into a pandas DataFrame.

    Parameters
    ----------
    filename : str or file-like
        Forwarded unchanged to ``scipy.io.arff.loadarff``.

    Returns
    -------
    pandas.DataFrame
        Frame built from the first element (the records) of the
        ``loadarff`` result; the second element (metadata) is discarded.
    """
    records, _meta = arff.loadarff(filename)
    return pd.DataFrame(records)

def process_data(loaddata, feature_columns=None, label_column='Defective'):
    """Split a loaded DataFrame into a feature matrix and a label vector.

    Parameters
    ----------
    loaddata : pandas.DataFrame
        Frame containing the feature columns and the label column.
    feature_columns : str or iterable of str, optional
        Columns to use as features. When omitted, the original fixed set
        ``LOC_BLANK``, ``BRANCH_COUNT``, ``CALL_PAIRS`` and
        ``LOC_CODE_AND_COMMENT`` is used. A single string is treated as a
        one-column selection.
    label_column : str, optional
        Column holding the target labels. Defaults to ``'Defective'``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(software_metrics, labels)``: a 2-D array with one column per
        selected feature, and a 1-D array of the label column's values.

    Raises
    ------
    KeyError
        If any requested column is absent from ``loaddata``.
    """
    if feature_columns is None:
        feature_columns = ['LOC_BLANK','BRANCH_COUNT','CALL_PAIRS','LOC_CODE_AND_COMMENT']
    elif isinstance(feature_columns, str):
        # list('abc') would split the name into characters; wrap it instead.
        feature_columns = [feature_columns]
    else:
        feature_columns = list(feature_columns)

    # Fail early with the full list of absent columns rather than the first
    # one pandas happens to hit; the exception type stays KeyError.
    missing = [col for col in [*feature_columns, label_column] if col not in loaddata.columns]
    if missing:
        raise KeyError(f"Columns not found in data: {missing}")

    software_metrics = np.array(loaddata[feature_columns])
    labels = np.array(loaddata[label_column])
    return software_metrics,labels

def train_data(software_metrics,labels,test_size=0.1,random_state=None):
    """Split features and labels into train and test partitions.

    Parameters
    ----------
    software_metrics : array-like
        Feature matrix passed to ``train_test_split``.
    labels : array-like
        Target values passed to ``train_test_split``; the resulting label
        partitions must support ``.astype``.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to ``0.1``, the value
        that was previously hard-coded.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        previous unseeded behaviour; pass an integer to make the split
        (and therefore the reported accuracy) reproducible across runs.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` where both label arrays
        have been cast with ``astype('str')``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        software_metrics,
        labels,
        test_size=test_size,
        random_state=random_state,
    )
    # Labels are cast to a string dtype before being handed to the model.
    return X_train, X_test, y_train.astype('str'), y_test.astype('str')

def model_data(X_train,y_train,model):
    """Fit ``model`` on the training data.

    Parameters
    ----------
    X_train : array-like
        Training inputs, passed as the first argument to ``model.fit``.
    y_train : array-like
        Training targets, passed as the second argument to ``model.fit``.
    model : object
        Any object exposing ``fit(X, y)``.

    Returns
    -------
    object
        Whatever ``model.fit(X_train, y_train)`` returns.
    """
    return model.fit(X_train, y_train)

def evaluate_data(model,model_name,X_test,y_test):
    """Print the model name and its accuracy on the test partition.

    Parameters
    ----------
    model : object
        Object exposing ``predict(X)``.
    model_name : str
        Label printed on the ``Model Name:`` line.
    X_test : array-like
        Inputs passed to ``model.predict``.
    y_test : array-like
        Reference labels compared against the predictions.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    y_pred = model.predict(X_test)
    print(f"Model Name: {model_name}")
    # Accuracy is reported as a percentage rounded to two decimal places.
    accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100, 2)
    print(f'Accuracy: {accuracy_pct}%')

def main(filename,model,model_name):
    """Run the pipeline: load, extract, split, fit, then report accuracy.

    Parameters
    ----------
    filename : str
        Path handed to ``read_data``.
    model : object
        Estimator handed to ``model_data`` for fitting.
    model_name : str
        Label handed to ``evaluate_data`` for the printed report.
    """
    frame = read_data(filename)
    features, targets = process_data(frame)
    X_train, X_test, y_train, y_test = train_data(features, targets)
    fitted_model = model_data(X_train, y_train, model)
    evaluate_data(fitted_model, model_name, X_test, y_test)
#===================#


In [3]:
#==== Main Algorithm ====#
if __name__=='__main__':
    # --- Run configuration: edit the three values below ---

    # ARFF file to load
    filename = 'CM1.arff.txt'

    # Estimator instance to fit and evaluate
    model = DecisionTreeClassifier()

    # Label shown on the "Model Name:" line of the report
    model_name = "Decision_Tree"

    # Execute the full load -> split -> fit -> evaluate pipeline
    main(filename=filename, model=model, model_name=model_name)
    #=========================#

Model Name: Decision_Tree
Accuracy: 81.82%
