In [9]:
# All imports
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import VotingClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

In [11]:
def read_data(filename):
    """Load an ARFF file into a pandas DataFrame.

    Args:
        filename: Path (or file-like object) handed straight to
            ``scipy.io.arff.loadarff``.

    Returns:
        A ``pd.DataFrame`` built from the data records of the ARFF file; the
        metadata returned alongside the records is discarded.
    """
    # loadarff returns a (records, metadata) pair; only the records are used.
    records, _meta = arff.loadarff(filename)
    return pd.DataFrame(records)

def process_data(loaddata,
                 feature_columns=('LOC_BLANK','BRANCH_COUNT','CALL_PAIRS','LOC_CODE_AND_COMMENT'),
                 label_column='Defective'):
    """Extract the feature matrix and the label vector from a DataFrame.

    Args:
        loaddata: DataFrame holding both the metric columns and the label column.
        feature_columns: Name(s) of the columns used as features. Defaults to
            the four metrics this notebook originally hard-coded, so existing
            calls behave as before. A single string is treated as one column.
        label_column: Name of the column holding the class label.

    Returns:
        ``(software_metrics, labels)`` as NumPy arrays: a 2-D array with one
        column per entry of ``feature_columns`` and a 1-D array of labels.

    Raises:
        KeyError: If any requested column is absent from ``loaddata``.
    """
    # Guard against a bare string being split into characters by list().
    if isinstance(feature_columns, str):
        feature_columns = [feature_columns]
    feature_columns = list(feature_columns)

    # Fail with one message naming every missing column, instead of the
    # first pandas lookup error encountered.
    missing = [col for col in feature_columns + [label_column]
               if col not in loaddata.columns]
    if missing:
        raise KeyError(f"Columns not found in data: {missing}")

    software_metrics = np.array(loaddata[feature_columns])
    labels = np.array(loaddata[label_column])
    return software_metrics,labels

def train_data(software_metrics,labels,test_size = 0.1,random_state = None):
    """Split features and labels into train and test partitions.

    Args:
        software_metrics: Feature array, one row per sample.
        labels: Label array aligned row-for-row with ``software_metrics``.
        test_size: Forwarded to ``train_test_split``. Defaults to ``0.1``,
            the value that was previously hard-coded.
        random_state: Forwarded to ``train_test_split``. The default ``None``
            keeps the previous unseeded behaviour; pass an int to get the
            same split on every run.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, with both label arrays cast
        to ``str`` dtype.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        software_metrics, labels, test_size = test_size, random_state = random_state
    )
    # Labels are normalised to str dtype before fitting/scoring
    # (presumably because ARFF nominal values arrive as bytes; confirm).
    y_train = y_train.astype('str')
    y_test = y_test.astype('str')
    return X_train, X_test, y_train, y_test

def model_data(X_train,y_train,model):
    """Fit ``model`` on the training data and return the result of ``fit``.

    Args:
        X_train: Training features passed to ``model.fit``.
        y_train: Training labels passed to ``model.fit``.
        model: Any object exposing ``fit(X, y)``.

    Returns:
        Whatever ``model.fit(X_train, y_train)`` returns.
    """
    return model.fit(X_train, y_train)

def evaluate_data(model,model_name,X_test,y_test):
    """Print the model's name and its accuracy on the test set.

    Args:
        model: Fitted object exposing ``predict(X)``.
        model_name: Label printed on the first output line.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The results are only printed.
    """
    y_pred = model.predict(X_test)
    print(f"Model Name: {model_name}")
    # Accuracy as a percentage, rounded to two decimals.
    accuracy_pct = round(metrics.accuracy_score(y_test, y_pred) * 100, 2)
    print(f'Accuracy: {accuracy_pct}%')

def main(filename,model,model_name):
    """Run the whole pipeline: load, select columns, split, fit, report.

    Args:
        filename: ARFF file passed to ``read_data``.
        model: Estimator passed to ``model_data`` for fitting.
        model_name: Label passed to ``evaluate_data`` for the printed report.

    Returns:
        None. The accuracy report is printed by ``evaluate_data``.
    """
    features, targets = process_data(read_data(filename))
    train_X, test_X, train_y, test_y = train_data(features, targets)
    fitted = model_data(train_X, train_y, model)
    evaluate_data(fitted, model_name, test_X, test_y)

# Change filename here
filename = 'CM1.arff.txt'

# Seed for every source of randomness in this run. Seeding NumPy's global RNG
# covers the train/test split performed inside train_data, which is called
# without an explicit random_state; scikit-learn falls back to the global
# NumPy RNG in that case. Without this, the printed accuracy changes on
# every Restart & Run All.
SEED = 42
np.random.seed(SEED)

logistic_regression = LogisticRegression(solver = 'lbfgs')
# The forest is seeded explicitly so it does not depend on global RNG state.
random_forest = RandomForestClassifier(n_estimators = 1000, random_state = SEED)
naive_bayes = GaussianNB()

# Display names used as the estimator keys inside the VotingClassifier.
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes']

# Change model here
model = VotingClassifier(estimators = [(labels[0],logistic_regression),
                                       (labels[1],random_forest),
                                       (labels[2],naive_bayes)])

# Change model name here
model_name = "Voting Classifier"

main(filename,model,model_name)

Model Name: Voting Classifier
Accuracy: 98.04%
