In [1]:
#==== Imports ====#
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
#=================#

#==== Functions ====#
def logistic_regression_model(data,args=None):
    """Fit a logistic-regression classifier on a (features, labels) pair.

    Parameters
    ----------
    data : sequence
        ``data[0]`` is passed to ``fit`` as the training features and
        ``data[1]`` as the training labels.
    args : optional
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    The estimator instance after ``fit`` has been called on it.
    """
    classifier = LogisticRegression(solver='lbfgs', max_iter=10000000000)
    features, targets = data[0], data[1]
    classifier.fit(features, targets)
    return classifier
#===================#

In [2]:
#==== Functions ====#
def read_data(filename):
    """Load an ARFF file and return its records as a pandas DataFrame.

    ``filename`` is handed directly to ``scipy.io.arff.loadarff``. Only the
    first element of the loader's result (the record array) is kept; the
    second element is discarded.
    """
    records = arff.loadarff(filename)[0]
    return pd.DataFrame(records)

def process_data(loaddata, feature_columns=None, label_column='Defective', verbose=True):
    """Split a loaded dataset into a feature matrix and a label vector.

    Parameters
    ----------
    loaddata : pandas.DataFrame
        Table containing the feature columns and the label column.
    feature_columns : str or sequence of str, optional
        Columns used as model inputs. When omitted, the four metrics this
        notebook was written for are used: ``LOC_BLANK``, ``BRANCH_COUNT``,
        ``CALL_PAIRS`` and ``LOC_CODE_AND_COMMENT``.
    label_column : str, optional
        Column holding the target labels. Defaults to ``'Defective'``.
    verbose : bool, optional
        When true (the default) both resulting arrays are printed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(software_metrics, labels)``: a 2-D array with one column per
        feature, and a 1-D array of labels.

    Raises
    ------
    KeyError
        If any requested column is missing from ``loaddata``.
    """
    if feature_columns is None:
        feature_columns = ['LOC_BLANK','BRANCH_COUNT','CALL_PAIRS','LOC_CODE_AND_COMMENT']
    elif isinstance(feature_columns, str):
        # A bare string would otherwise be split into characters by list().
        feature_columns = [feature_columns]
    feature_columns = list(feature_columns)

    # Fail early with one message naming every absent column, rather than
    # letting pandas raise on whichever lookup happens to run first.
    missing = [name for name in feature_columns + [label_column]
               if name not in loaddata.columns]
    if missing:
        raise KeyError(f"Columns not found in dataset: {missing}")

    software_metrics = np.array(loaddata[feature_columns])
    labels = np.array(loaddata[label_column])
    if verbose:
        print(software_metrics)
        print(labels)
    return software_metrics,labels

def train_data(software_metrics,labels,test_size=0.1,random_state=42,verbose=True):
    """Split features and labels into train and test partitions.

    Parameters
    ----------
    software_metrics : array-like
        Feature matrix, one row per sample.
    labels : array-like
        Label for each row of ``software_metrics``.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to ``0.1``.
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. A fixed default makes the
        split, and therefore the reported accuracy, repeatable across runs.
        Pass ``None`` for a different split on every call.
    verbose : bool, optional
        When true (the default) the training features and the test labels
        are printed.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``. Both label arrays are cast
        to NumPy's ``str`` dtype.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        software_metrics,
        labels,
        test_size=test_size,
        random_state=random_state,
    )
    # Normalise the label dtype to str for both partitions so the labels used
    # for fitting and for scoring have the same type.
    y_train = y_train.astype('str')
    y_test = y_test.astype('str')
    if verbose:
        print(X_train)
        print(y_test)
    return X_train, X_test, y_train, y_test

def model_data(X_train,y_train,model):
    """Fit ``model`` on the training data and return the result of ``fit``.

    Parameters
    ----------
    X_train : training features, passed to ``model.fit`` unchanged.
    y_train : training labels, passed to ``model.fit`` unchanged.
    model : any object exposing ``fit(X, y)``.

    Returns
    -------
    Whatever ``model.fit(X_train, y_train)`` returns.
    """
    fitted = model.fit(X_train, y_train)
    return fitted

def evaluate_data(model,model_name,X_test,y_test):
    """Print the model's name and its accuracy on the held-out data.

    Parameters
    ----------
    model : object exposing ``predict(X)``.
    model_name : label printed on the first output line.
    X_test : features passed to ``model.predict``.
    y_test : true labels compared against the predictions.

    The accuracy is reported as a percentage rounded to two decimals.
    Nothing is returned.
    """
    predicted = model.predict(X_test)
    print(f"Model Name: {model_name}")
    # Scored after the name is printed, matching the original output order.
    accuracy_pct = round(metrics.accuracy_score(y_test, predicted)*100,2)
    print(f'Accuracy: {accuracy_pct}%')

def main(filename,model,model_name):
    """Run the full pipeline: load, select columns, split, fit, evaluate.

    Parameters
    ----------
    filename : passed to ``read_data``.
    model : unfitted estimator, passed to ``model_data``.
    model_name : label passed to ``evaluate_data`` for the printed report.

    Nothing is returned; results are printed by the called helpers.
    """
    dataset = read_data(filename)
    features, targets = process_data(dataset)
    train_X, test_X, train_y, test_y = train_data(features, targets)
    fitted_model = model_data(train_X, train_y, model)
    evaluate_data(fitted_model, model_name, test_X, test_y)
#===================#

In [3]:
#==== Main Algorithm ====#
if __name__=='__main__':
    # Notebook entry point: pick the dataset, the estimator and its display
    # name, then run the whole pipeline once.

    # Change filename here
    filename = 'CM1.arff.txt'

    # Change model here
    model = LogisticRegression(solver='lbfgs',max_iter=10000000000)

    # Change model name here (printed verbatim in the evaluation report)
    model_name = "Logistic Regression"

    main(filename,model,model_name)
#=========================#

[[ 2.  3.  0.  0.]
 [ 3.  3.  0.  2.]
 [38. 35.  4.  5.]
 ...
 [ 3.  3.  1.  1.]
 [ 6.  9.  3. 10.]
 [ 1.  3.  4.  0.]]
[b'N' b'N' b'N' b'Y' b'N' b'N' b'N' b'Y' b'N' b'N' b'Y' b'N' b'N' b'N'
 b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y' b'Y' b'N' b'N' b'Y' b'N' b'N'
 b'N' b'N' b'N' b'Y' b'Y' b'N' b'N' b'N' b'N' b'Y' b'N' b'Y' b'N' b'N'
 b'N' b'N' b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N'
 b'N' b'N' b'N' b'N' b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y'
 b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N'
 b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N'
 b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N'
 b'Y' b'N' b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y' b'N' b'N'
 b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y'
 b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y' b'N' b'N'
 b'N' b'N' b'N' b'N' b'Y' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'N' b'Y'
 b'N' b'N' b'N' b'N' b'N' b'