# Imports

In [25]:
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.metrics import confusion_matrix

In [26]:
def f1_model(model,X_test,y_test):
    """Compute the F1-score of a fitted binary classifier on held-out data.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X_test : array-like
        Feature rows passed to ``model.predict``.
    y_test : array-like
        True labels the predictions are compared against.

    Returns
    -------
    float
        ``2 * precision * recall / (precision + recall)``. The class that
        occupies the second row/column of the confusion matrix is treated as
        the positive class. Returns ``0.0`` when there are no true positives,
        because precision, recall or their sum would otherwise be ``0/0``.

    Raises
    ------
    ValueError
        If the confusion matrix is not 2x2, e.g. when only one class appears
        in ``y_test`` and the predictions, or when there are more than two.
    """
    predictions = model.predict(X_test)
    matrix = confusion_matrix(y_test, predictions)
    if matrix.shape != (2, 2):
        raise ValueError(
            "f1_model expects a binary problem with both classes present; "
            f"got a confusion matrix of shape {matrix.shape}"
        )
    tn, fp, fn, tp = matrix.ravel()

    # With zero true positives every ratio below degenerates to 0 or 0/0
    # (numpy yields nan plus a RuntimeWarning); define the score as 0 instead.
    if tp == 0:
        return 0.0

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return (2 * precision * recall) / (precision + recall)

## Reading data

In [27]:
if __name__ == '__main__':
    # Change filename here
    filename = '../datasets/NASA/KC4.arff.txt'

    # loadarff returns a pair; only its first element is turned into a frame.
    data = arff.loadarff(filename)
    loaddata = pd.DataFrame(data[0])
    # A bare `loaddata.head()` nested under `if` is evaluated and discarded:
    # the notebook only renders the value of a cell's last top-level
    # expression. Print it so the preview actually appears.
    print(loaddata.head())

## Acquire X and Y values

In [28]:
if __name__ == '__main__':
    # Columns used as model inputs; 'Defective' is the prediction target.
    feature_columns = ['LOC_BLANK','BRANCH_COUNT','CALL_PAIRS','LOC_CODE_AND_COMMENT']

    software_metrics = np.array(loaddata[feature_columns])
    labels = np.array(loaddata['Defective'])

## Train test Split

In [29]:
if __name__ == '__main__':
    from sklearn.model_selection import train_test_split

    # Hold out 10% of the rows for evaluation. The seed is pinned so that a
    # fresh "Restart & Run All" reproduces the same split and therefore the
    # same metrics below.
    X_train, X_test, y_train, y_test = train_test_split(
        software_metrics, labels, test_size=0.1, random_state=42
    )
    # Labels are cast to str before fitting.
    # NOTE(review): presumably because the ARFF loader yields a label dtype
    # the estimator does not accept directly; confirm.
    y_train = y_train.astype(str)
    y_test = y_test.astype(str)

## Selecting a Model (Logistic Regression)

In [30]:
if __name__ == '__main__':
    from sklearn.linear_model import LogisticRegression

    # Build the classifier and train it on the training split in one step;
    # fit() returns the estimator itself.
    logreg = LogisticRegression(solver='lbfgs').fit(X_train, y_train)

In [31]:
if __name__ == '__main__':
    # Predicted label for every row of the held-out split.
    predictions = logreg.predict(X_test)
    # A bare `predictions` expression inside the `if` body is never rendered
    # by the notebook, so print the array explicitly.
    print(predictions)

In [32]:
if __name__ == '__main__':
    # Labelled confusion matrix: rows are the actual classes, columns the
    # predicted ones.
    confusion_df = pd.DataFrame(
        confusion_matrix(y_test, predictions),
        columns=['Predicted Non-Defective', "Predicted Defective"],
        index=['Actual Non-Defective', 'Actual Defective'],
    )
    # Previously the frame was built and discarded: an expression nested
    # under `if` is not displayed by the notebook. Print it instead.
    print(confusion_df)

In [33]:
if __name__ == '__main__':
    # Unpack the 2x2 matrix row by row: first row holds (tn, fp), second row
    # holds (fn, tp).
    (tn, fp), (fn, tp) = confusion_matrix(y_test, predictions)

    # One count per line, in the same order as before.
    print(
        f'True Positives: {tp}',
        f'False Positives: {fp}',
        f'True Negatives: {tn}',
        f'False Negatives: {fn}',
        sep='\n',
    )

True Positives: 3
False Positives: 0
True Negatives: 5
False Negatives: 5


## Calculating Precision (True Positive)/(True Positive + False Positive)

In [34]:
if __name__ == '__main__':
    # Precision = TP / (TP + FP): the share of predicted defects that are real.
    predicted_positive = tp + fp
    # If the model predicted no positives at all the ratio is 0/0 (nan with a
    # RuntimeWarning on numpy integers); report 0.0 in that case.
    precision = (tp)/predicted_positive if predicted_positive else 0.0
    print(f'Precision: {precision}')

Precision: 1.0


## Calculating Recall (True Positive)/(True Positive + False Negative)

In [35]:
if __name__ == '__main__':
    # Recall = TP / (TP + FN): the share of real defects the model found.
    actual_positive = tp + fn
    # If the test split contains no actual positives the ratio is 0/0 (nan
    # with a RuntimeWarning on numpy integers); report 0.0 in that case.
    recall = (tp)/actual_positive if actual_positive else 0.0
    print(f'Recall: {recall}')

Recall: 0.375


## Calculating F1-score

In [36]:
if __name__ == '__main__':
    # F1 is the harmonic mean of precision and recall.
    precision_plus_recall = precision + recall
    # Both terms are 0 when there are no true positives, which would make the
    # harmonic mean 0/0; report 0.0 in that case.
    f1_score = (2*precision*recall)/precision_plus_recall if precision_plus_recall else 0.0
    print(f'F1-score: {f1_score}')

F1-score: 0.5454545454545454
