In [None]:
from numpy import array
from ml_models import Classifier
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import roc_curve, roc_auc_score
# Apply matplotlib's "ggplot" style sheet to every figure drawn below.
plt.style.use("ggplot")

In [None]:
def data_preprocessor(fname, uvs=16, ovs=20):
    """Parse a rule file into multi-hot feature rows and integer labels.

    Every non-blank line of the file must look like ``UA;OA;P``:

    * ``UA`` -- comma-separated integer indices in ``[0, uvs)``
      (presumably user-attribute ids -- confirm against the data source).
    * ``OA`` -- comma-separated integer indices in ``[0, ovs)``
      (presumably object-attribute ids -- confirm against the data source).
    * ``P``  -- a single integer label.

    Blank lines (including the empty string produced by a trailing newline)
    are skipped instead of raising on the ``;`` unpack.

    Args:
        fname: Path of the rule file to read.
        uvs: Width of the first (``UA``) indicator vector. Defaults to 16.
        ovs: Width of the second (``OA``) indicator vector. Defaults to 20.

    Returns:
        A tuple ``(X, Y)`` of numpy arrays. ``X`` has one row per rule, made
        of the ``UA`` indicator vector followed by the ``OA`` indicator
        vector (``uvs + ovs`` columns); ``Y`` holds the integer labels.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            ``;``-separated fields or contains a non-integer token.
        IndexError: If an index falls outside its indicator vector.
    """
    X, Y = [], []
    # The context manager closes the file; no explicit close() is needed.
    with open(fname, 'r') as f:
        for line in f:
            rule = line.strip()
            if not rule:
                # Tolerate blank lines / a trailing newline at end of file.
                continue
            UA, OA, P = rule.split(';')
            temp_u, temp_o = [0] * uvs, [0] * ovs
            for i in UA.split(','):
                temp_u[int(i)] = 1
            for i in OA.split(','):
                temp_o[int(i)] = 1
            X.append(temp_u + temp_o)
            Y.append(int(P))
    return (array(X), array(Y))

In [None]:
# Classifier type identifiers; each is passed as Classifier(ctype=...).
models = ["SVM", "DT", "RF", "xgboost", "gradboost"]
X, Y = data_preprocessor("final_data.txt")
# random_state pins the shuffled fold assignment so results are reproducible
# across kernel restarts. list() materialises the (train, test) index pairs:
# a bare generator is exhausted after one pass, so re-running the evaluation
# cell would otherwise silently iterate over nothing.
kfold_gen = list(KFold(n_splits=4, shuffle=True, random_state=42).split(X, Y))

In [None]:
# For every CV fold: fit each model on the training indices, draw its ROC
# curve on a shared axes, and save one figure per fold.
for (fold, (train, test)) in enumerate(kfold_gen, 1):
    fig, ax = plt.subplots()
    for model in models:
        clf = Classifier(ctype=model)
        clf.fit(X[train], Y[train])
        # Column 1 of predict_proba is taken as the positive-class score
        # (assumes binary labels ordered [negative, positive] -- confirm
        # against the Classifier wrapper).
        scores = clf.predict_proba(X[test])[:, 1]
        fpr, tpr, _ = roc_curve(Y[test], scores)
        # Compute AUC from the same continuous scores used for the curve.
        # Using hard predict() labels would collapse the ROC to a single
        # operating point and report an area that does not match the plot.
        auc = roc_auc_score(Y[test], scores)
        ax.plot(fpr, tpr, label="%s ROC (area = %0.2f)" % (model, auc))
    # Chance-level diagonal for reference.
    ax.plot([0, 1], [0, 1], "r--")
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel("1-Specificity (False Positive Rate)")
    ax.set_ylabel("Sensitivity (True Positive Rate)")
    ax.set_title(f"fold-{fold}: Receiver Operating Characteristic")
    ax.legend(loc="best")
    fig.savefig(f"ROC plot for fold-{fold}")
    # Release the figure so figures do not accumulate across folds.
    plt.close(fig)