In [None]:
from csv import writer
from numpy import array
from ml_models import Classifier
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
# Select matplotlib's built-in "ggplot" style sheet for any figures drawn after this point.
plt.style.use("ggplot")

In [None]:
def data_preprocessor(fname, uvs=16, ovs=20):
    """Parse a rule file into multi-hot feature vectors and integer labels.

    Every non-blank line of ``fname`` is expected to look like ``UA;OA;P``:
    ``UA`` and ``OA`` are comma-separated integer indices and ``P`` is an
    integer label. For each line a vector of length ``uvs + ovs`` is built:
    the first ``uvs`` slots are set to 1 at the indices listed in ``UA`` and
    the remaining ``ovs`` slots are set to 1 at the indices listed in ``OA``.

    Blank lines (including the empty line produced by a trailing newline) are
    skipped, and an empty ``UA`` or ``OA`` field leaves its segment all zeros.

    Args:
        fname: Path of the text file to read.
        uvs: Length of the ``UA`` segment of each feature vector.
        ovs: Length of the ``OA`` segment of each feature vector.

    Returns:
        A tuple ``(X, Y)`` of numpy arrays: ``X`` holds one feature vector per
        rule and ``Y`` holds the corresponding integer labels.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            ``;``-separated fields, or a token cannot be parsed as an integer.
        IndexError: If an index falls outside its segment.
    """
    X, Y = [], []
    with open(fname, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines and the usual newline at end of file.
                continue
            UA, OA, P = line.split(';')
            temp_u, temp_o = [0] * uvs, [0] * ovs
            for token in UA.split(','):
                if token.strip():  # "".split(',') yields [''], which int() rejects
                    temp_u[int(token)] = 1
            for token in OA.split(','):
                if token.strip():
                    temp_o[int(token)] = 1
            X.append(temp_u + temp_o)
            Y.append(int(P))
    return (array(X), array(Y))

In [None]:
# Classifier types to evaluate; each name is handed to Classifier(ctype=...).
models = ["SVM", "DT", "RF", "xgboost", "gradboost"]
# Header row written at the top of every per-fold score CSV.
csvfields = ["confidence for class-0", "confidence for class-1", "actual class"]
X, Y = data_preprocessor("final_data.txt")
# random_state pins the shuffle so the four folds are identical on every run.
# KFold.split() returns a one-shot generator; it is materialised into a list so
# that re-running the training cell iterates the same folds again instead of
# silently looping over an exhausted generator.
kfold_gen = list(KFold(n_splits=4, shuffle=True, random_state=42).split(X, Y))

In [None]:
# For every fold (numbered from 1) and every model type: fit on the training
# split, predict class probabilities on the held-out split, and write them next
# to the true labels in CSV_Files/<model>-<fold>-scores.csv.
# The CSV_Files directory must already exist; open() does not create it.
for (fold, (train, test)) in enumerate(kfold_gen, 1):
    for model in models:
        clf = Classifier(ctype=model)
        clf.fit(X[train], Y[train])
        # Only columns 0 and 1 are read below, i.e. a two-class output is
        # assumed -- TODO confirm against Classifier.predict_proba.
        class_probs = clf.predict_proba(X[test])
        # newline='' is required by the csv module; without it, platforms that
        # translate "\n" to "\r\n" end up with a blank line after every row.
        with open(f"CSV_Files/{model}-{fold}-scores.csv", 'w', newline='') as csvfile:
            csvwriter = writer(csvfile)
            csvwriter.writerow(csvfields)
            csvwriter.writerows(
                [round(row[0], 3), round(row[1], 3), label]
                for (row, label) in zip(class_probs, Y[test])
            )