In [1]:
#importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt #only for visualizations, otherwise skip
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors
from sklearn.model_selection import cross_validate, cross_val_score, cross_val_predict
from sklearn import metrics
import sys
if not sys.warnoptions:
    import warnings
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import RadiusNeighborsClassifier

In [1]:
#loading the input data: one ';'-separated file per supply chain phase (e.g. one file for handling,
#one file for preconditioning etc.); here the file 'example_data.csv' is used
#expected columns, in this order:
#  - 'deviation reason'  (column 0, target variable of the first prediction setting)
#  - 'deviation measure' (column 1, target variable of the second prediction setting, i.e. corrective measure)
#  - predictor features (columns 2 onwards): 'sensor role', 'absolute setpoint deviation',
#    'slope of two most recent temperature measurements', 'average setpoint deviation within one last hour',
#    'membership degree in a fuzzy set "transportation and/or storage" with regard to the previous physical handling point',
#    'membership degree in a fuzzy set "transportation and/or storage" with regard to the next physical handling point'
#the cells below address predictors by the column names 'spd', 'slope', 'av_spd_1h', 'm_before' and 'm_after',
#so the file header has to provide these names
data_file = 'example_data.csv'
df = pd.read_csv(data_file, sep = ';')
df.head(1)

In [3]:
#optimal radius values, i.e. first cut points minimizing entropy, for deviation reason ('rr') and corrective measure ('rm')
#(as an example, 'rr' is used in the rest of this code; replacing it with 'rm' allows for making predictions for
#corrective measures, provided that the corrective measure column of 'example_data.csv' is used as the target variable)
#NOTE(review): both assignments below are placeholders - neither name is defined anywhere in the visible code, so on a
#fresh kernel 'rr = rr' raises NameError; replace the right-hand sides with the numeric radii before running
rr = rr #radius for deviation reasons, calculated in the course of the entropy minimization procedure
rm = rm #radius for corrective measures, calculated in the course of the entropy minimization procedure

In [6]:
#empty dataframe that receives the voting results; it can be used to back-engineer the voting process and to
#understand the reasons for a specific prediction
#columns:
#  'actual'     - true/actual label
#  'knn'        - k-NN prediction
#  'knn_rad'    - radius neighbor prediction
#  'dt'         - decision tree prediction
#  'final'      - final prediction of the first round
#  '1nn'        - 1-NN prediction
#  'outlier_dt' - flag for outlier status in a decision tree leaf node
#  'final2'     - final prediction of the second round
#  'note'       - free text for inspection purposes
result_columns = ['actual', 'knn', 'knn_rad', 'dt', 'final', '1nn', 'outlier_dt', 'final2', 'note']
rad_test_r = pd.DataFrame(columns = result_columns)

In [7]:
#boundaries of the hypercube containing all data: one (minimum, maximum) pair per checked predictor
#(used together with the decision tree leaf boundaries when deciding upon the outlier flag)
abs_min_spd, abs_max_spd = min(df['spd']), max(df['spd'])
abs_min_slope, abs_max_slope = min(df['slope']), max(df['slope'])
abs_min_av_spd_1h, abs_max_av_spd_1h = min(df['av_spd_1h']), max(df['av_spd_1h'])
abs_min_after, abs_max_after = min(df['m_after']), max(df['m_after'])
abs_min_before, abs_max_before = min(df['m_before']), max(df['m_before'])

In [8]:
#voting procedure for the first round (step), evaluated with leave-one-out cross-validation (LOOCV):
#for every observation the three classifiers are refitted on all remaining observations; their predictions, the label
#of the single nearest neighbour and the decision tree outlier flag are stored as one row of 'rad_test_r'
#the cell rebuilds 'rad_test_r' from scratch, so it can be re-run without duplicating rows

#initializing classifiers
#NOTE(review): 'n_neighbors' and 'rr' are not defined in this cell and have to exist in the kernel before it is run
knn = KNeighborsClassifier(n_neighbors = n_neighbors) #n_neighbors is calculated either in CV or LOOCV procedure with the help of grid or random hyperparameter search
dt = DecisionTreeClassifier(min_samples_leaf = n_neighbors, random_state = 0) #min_samples_leaf corresponds to the optimal value of k neighbors to restrict overfitting of decision trees; random_state is fixed so that ties between equally good splits are resolved identically on every run
knnRr = RadiusNeighborsClassifier(radius = rr) #knnRm = RadiusNeighborsClassifier(radius = rm) for corrective measures

#predictors checked for the outlier flag, each with the boundaries of the hypercube containing all data
global_bounds = {
    'spd': (abs_min_spd, abs_max_spd),
    'slope': (abs_min_slope, abs_max_slope),
    'av_spd_1h': (abs_min_av_spd_1h, abs_max_av_spd_1h),
    'm_after': (abs_min_after, abs_max_after),
    'm_before': (abs_min_before, abs_max_before),
}

#predictors (all columns from the third one on) and target (first column); identical for every LOOCV iteration
X = df.iloc[:, 2:].reset_index(drop = True)
y = list(df.iloc[:, 0])

#one dict per test observation; the dataframe is built once after the loop
records = []

#running a voting procedure with LOOCV
for i in range(len(df)):
    #singling out the test observation (kept as a one-row dataframe so that column names are preserved)
    obs = X.iloc[[i]]
    obs_y = y[i]
    #all remaining observations form the training set
    X_new = X.drop(X.index[i]).reset_index(drop = True)
    y_new = y[:i] + y[i + 1:]

    #fitting classifiers and making predictions
    knn.fit(X_new, y_new)
    knnRr.fit(X_new, y_new)
    dt.fit(X_new, y_new)
    neigh = knnRr.radius_neighbors(obs)
    neighKNN = knn.kneighbors(obs)
    oneNN = y_new[neighKNN[1][0][0]] #label of the first (closest) neighbour returned by k-NN
    knn_pred = knn.predict(obs)[0]
    dt_pred = dt.predict(obs)[0]

    #training observations that fall into the same decision tree leaf as the test observation
    dt_partition_obs = dt.apply(obs)[0]
    df_partition = X_new[dt.apply(X_new) == dt_partition_obs]

    #outlier flag: 1 if, for at least one checked predictor, the test observation is not strictly inside the range
    #spanned by its leaf, unless it lies exactly on the corresponding boundary of the hypercube containing all data
    #(predictors are addressed by column name, so the check does not depend on the column order of the file)
    final_cond = 0
    for col, (abs_min, abs_max) in global_bounds.items():
        value = obs[col].iloc[0]
        lower_ok = df_partition[col].min() < value or abs_min == value
        upper_ok = df_partition[col].max() > value or abs_max == value
        if not (lower_ok and upper_ok):
            final_cond = 1
            break

    #voting of the first round
    if len(neigh[0][0]) != 0:
        #at least one training observation lies within the radius, so the radius classifier can vote
        knnRr_pred = knnRr.predict(obs)[0]
        if knn_pred == knnRr_pred or knn_pred == dt_pred:
            final = knn_pred #k-NN is supported by at least one other classifier
        elif knnRr_pred == dt_pred:
            final = dt_pred #radius neighbors and decision tree outvote k-NN
        else:
            final = 'disagr' #all three predictions differ
    else:
        #no neighbour within the radius: no radius prediction and no first-round decision
        knnRr_pred = 'null'
        final = 'null'

    #writing prediction and voting results of the first round
    records.append({'actual': obs_y, 'knn': knn_pred, 'knn_rad': knnRr_pred, 'dt': dt_pred,
                    'final': final, '1nn': oneNN, 'outlier_dt': final_cond})

#'final2' and 'note' are filled in the second round; object dtype lets every column hold labels as well as
#the markers 'null' / 'disagr'
rad_test_r = pd.DataFrame(records, columns = ['actual', 'knn', 'knn_rad', 'dt', 'final', '1nn', 'outlier_dt',
                                              'final2', 'note']).astype(object)

In [10]:
#second voting step: derives 'final2' (and, where needed, 'note') from the first-round results in 'rad_test_r'
#positions of the columns of 'rad_test_r'
ACTUAL, KNN, KNN_RAD, DT, FINAL, ONE_NN, OUTLIER_DT, FINAL2, NOTE = range(9)

for row in range(len(rad_test_r)):
    first_round = rad_test_r.iloc[row, FINAL]

    #a first-round disagreement stays a disagreement
    if first_round == 'disagr':
        rad_test_r.iloc[row, FINAL2] = 'disagr'
        continue

    #a first-round decision is taken over unchanged
    if first_round != 'null':
        rad_test_r.iloc[row, FINAL2] = first_round
        continue

    #no radius neighbors in the first round: k-NN, decision tree and 1-NN vote instead
    knn_vote = rad_test_r.iloc[row, KNN]
    dt_vote = rad_test_r.iloc[row, DT]
    nn1_vote = rad_test_r.iloc[row, ONE_NN]
    if knn_vote != dt_vote and knn_vote != nn1_vote and dt_vote != nn1_vote:
        rad_test_r.iloc[row, FINAL2] = 'disagr'
    elif knn_vote == nn1_vote or nn1_vote == dt_vote:
        #1-NN is supported by at least one of the other two classifiers
        rad_test_r.iloc[row, FINAL2] = nn1_vote
    elif knn_vote == dt_vote and knn_vote != nn1_vote:
        #k-NN and decision tree agree with each other but not with 1-NN: no prediction, marked for inspection
        rad_test_r.iloc[row, FINAL2] = 'null'
        rad_test_r.iloc[row, NOTE] = 'knn=dt!=1nn'

In [2]:
#accuracy of the stand-alone classifiers on the observations for which the second voting step gives no prediction
#('final2' is 'disagr' or 'null')
unresolved = rad_test_r[rad_test_r['final2'].isin(['disagr', 'null'])]
counter = len(unresolved) #number of unresolved observations
dis_knn = int((unresolved['knn'] == unresolved['actual']).sum()) #k-NN classifier
dis_dt = int((unresolved['dt'] == unresolved['actual']).sum()) #decision trees
dis_1nn = int((unresolved['1nn'] == unresolved['actual']).sum()) #1-NN classifier
#without unresolved observations the accuracy is undefined; nan is printed instead of raising ZeroDivisionError
print('knn', dis_knn/counter if counter else float('nan'))
print('dt', dis_dt/counter if counter else float('nan'))
print('1nn', dis_1nn/counter if counter else float('nan'))

In [3]:
#classification accuracy and prediction coverage per algorithm, including the results after the first and the
#second voting step; all values are extracted from the completed dataframe 'rad_test_r'
n_obs = len(rad_test_r)
truth = rad_test_r['actual']
no_prediction = ['null', 'disagr']

#number of correct predictions of the stand-alone algorithms
acc_knn = int((rad_test_r['knn'] == truth).sum())
acc_dt = int((rad_test_r['dt'] == truth).sum())
acc_1nn = int((rad_test_r['1nn'] == truth).sum())
#number of correct predictions of Hk-NN after one and two rounds of voting
acc_fin = int((rad_test_r['final'] == truth).sum())
acc_fin2 = int((rad_test_r['final2'] == truth).sum())
#number of observations without a prediction after one and two rounds of voting
fin_null_dis = int(rad_test_r['final'].isin(no_prediction).sum())
fin2_null_dis = int(rad_test_r['final2'].isin(no_prediction).sum())

#stand-alone algorithms always predict, so their coverage is 1; Hk-NN accuracy refers to covered observations only
print('knn', acc_knn/n_obs, 'covered', n_obs/n_obs)
print('dt', acc_dt/n_obs, 'covered', n_obs/n_obs)
print('1nn', acc_1nn/n_obs, 'covered', n_obs/n_obs)
print('fin', acc_fin/(n_obs - fin_null_dis), 'covered', (n_obs - fin_null_dis)/n_obs)
print('fin2', acc_fin2/(n_obs - fin2_null_dis), 'covered', (n_obs - fin2_null_dis)/n_obs)

Micro- and macro-average precision and recall scores, specificity and confusion matrix entries

In [6]:
#precision (final, i.e. Hk-NN)
#class labels included in the averages; append 6.0 if there are six classes in the experimental setup
class_labels = [1.0, 2.0, 3.0, 4.0, 5.0]
actual = rad_test_r['actual']
predicted = rad_test_r['final2']
#per class: number of correct predictions (true positives) and number of all predictions of that class
tp_fin = {c: int(((actual == c) & (predicted == c)).sum()) for c in class_labels}
all_fin = {c: int((predicted == c).sum()) for c in class_labels}
#a ratio without any prediction in its denominator is undefined and reported as nan instead of raising ZeroDivisionError
n_predicted = sum(all_fin.values())
micro_precision = sum(tp_fin.values())/n_predicted if n_predicted else float('nan')
macro_precision = sum(tp_fin[c]/all_fin[c] if all_fin[c] else float('nan') for c in class_labels)/len(class_labels)
print('micro precision', micro_precision)
print('macro precision', macro_precision)

In [7]:
#precision (k-NN)
#class labels included in the averages; append 6.0 if there are six classes in the experimental setup
class_labels = [1.0, 2.0, 3.0, 4.0, 5.0]
actual = rad_test_r['actual']
predicted = rad_test_r['knn']
#per class: number of correct predictions (true positives) and number of all predictions of that class
tp_fin = {c: int(((actual == c) & (predicted == c)).sum()) for c in class_labels}
all_fin = {c: int((predicted == c).sum()) for c in class_labels}
#a ratio without any prediction in its denominator is undefined and reported as nan instead of raising ZeroDivisionError
n_predicted = sum(all_fin.values())
micro_precision = sum(tp_fin.values())/n_predicted if n_predicted else float('nan')
macro_precision = sum(tp_fin[c]/all_fin[c] if all_fin[c] else float('nan') for c in class_labels)/len(class_labels)
print('micro precision', micro_precision)
print('macro precision', macro_precision)

In [8]:
#recall (final, i.e. Hk-NN), calculated on the observations for which Hk-NN gives a prediction
#class labels included in the averages; append 6.0 if there are six classes in the experimental setup
class_labels = [1.0, 2.0, 3.0, 4.0, 5.0]
actual = rad_test_r['actual']
predicted = rad_test_r['final2']
covered = (predicted != 'null') & (predicted != 'disagr')
#per class: number of correct predictions (true positives) and number of covered observations of that class
tp_fin = {c: int(((actual == c) & (predicted == c)).sum()) for c in class_labels}
act_fin = {c: int(((actual == c) & covered).sum()) for c in class_labels}
#a ratio without any observation in its denominator is undefined and reported as nan instead of raising ZeroDivisionError
n_actual = sum(act_fin.values())
micro_recall = sum(tp_fin.values())/n_actual if n_actual else float('nan')
macro_recall = sum(tp_fin[c]/act_fin[c] if act_fin[c] else float('nan') for c in class_labels)/len(class_labels)
print('micro recall', micro_recall)
print('macro recall', macro_recall)

In [9]:
#recall (k-NN)
#class labels included in the averages; append 6.0 if there are six classes in the experimental setup
class_labels = [1.0, 2.0, 3.0, 4.0, 5.0]
actual = rad_test_r['actual']
predicted = rad_test_r['knn']
#same filter as for Hk-NN, so that both recall cells are computed the same way
covered = (predicted != 'null') & (predicted != 'disagr')
#per class: number of correct predictions (true positives) and number of covered observations of that class
tp_fin = {c: int(((actual == c) & (predicted == c)).sum()) for c in class_labels}
act_fin = {c: int(((actual == c) & covered).sum()) for c in class_labels}
#a ratio without any observation in its denominator is undefined and reported as nan instead of raising ZeroDivisionError
n_actual = sum(act_fin.values())
micro_recall = sum(tp_fin.values())/n_actual if n_actual else float('nan')
macro_recall = sum(tp_fin[c]/act_fin[c] if act_fin[c] else float('nan') for c in class_labels)/len(class_labels)
print('micro recall', micro_recall)
print('macro recall', macro_recall)

In [10]:
#specificity (final, i.e. Hk-NN): true negatives / (true negatives + false positives), averaged over the classes
#only observations with a class label as the second-round prediction are considered
covered_labels = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
averaged_labels = covered_labels[:5] #use all six labels and divide by six if the number of classes is six
dff = rad_test_r[rad_test_r['final2'].isin(covered_labels)]
#true negatives: neither the actual nor the predicted label is the class
tn_fin = {c: len(dff.loc[(dff['actual'] != c) & (dff['final2'] != c)]) for c in covered_labels}
#false positives: the class is predicted although the actual label is different
fp_fin = {c: len(dff.loc[(dff['final2'] == c) & (dff['actual'] != c)]) for c in covered_labels}
per_class = [tn_fin[c]/(tn_fin[c] + fp_fin[c]) for c in averaged_labels]
print('specificity', sum(per_class)/5)

In [11]:
#specificity (k-NN): true negatives / (true negatives + false positives), averaged over the classes
#only observations with a class label as the k-NN prediction are considered
covered_labels = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
averaged_labels = covered_labels[:5] #use all six labels and divide by six if the number of classes is six
dff = rad_test_r[rad_test_r['knn'].isin(covered_labels)]
#true negatives: neither the actual nor the predicted label is the class
tn_fin = {c: len(dff.loc[(dff['actual'] != c) & (dff['knn'] != c)]) for c in covered_labels}
#false positives: the class is predicted although the actual label is different
fp_fin = {c: len(dff.loc[(dff['knn'] == c) & (dff['actual'] != c)]) for c in covered_labels}
per_class = [tn_fin[c]/(tn_fin[c] + fp_fin[c]) for c in averaged_labels]
print('specificity', sum(per_class)/5)

In [5]:
#entries for the confusion matrix (final, i.e. Hk-NN), restricted to observations with a second-round prediction
#one line per (actual class, predicted class) pair; rows for a sixth actual class can be added in the same way
dffinal = rad_test_r[~rad_test_r['final2'].isin(['null', 'disagr'])]
for actual_label in range(1, 6):
    for predicted_label in range(1, 7):
        cell = dffinal.loc[(dffinal['actual'] == float(actual_label)) & (dffinal['final2'] == float(predicted_label))]
        print('%d predicted as %d' % (actual_label, predicted_label), len(cell))

In [4]:
#entries for the confusion matrix (k-NN)
#one line per (actual class, predicted class) pair, including '5 predicted as 6' so that every actual class has
#the same six entries; rows for a sixth actual class can be added in the same way
for actual_label in range(1, 6):
    for predicted_label in range(1, 7):
        cell = rad_test_r.loc[(rad_test_r['actual'] == float(actual_label)) & (rad_test_r['knn'] == float(predicted_label))]
        print('%d predicted as %d' % (actual_label, predicted_label), len(cell))