In [1]:
import numpy as np
import scipy.stats as st
from sklearn.metrics import precision_score, recall_score, f1_score
import sklearn

In [2]:
### Constantes ###
DATASET_NAME = 'mutagenicity'
CENTRALITY_NAME = 'pagerank'
AVERAGE = 'micro' if DATASET_NAME == 'letter' else 'binary'

In [3]:
def get_data(dataset_name, percentage, centrality_name):
    filename = f'../results/h_knn/{dataset_name}/prediction_{percentage}_{centrality_name}.npy'
    with open(filename, 'rb') as f:
        lbls_test = np.load(f)
        predictions = np.load(f)
    
    return lbls_test, predictions


In [4]:
def calc_accuracy(lbls_test, predictions):
    correctly_classified = np.sum(lbls_test == predictions)
    accuracy = 100 * (correctly_classified / len(lbls_test))
    
    return round(accuracy, 2)

In [5]:
def get_X(lbls_test, predictions_100, predictions_to_test):
    X_t = []
    for ground_truth, pred_100, pred_to_test in zip(lbls_test, predictions_100, predictions_to_test):
        is_100_correct = pred_100 == ground_truth
        is_pred_to_test_correct = pred_to_test == ground_truth
        val = 1 * (is_100_correct and not is_pred_to_test_correct) + \
            (-1) * (not is_100_correct and is_pred_to_test_correct)
        X_t.append(val)
            
    return np.array(X_t)

def calc_Z_score(lbls_test, predictions_100, predictions_to_test):
    X_t = get_X(lbls_test, predictions_100, predictions_to_test)
    
    mu_x = np.mean(X_t)
    var_x = np.var(X_t)
    
    Z = mu_x / (np.sqrt(var_x / len(lbls_test)))
    
    return Z

In [6]:
lbls_test, predictions_100 = get_data(DATASET_NAME, '100', CENTRALITY_NAME)
alpha = 0.1

percentages = [100, 80, 60, 40, 20]
for percentage in percentages:
    _, predictions = get_data(DATASET_NAME, str(percentage), CENTRALITY_NAME)
    
    accuracy = calc_accuracy(lbls_test, predictions)
    z_score = calc_Z_score(lbls_test, predictions_100, predictions)
    p_value = st.norm.cdf(z_score)
    
    print(f'Percentage: {percentage}\n')
    
    print(f'Accuracy {accuracy}%')
    print(f'Precision {precision_score(lbls_test, predictions, average=AVERAGE):.2f}')
    print(f'Recall {recall_score(lbls_test, predictions, average=AVERAGE):.2f}')
    print(f'F1-score {f1_score(lbls_test, predictions, average=AVERAGE):.2f}\n')
    
    print(f'Z score {z_score:.2f}')
    print(f'P-value {p_value:.2f}')
    
    if z_score < 0:
        print(f'Difference significantly better (alpha={alpha}): {p_value < alpha}')
    else:
        print(f'Difference significantly worst (alpha={alpha}): {1-p_value < alpha}')
    print('-------\n')

Percentage: 100

Accuracy 71.63%
Precision 0.71
Recall 0.61
F1-score 0.66

Z score nan
P-value nan
Difference significantly worst (alpha=0.1): False
-------

Percentage: 80

Accuracy 72.31%
Precision 0.72
Recall 0.62
F1-score 0.67

Z score -0.72
P-value 0.24
Difference significantly better (alpha=0.1): False
-------

Percentage: 60

Accuracy 69.62%
Precision 0.67
Recall 0.62
F1-score 0.65

Z score 2.02
P-value 0.98
Difference significantly worst (alpha=0.1): True
-------

Percentage: 40

Accuracy 63.84%
Precision 0.62
Recall 0.48
F1-score 0.54

Z score 6.66
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------

Percentage: 20

Accuracy 62.99%
Precision 0.59
Recall 0.54
F1-score 0.57

Z score 7.25
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------



  Z = mu_x / (np.sqrt(var_x / len(lbls_test)))
