In [1]:
import numpy as np
import scipy.stats as st
from sklearn.metrics import precision_score, recall_score, f1_score
import sklearn

In [2]:
### Constantes ###
# DATASET_NAME = 'letter'
# DATASET_NAME = 'AIDS'
# DATASET_NAME = 'mutagenicity'
DATASET_NAME = 'NCI1'
CENTRALITY_NAME = 'pagerank'
# CENTRALITY_NAME = 'betweenness'
EXPERIMENT = 'h_knn'
AVERAGE = 'micro' if DATASET_NAME == 'letter' else 'binary'

In [3]:
def get_data(percentage):
    filename = f'../results/{EXPERIMENT}/{DATASET_NAME}/prediction_{percentage}_{CENTRALITY_NAME}.npy'
    with open(filename, 'rb') as f:
        lbls_test = np.load(f)
        predictions = np.load(f)
    
    return lbls_test, predictions

def save_stats(message):
    filename = f'statistics_{EXPERIMENT}_{DATASET_NAME}_{CENTRALITY_NAME}.txt'
    with open(filename, 'w') as f:
        f.write(message)

In [4]:
def calc_accuracy(lbls_test, predictions):
    correctly_classified = np.sum(lbls_test == predictions)
    accuracy = 100 * (correctly_classified / len(lbls_test))
    
    return round(accuracy, 2)

In [5]:
def get_X(lbls_test, predictions_100, predictions_to_test):
    X_t = []
    for ground_truth, pred_100, pred_to_test in zip(lbls_test, predictions_100, predictions_to_test):
        is_100_correct = pred_100 == ground_truth
        is_pred_to_test_correct = pred_to_test == ground_truth
        val = 1 * (is_100_correct and not is_pred_to_test_correct) + \
            (-1) * (not is_100_correct and is_pred_to_test_correct)
        X_t.append(val)
            
    return np.array(X_t)

def calc_Z_score(lbls_test, predictions_100, predictions_to_test):
    X_t = get_X(lbls_test, predictions_100, predictions_to_test)
    
    mu_x = np.mean(X_t)
    var_x = np.var(X_t)
    
    Z = mu_x / (np.sqrt(var_x / len(lbls_test)))
    
    return Z

In [6]:
lbls_test, predictions_100 = get_data('100')
alpha = 0.1

percentages = [100, 80, 60, 40, 20]
message = ''
for percentage in percentages:
    _, predictions = get_data(str(percentage))
    
    accuracy = calc_accuracy(lbls_test, predictions)
    z_score = calc_Z_score(lbls_test, predictions_100, predictions)
    p_value = st.norm.cdf(z_score)
    
    message += f'Percentage: {percentage}\n'
    
    message += f'Accuracy {accuracy}%\n'
    message += f'Precision {precision_score(lbls_test, predictions, average=AVERAGE):.2f}\n'
    message += f'Recall {recall_score(lbls_test, predictions, average=AVERAGE):.2f}\n'
    message += f'F1-score {f1_score(lbls_test, predictions, average=AVERAGE):.2f}\n\n'
    
    message += f'Z score {z_score:.2f}\n'
    message += f'P-value {p_value:.2f}\n'
    
    if z_score < 0:
        message += f'Difference significantly better (alpha={alpha}): {p_value <= alpha}\n'
    else:
        message += f'Difference significantly worst (alpha={alpha}): {1-p_value <= alpha}\n'
    message += '-------\n'

save_stats(message)
print(message)

Percentage: 100
Accuracy 70.52%
Precision 0.70
Recall 0.71
F1-score 0.71

Z score nan
P-value nan
Difference significantly worst (alpha=0.1): False
-------
Percentage: 80
Accuracy 64.08%
Precision 0.64
Recall 0.64
F1-score 0.64

Z score 5.25
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------
Percentage: 60
Accuracy 60.57%
Precision 0.62
Recall 0.54
F1-score 0.58

Z score 7.46
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------
Percentage: 40
Accuracy 62.75%
Precision 0.62
Recall 0.66
F1-score 0.64

Z score 6.07
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------
Percentage: 20
Accuracy 58.06%
Precision 0.59
Recall 0.53
F1-score 0.56

Z score 9.43
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------



  Z = mu_x / (np.sqrt(var_x / len(lbls_test)))
