In [14]:
import numpy as np
import scipy.stats as st
from sklearn.metrics import precision_score, recall_score, f1_score
import sklearn

In [66]:
### Constants ###
# Dataset whose result files are analysed.
# Other values previously used here: 'AIDS', 'mutagenicity', 'NCI1'.
DATASET_NAME = 'letter'

# Centrality measure and experiment identifiers; both are interpolated into
# the result/statistics file names elsewhere in this notebook.
CENTRALITY_NAME = 'pagerank'
EXPERIMENT = 'coarse_to_fine'

# Averaging mode passed to the sklearn precision/recall/F1 helpers.
# 'letter' uses micro-averaging (presumably because it is multi-class --
# TODO confirm); every other dataset is scored as a binary problem.
if DATASET_NAME == 'letter':
    AVERAGE = 'micro'
else:
    AVERAGE = 'binary'

In [67]:
def get_ground_truth():
    """Load the reference results produced by the ``h_knn`` run.

    The file is selected by ``DATASET_NAME`` and ``CENTRALITY_NAME`` and is
    read with ``_get_data``.

    Returns:
        The ``(labels, predictions)`` pair returned by ``_get_data``.
    """
    return _get_data(
        f'../results/h_knn/{DATASET_NAME}/prediction_100_{CENTRALITY_NAME}.npy'
    )

def get_data():
    """Load the results of the experiment under evaluation.

    The file is selected by ``EXPERIMENT``, ``DATASET_NAME`` and
    ``CENTRALITY_NAME`` and is read with ``_get_data``.

    Returns:
        The ``(labels, predictions)`` pair returned by ``_get_data``.
    """
    return _get_data(
        f'../results/{EXPERIMENT}/{DATASET_NAME}/res_{CENTRALITY_NAME}_pt4.npy'
    )

def _get_data(filename):
    """Read two consecutively saved NumPy arrays from a single file.

    Args:
        filename: Path of a binary file containing two arrays written one
            after the other (first the test labels, then the predictions).

    Returns:
        A ``(lbls_test, predictions)`` tuple, in the order stored in the file.
    """
    with open(filename, 'rb') as npy_file:
        # Tuple elements are evaluated left to right, so the first load
        # yields the labels and the second one the predictions.
        loaded = (np.load(npy_file), np.load(npy_file))

    return loaded

def save_stats(message):
    """Write ``message`` to the statistics text file of the current run.

    The file is created in the working directory, named after ``EXPERIMENT``,
    ``DATASET_NAME`` and ``CENTRALITY_NAME``; existing content is overwritten.

    Args:
        message: Text to store.
    """
    stats_path = f'statistics_{EXPERIMENT}_{DATASET_NAME}_{CENTRALITY_NAME}.txt'
    with open(stats_path, mode='w') as stats_file:
        stats_file.write(message)

In [68]:
def calc_accuracy(lbls_test, predictions):
    """Return the classification accuracy as a percentage.

    Args:
        lbls_test: True labels (array-like).
        predictions: Predicted labels (array-like) with the same shape as
            ``lbls_test``.

    Returns:
        Percentage of positions where both inputs agree, rounded to two
        decimals.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Coerce to arrays so that plain Python lists are compared element-wise;
    # ``list == list`` would yield a single bool and a meaningless accuracy.
    labels = np.asarray(lbls_test)
    predicted = np.asarray(predictions)

    # Reject mismatched inputs instead of relying on broadcasting.
    if labels.shape != predicted.shape:
        raise ValueError(
            f'Shape mismatch: labels {labels.shape} vs predictions {predicted.shape}'
        )

    matches = labels == predicted
    correctly_classified = np.sum(matches)
    accuracy = 100 * (correctly_classified / matches.size)

    return round(accuracy, 2)

In [69]:
def get_X(lbls_test, predictions_100, predictions_to_test):
    """Build the per-sample paired outcome vector for two classifiers.

    For every sample the value is:
        +1 if ``predictions_100`` is right and ``predictions_to_test`` is wrong,
        -1 if ``predictions_100`` is wrong and ``predictions_to_test`` is right,
         0 if both are right or both are wrong.

    Args:
        lbls_test: True labels.
        predictions_100: Predictions of the reference classifier.
        predictions_to_test: Predictions of the classifier under test.

    Returns:
        NumPy array with one entry per sample (iteration stops at the
        shortest input).
    """
    # (+1, 0, -1) is exactly the difference of the two correctness indicators.
    outcomes = [
        int(reference == truth) - int(candidate == truth)
        for truth, reference, candidate
        in zip(lbls_test, predictions_100, predictions_to_test)
    ]

    return np.array(outcomes)

def calc_Z_score(lbls_test, predictions_100, predictions_to_test):
    """Compute the Z statistic comparing two classifiers on the same samples.

    The statistic is ``mean(X) / sqrt(var(X) / n)``, where ``X`` is the
    paired outcome vector from ``get_X`` and ``n = len(lbls_test)``.
    A positive value means ``predictions_100`` is correct more often than
    ``predictions_to_test``; a negative value means the opposite.

    Args:
        lbls_test: True labels.
        predictions_100: Predictions of the reference classifier.
        predictions_to_test: Predictions of the classifier under test.

    Returns:
        The Z statistic as a float.
    """
    paired_outcomes = get_X(lbls_test, predictions_100, predictions_to_test)
    n_samples = len(lbls_test)

    mean_outcome = np.mean(paired_outcomes)
    # np.var uses its default (population) normalisation here.
    standard_error = np.sqrt(np.var(paired_outcomes) / n_samples)

    return mean_outcome / standard_error

In [70]:
# Significance level used for the one-sided verdict at the end of the report.
alpha = 0.1

# Reference results and the results of the experiment being evaluated.
lbls_test, predictions_100 = get_ground_truth()
lbls_experiment, predictions = get_data()

# Both files must describe the same test samples in the same order; otherwise
# the paired Z-test and every metric below would silently be wrong.
if not np.array_equal(lbls_test, lbls_experiment):
    raise ValueError(
        'Test labels stored in the reference and experiment result files '
        'differ; the predictions cannot be compared sample by sample.'
    )

accuracy_ground_truth = calc_accuracy(lbls_test, predictions_100)
accuracy = calc_accuracy(lbls_test, predictions)
z_score = calc_Z_score(lbls_test, predictions_100, predictions)
# Lower-tail probability P(Z <= z_score) under the standard normal; this is
# the value reported as "P-value" regardless of the sign of z_score.
p_value = st.norm.cdf(z_score)

message = ''
message += f'Accuracy ground truth: {accuracy_ground_truth} '
message += f'Accuracy {accuracy}%\n'
message += f'Precision {precision_score(lbls_test, predictions, average=AVERAGE):.2f}\n'
message += f'Recall {recall_score(lbls_test, predictions, average=AVERAGE):.2f}\n'
message += f'F1-score {f1_score(lbls_test, predictions, average=AVERAGE):.2f}\n\n'

message += f'Z score {z_score:.2f}\n'
message += f'P-value {p_value:.2f}\n'

if z_score < 0:
    # Negative Z: the tested predictions are correct more often than the
    # reference, so the lower tail is the relevant one.
    message += f'Difference significantly better (alpha={alpha}): {p_value <= alpha}\n'
else:
    # Non-negative Z: use the survival function for the upper tail; it is
    # numerically more accurate than 1 - cdf, which rounds to 0 for large Z.
    upper_tail_p = st.norm.sf(z_score)
    message += f'Difference significantly worst (alpha={alpha}): {upper_tail_p <= alpha}\n'
message += '-------\n'

save_stats(message)
print(message)

Accuracy ground truth: 89.33 Accuracy 82.27%
Precision 0.82
Recall 0.82
F1-score 0.82

Z score 6.03
P-value 1.00
Difference significantly worst (alpha=0.1): True
-------

