In [1]:
import numpy as np
import scipy.stats as st
from sklearn.metrics import precision_score, recall_score, f1_score
import sklearn

In [2]:
### Constantes ###
# DATASET_NAME = 'letter'
DATASET_NAME = 'AIDS'
# DATASET_NAME = 'mutagenicity'
# DATASET_NAME = 'NCI1'
CENTRALITY_NAME = 'pagerank'
EXPERIMENT = 'coarse_to_fine'
POINT = '3'
AVERAGE = 'micro' if DATASET_NAME == 'letter' else 'binary'

In [3]:
def get_ground_truth():
    filename = f'../results/h_knn/{DATASET_NAME}/prediction_100_{CENTRALITY_NAME}.npy'
    return _get_data(filename)

def get_data():
    filename = f'../results/{EXPERIMENT}/{DATASET_NAME}/res_{CENTRALITY_NAME}_pt{POINT}.npy'
    return _get_data(filename)

def _get_data(filename):

    with open(filename, 'rb') as f:
        lbls_test = np.load(f)
        predictions = np.load(f)
    
    return lbls_test, predictions

def save_stats(message):
    filename = f'statistics_{EXPERIMENT}_{DATASET_NAME}_{CENTRALITY_NAME}.txt'
    with open(filename, 'w') as f:
        f.write(message)

In [4]:
def calc_accuracy(lbls_test, predictions):
    correctly_classified = np.sum(lbls_test == predictions)
    
    
    
    print(correctly_classified)
    print(len(lbls_test))
    print(correctly_classified / len(lbls_test))
    
    accuracy = 100 * (correctly_classified / len(lbls_test))
    
    return round(accuracy, 2)

In [5]:
def get_X(lbls_test, predictions_100, predictions_to_test):
    X_t = []
    for ground_truth, pred_100, pred_to_test in zip(lbls_test, predictions_100, predictions_to_test):
        is_100_correct = pred_100 == ground_truth
        is_pred_to_test_correct = pred_to_test == ground_truth
        val = 1 * (is_100_correct and not is_pred_to_test_correct) + \
            (-1) * (not is_100_correct and is_pred_to_test_correct)
        X_t.append(val)
            
    return np.array(X_t)

def calc_Z_score(lbls_test, predictions_100, predictions_to_test):
    X_t = get_X(lbls_test, predictions_100, predictions_to_test)
    
    mu_x = np.mean(X_t)
    var_x = np.var(X_t)
    
    Z = mu_x / (np.sqrt(var_x / len(lbls_test)))
    
    return Z

In [6]:
lbls_test, predictions_100 = get_ground_truth()
alpha = 0.1


message = ''

_, predictions = get_data()

accuracy_ground_truth = calc_accuracy(lbls_test, predictions_100)
accuracy = calc_accuracy(lbls_test, predictions)
z_score = calc_Z_score(lbls_test, predictions_100, predictions)
p_value = st.norm.cdf(z_score)
    

message += f'Accuracy ground truth: {accuracy_ground_truth} '
message += f'Accuracy {accuracy}%\n'
message += f'Precision {precision_score(lbls_test, predictions, average=AVERAGE):.2f}\n'
message += f'Recall {recall_score(lbls_test, predictions, average=AVERAGE):.2f}\n'
message += f'F1-score {f1_score(lbls_test, predictions, average=AVERAGE):.2f}\n\n'
    
message += f'Z score {z_score:.2f}\n'
message += f'P-value {p_value:.2f}\n'
    
if z_score < 0:
    message += f'Difference significantly better (alpha={alpha}): {p_value <= alpha}\n'
else:
    message += f'Difference significantly worst (alpha={alpha}): {1-p_value <= alpha}\n'
message += '-------\n'

save_stats(message)
print(message)

1483
1500
0.9886666666666667
1480
1500
0.9866666666666667
Accuracy ground truth: 98.87 Accuracy 98.67%
Precision 0.99
Recall 1.00
F1-score 0.99

Z score 0.60
P-value 0.73
Difference significantly worst (alpha=0.1): False
-------



In [7]:
1-p_value

0.27422912177764136

In [8]:
filename = f'../results/{EXPERIMENT}/{DATASET_NAME}/res_{CENTRALITY_NAME}_pt{POINT}_idx_predicted.npy'
ground_truth, idx_predicted = _get_data(filename)

In [9]:
len(idx_predicted)

1473

In [10]:
predictions[idx_predicted]

array([1, 1, 1, ..., 1, 1, 1], dtype=int32)

In [11]:
accuracy = calc_accuracy(ground_truth[idx_predicted], predictions[idx_predicted])
accuracy

1457
1473
0.9891378139850645


98.91

In [12]:
mask = np.ones(len(predictions), bool)
mask[idx_predicted] = 0
len(predictions[mask])

27

In [13]:
accuracy = calc_accuracy(ground_truth[mask], predictions[mask])
accuracy

23
27
0.8518518518518519


85.19

idx_predicted

In [14]:
print(predictions[mask])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [15]:
print(ground_truth[mask])

[0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1]


In [16]:
from collections import Counter

Counter(predictions[mask])

Counter({0: 27})

In [17]:
Counter(ground_truth[mask])

Counter({0: 23, 1: 4})

In [18]:
mask

array([False,  True,  True, ..., False, False, False])

In [19]:
np.where(mask == 1.)

(array([  1,   2,   3,   6,   8,  19, 101, 273, 503, 504, 505, 506, 507,
        509, 510, 511, 512, 516, 517, 518, 530, 538, 539, 558, 562, 702,
        703]),)

In [20]:
calc_accuracy(lbls_test[mask], 1-predictions[mask])

4
27
0.14814814814814814


14.81