In [11]:
# imports
import json
import numpy as np

In [33]:
def test_accuracy_from_files_no_dict(tst_file, predictions_file, num_uniuqe_predictions=5, bias=False):
    '''
    This functions tests the accuracy of predictions made by Pass2Path and saved in a predictions file on
    the input test file.
    Inputs:
    tst_file - txt file in which every line is of the form: original_password<tab>target_password
    predictions_file - txt file in which every line is of the form: original_password<tab>json_list(prediction,score)
    num_uniuqe_predictions - number of uniuqe prediction to consider in the accuracy calculation. Predictions will be taken
                            in descending order of their score.
    This version does not use dictionary - number of lines in both files must match.
    Output: accuracy
    '''
    match_vec = []
    with open(tst_file, 'r') as f_tst:
        with open(predictions_file, 'r') as f_pred:
            for l, pred_line in enumerate(f_pred):
                orig_1, j_list = pred_line.split('\t')
                test_line = f_tst.readline()
                orig_2, target =  test_line.split('\t')
                target = target.split('\n')[0]
#                 print(target)
                if (orig_1 != orig_2):
                    # Error
                    print("Error: Lines in file do not match. Stopping...")
                    break
                predictions_and_scores = json.loads(j_list)
                predictions = [pred[0] for pred in predictions_and_scores]
                # Take uniuqe predicions and add the original password as a guess
                seen = set()
                seen_add = seen.add
                if (bias):
                    unq_predictions = [orig_1]
                    seen_add(orig_1)
                else:
                    unq_predictions = []
                unq_predictions += [x for x in predictions if not (x in seen or seen_add(x))]
                unq_predictions = unq_predictions[:num_uniuqe_predictions]
                if (target in unq_predictions):
                    match_vec.append(True)
                else:
                    match_vec.append(False)
        total_samples = l + 1
        if (total_samples != len(match_vec)):
            print("Error: Total tested samples ({}) does not match the number of lines in the files ({})"
                  .format(len(match_vec), total_samples))
        print("Accuracy calculated over {} samples".format(total_samples))
#         print(match_vec)
#         print(np.array(match_vec))
        acc = np.mean(np.array(match_vec))
        return acc
#                 if (l == 30):
#                     print(orig_1 + '\t' + ','.join(unq_predictions))
                    
                

In [36]:
def test_accuracy_from_files(tst_file, predictions_file, num_uniuqe_predictions=5, bias=False):
    '''
    This functions tests the accuracy of predictions made by Pass2Path and saved in a predictions file on
    the input test file.
    Inputs:
    tst_file - txt file in which every line is of the form: original_password<tab>target_password
    predictions_file - txt file in which every line is of the form: original_password<tab>json_list(prediction,score)
    num_uniuqe_predictions - number of uniuqe prediction to consider in the accuracy calculation. Predictions will be taken
                            in descending order of their score.
    This version uses dictionary - number of lines in both files may not match, accuracy is calculated on the
                                number o samples (lines) in the prediction file.
    Output: accuracy
    '''
    match_vec = []
    test_dict = {}
    with open(tst_file, 'r') as f_tst:
        for line in f_tst:
            orig, target = line.split('\t')
            target = target.split('\n')[0]
            test_dict[orig] = target
    
    with open(predictions_file, 'r') as f_pred:
        for l, pred_line in enumerate(f_pred):
                orig, j_list = pred_line.split('\t')
                predictions_and_scores = json.loads(j_list)
                predictions = [pred[0] for pred in predictions_and_scores]
                # Take uniuqe predicions and add the original password as a guess
                seen = set()
                seen_add = seen.add
                if (bias):
                    unq_predictions = [orig]
                    seen_add(orig)
                else:
                    unq_predictions = []
                unq_predictions += [x for x in predictions if not (x in seen or seen_add(x))]
                unq_predictions = unq_predictions[:num_uniuqe_predictions]
                target = test_dict.get(orig)
                if (target is not None):
                    if (target in unq_predictions):
                        match_vec.append(True)
                    else:
                        match_vec.append(False)
    total_samples = l + 1
    print("Accuracy calculated over {} samples".format(total_samples))
#         print(match_vec)
#         print(np.array(match_vec))
    acc = np.mean(np.array(match_vec))
    return acc
#                 if (l == 30):
#      

In [39]:
# Evaluate the saved predictions against the 100000-sample email test set:
# top-10 unique guesses per sample, with the original password used as the first guess.
tst_file = "test_full_email_100000.txt"
predictions_file = "pass2path_-1_test_full_email_100000.predictions"
acc = test_accuracy_from_files(
    tst_file,
    predictions_file,
    bias=True,
    num_uniuqe_predictions=10,
)
print(acc)

Accuracy calculated over 100000 samples
0.09384
