In [60]:
import pandas as pd
from glob import glob
import os
from typing import List, Set
from statistics import mean
from sklearn.metrics import average_precision_score
import numpy as np

In [2]:
ground_truth = pd.read_csv('merged_annotation.csv', header=None, names=('Path', 'Score'), sep=',', skipinitialspace=True, quotechar="'")
ground_truth['id'] = ground_truth['Path'].str[:-4]
ground_truth.set_index('id', inplace=True)
# ground_truth.loc['55-If3ebb17bc7963bfa8d81f040'].Score (=='Off Topic')

In [80]:
def calculate_precision_at_k(run_result_path : str, k : int):
    df = pd.read_csv(run_result_path, delim_whitespace=True, header=None, names=('QueryId', 'Stance', 'ImageId', 'Score', 'Rank', 'Tag'))
    
    precision_history = []
    for (image_id, stance), results in df.groupby(['QueryId', 'Stance']):
        true_positives = 0
        for _, row in results.iterrows():
            if row['Rank'] > k:
                continue

            lookup_id = f'{row["QueryId"]}-{row["ImageId"]}'
            score = ground_truth.loc[lookup_id].Score
            if (stance == 'pro' and score == 'Pro') or (stance == 'con' and score == 'Con'):
                true_positives += 1
        precision_history.append(true_positives/k)
    
    print(f'Precision at {k}: {mean(precision_history)}')

def calculate_map(run_result_path : str):
    df = pd.read_csv(run_result_path, delim_whitespace=True, header=None, names=('QueryId', 'Stance', 'ImageId', 'Score', 'Rank', 'Tag'))
    
    ap_history = []
    for (query_id, stance), results in df.sort_values(['Rank'], ascending=True).groupby(['QueryId', 'Stance']):
        def get_true_score(row : pd.Series):
            true_stance = ground_truth.loc[row.LookupId].Score
            true_score = int((stance == 'pro' and true_stance == 'Pro') or (stance == 'con' and true_stance == 'Con'))
            return true_score

        results['LookupId'] = str(query_id) + '-' + results['ImageId']
        results['TrueScore'] = results.apply(get_true_score, axis=1)
        ap = average_precision_score(np.array(list(results['TrueScore']) + [1]), np.array([1] * len(results['Score']) + [0]) )
        ap_history.append(ap)
    
    print(f'Mean Average Precision (probably calculated wrong): {mean(ap_history)}')

for path in glob('run-results/run*.txt'):
    print(path)
    calculate_precision_at_k(path, 10)
    calculate_map(path)
    print()

run-results\run0.txt
Precision at 10: 0.15
Mean Average Precision (probably calculated wrong): 0.20145551948051948

run-results\run1.txt
Precision at 10: 0.13
Mean Average Precision (probably calculated wrong): 0.19378113275613276

run-results\run2.txt
Precision at 10: 0.1148936170212766
Mean Average Precision (probably calculated wrong): 0.30283931535039693

run-results\run3.txt
Precision at 10: 0.148
Mean Average Precision (probably calculated wrong): 0.22030833333333333

