In [3]:
import json

class EvaluationMetrics:
    """Compute ranking metrics for per-query predictions against ground truth.

    Ground truth is read from a JSON Lines file in which every record has an
    ``answer_pids`` key; predictions are read from a single JSON document that
    is a list of entries, each with a ``sorted_non_zero_indices`` key.  The
    i-th ground-truth record is paired with the i-th prediction entry.
    """

    def __init__(self, true_data_path, predictions_path):
        """Load both files eagerly.

        Args:
            true_data_path: Path to the JSON Lines ground-truth file.
            predictions_path: Path to the JSON predictions file.
        """
        self.true_data_path = true_data_path
        self.predictions_path = predictions_path
        self.true_data = self.load_data(self.true_data_path)
        self.predictions = self.load_predictions(self.predictions_path)

    def load_data(self, filepath):
        """Return the ``answer_pids`` value of every record in a JSONL file.

        Blank lines (for example a trailing newline at end of file) are
        skipped instead of being handed to the JSON parser.
        """
        with open(filepath, 'r', encoding='utf-8') as file:
            return [
                json.loads(line)['answer_pids']
                for line in file
                if line.strip()
            ]

    def load_predictions(self, filepath):
        """Return the ``sorted_non_zero_indices`` value of every entry."""
        with open(filepath, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return [entry['sorted_non_zero_indices'] for entry in data]

    def calculate_recall(self, true_pids, pred_indices):
        """Return the fraction of distinct true ids present in the predictions.

        Returns 0.0 when there are no true ids.
        """
        true_set = set(true_pids)
        if not true_set:
            return 0.0
        return len(true_set & set(pred_indices)) / len(true_set)

    def calculate_precision_at_k(self, true_pids, pred_indices, k):
        """Return the number of true ids among the first ``k`` predictions, over ``k``.

        Returns 0.0 when ``k`` is not positive or there are no predictions.
        """
        # A negative k would slice from the end and divide by a negative number.
        if k <= 0:
            return 0.0
        top_k = set(pred_indices[:k])
        if not top_k:
            return 0.0
        return len(set(true_pids) & top_k) / k

    def average_precision(self, true_pids, pred_indices):
        """Return the average precision of one ranked prediction list.

        Precision is accumulated at every rank holding a true id and the sum
        is divided by the number of distinct true ids, so duplicated ids in
        ``true_pids`` are counted once (matching ``calculate_recall``).
        Returns 0.0 when nothing relevant is retrieved.
        """
        true_set = set(true_pids)  # O(1) membership inside the loop
        hits = 0
        precision_sum = 0.0
        for rank, pred in enumerate(pred_indices, start=1):
            if pred in true_set:
                hits += 1
                precision_sum += hits / rank
        if hits == 0:
            return 0.0
        return precision_sum / len(true_set)

    def calculate_reciprocal_rank(self, true_pids, pred_indices):
        """Return 1/rank of the first prediction that is a true id, else 0.0."""
        true_set = set(true_pids)
        for rank, pred in enumerate(pred_indices, start=1):
            if pred in true_set:
                return 1 / rank
        return 0.0

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or 0.0 when it is empty."""
        return sum(values) / len(values) if values else 0.0

    def calculate_metrics(self, k=10):
        """Print and return the metrics averaged over all query/prediction pairs.

        Args:
            k: Cutoff used for precision@k.  Defaults to 10.

        Returns:
            Tuple ``(mean_recall, mean_precision_at_k, mean_ap, mean_mrr)``.
            Every value is 0.0 when there are no pairs to evaluate.

        Note:
            Pairs are formed with ``zip``, so if the two inputs differ in
            length the surplus entries of the longer one are ignored.
        """
        recalls = []
        precisions_k = []
        aps = []
        mrr_scores = []

        for true_ids, pred_ids in zip(self.true_data, self.predictions):
            recalls.append(self.calculate_recall(true_ids, pred_ids))
            precisions_k.append(self.calculate_precision_at_k(true_ids, pred_ids, k))
            aps.append(self.average_precision(true_ids, pred_ids))
            mrr_scores.append(self.calculate_reciprocal_rank(true_ids, pred_ids))

        mean_recall = self._mean(recalls)
        mean_precision_at_k = self._mean(precisions_k)
        mean_ap = self._mean(aps)
        mean_mrr = self._mean(mrr_scores)

        print(f"Mean Recall: {mean_recall}")
        print(f"Precision@{k}: {mean_precision_at_k}")
        print(f"Mean Average Precision: {mean_ap}")
        print(f"Mean Reciprocal Rank: {mean_mrr}")

        return mean_recall, mean_precision_at_k, mean_ap, mean_mrr

import json

class EvaluationMetrics:
    """Compute ranking metrics for per-query predictions against ground truth.

    Ground truth is read from a JSON Lines file in which every record has an
    ``answer_pids`` key; predictions are read from a single JSON document that
    is a list of entries, each with a ``sorted_non_zero_indices`` key.  The
    i-th ground-truth record is paired with the i-th prediction entry.
    """

    def __init__(self, true_data_path, predictions_path):
        """Load both files eagerly.

        Args:
            true_data_path: Path to the JSON Lines ground-truth file.
            predictions_path: Path to the JSON predictions file.
        """
        self.true_data_path = true_data_path
        self.predictions_path = predictions_path
        self.true_data = self.load_data(self.true_data_path)
        self.predictions = self.load_predictions(self.predictions_path)

    def load_data(self, filepath):
        """Return the ``answer_pids`` value of every record in a JSONL file.

        Blank lines (for example a trailing newline at end of file) are
        skipped instead of being handed to the JSON parser.
        """
        with open(filepath, 'r', encoding='utf-8') as file:
            return [
                json.loads(line)['answer_pids']
                for line in file
                if line.strip()
            ]

    def load_predictions(self, filepath):
        """Return the ``sorted_non_zero_indices`` value of every entry."""
        with open(filepath, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return [entry['sorted_non_zero_indices'] for entry in data]

    def calculate_recall(self, true_pids, pred_indices):
        """Return the fraction of distinct true ids present in the predictions.

        Returns 0.0 when there are no true ids.
        """
        true_set = set(true_pids)
        if not true_set:
            return 0.0
        return len(true_set & set(pred_indices)) / len(true_set)

    def calculate_precision_at_k(self, true_pids, pred_indices, k):
        """Return the number of true ids among the first ``k`` predictions, over ``k``.

        Returns 0.0 when ``k`` is not positive or there are no predictions.
        """
        # A negative k would slice from the end and divide by a negative number.
        if k <= 0:
            return 0.0
        top_k = set(pred_indices[:k])
        if not top_k:
            return 0.0
        return len(set(true_pids) & top_k) / k

    def average_precision(self, true_pids, pred_indices):
        """Return the average precision of one ranked prediction list.

        Precision is accumulated at every rank holding a true id and the sum
        is divided by the number of distinct true ids, so duplicated ids in
        ``true_pids`` are counted once (matching ``calculate_recall``).
        Returns 0.0 when nothing relevant is retrieved.
        """
        true_set = set(true_pids)  # O(1) membership inside the loop
        hits = 0
        precision_sum = 0.0
        for rank, pred in enumerate(pred_indices, start=1):
            if pred in true_set:
                hits += 1
                precision_sum += hits / rank
        if hits == 0:
            return 0.0
        return precision_sum / len(true_set)

    def calculate_reciprocal_rank(self, true_pids, pred_indices):
        """Return 1/rank of the first prediction that is a true id, else 0.0."""
        true_set = set(true_pids)
        for rank, pred in enumerate(pred_indices, start=1):
            if pred in true_set:
                return 1 / rank
        return 0.0

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or 0.0 when it is empty."""
        return sum(values) / len(values) if values else 0.0

    def calculate_metrics(self, k=10):
        """Print and return the metrics averaged over all query/prediction pairs.

        Args:
            k: Cutoff used for precision@k.  Defaults to 10.

        Returns:
            Tuple ``(mean_recall, mean_precision_at_k, mean_ap, mean_mrr)``.
            Every value is 0.0 when there are no pairs to evaluate.

        Note:
            Pairs are formed with ``zip``, so if the two inputs differ in
            length the surplus entries of the longer one are ignored.
        """
        recalls = []
        precisions_k = []
        aps = []
        mrr_scores = []

        for true_ids, pred_ids in zip(self.true_data, self.predictions):
            recalls.append(self.calculate_recall(true_ids, pred_ids))
            precisions_k.append(self.calculate_precision_at_k(true_ids, pred_ids, k))
            aps.append(self.average_precision(true_ids, pred_ids))
            mrr_scores.append(self.calculate_reciprocal_rank(true_ids, pred_ids))

        mean_recall = self._mean(recalls)
        mean_precision_at_k = self._mean(precisions_k)
        mean_ap = self._mean(aps)
        mean_mrr = self._mean(mrr_scores)

        print(f"Mean Recall: {mean_recall}")
        print(f"Precision@{k}: {mean_precision_at_k}")
        print(f"Mean Average Precision: {mean_ap}")
        print(f"Mean Reciprocal Rank: {mean_mrr}")

        return mean_recall, mean_precision_at_k, mean_ap, mean_mrr


# Build the evaluator from the ground-truth query file and the predictions
# file (both paths are relative to the current working directory), then
# print and return the aggregate metrics.
evaluation = EvaluationMetrics(
    'lifestyle/cleaned_queries.jsonl',
    'lifestyle/tfidf_query_results.json',
)
evaluation.calculate_metrics()

Mean Recall: 0.9124547950214646
Precision@10: 0.08029865125240754
Mean Average Precision: 0.10545595236115267
Mean Reciprocal Rank: 0.2666826186832111


(0.9124547950214646,
 0.08029865125240754,
 0.10545595236115267,
 0.2666826186832111)