In [1]:
%cd /Users/natalipeeva/Documents/GitHub/Automatic-Answering-of-City-Council-Questions/

/Users/natalipeeva/Documents/GitHub/Automatic-Answering-of-City-Council-Questions


In [2]:
import sys
import os
src_dir = os.path.join(os.getcwd(), 'src')
sys.path.append(src_dir)

from read_data.read_data import read_urls_questions, get_questions, get_url_content_tuples, get_relevant_docs
from retrieval.sparse_retrieval.tfidf import perform_tfidf_search
from retrieval.evaluation.evaluate import calculate_recall_at_k, calculate_average_recall

In [3]:
collected, questions =  read_urls_questions('data/reference_urls/reference_urls_collected.csv',
                                            'data/question_answer/questions_updated_urls.csv',
                                            clean_url_nan=True) # read collected urls and questions + remove unsuccessful collection

In [4]:
question_list = get_questions(questions)
document_list = get_url_content_tuples(collected)

In [5]:
all_results = perform_tfidf_search(question_list, document_list)

In [6]:
relevant_docs = get_relevant_docs(questions)

In [14]:
def recall_k(results, relevant_docs, k):
    """
    Compute Recall@K
    Input:
        results: A sorted list of tuples (document_id, score), with the most relevant document in the first position
        relevant_docs: A set of relevant documents.
        k: the cut-off
    Output: Recall@K
    """
    # Get unique document IDs from the results and relevant_docs
    unique_results = list(set(result[0] for result in results))
    unique_relevant_docs = list(set(relevant_docs))

    if k > len(unique_results):
        k = len(unique_results)

    relevant_count = 0
    for i in range(k):
        if unique_results[i] in unique_relevant_docs:  # Check if result is in relevant
            relevant_count += 1

    k_recall = float(relevant_count) / float(len(unique_relevant_docs))

    return k_recall


def get_k_urls(bm25_results):
    urls = []
    counter = 1
    for result in bm25_results:

        urls.append((result['url'], result['score']))
        counter+=1
    return urls

def calculate_recall_at_k(queries, search_results, relevant_docs, k_values, ranking_method):
    """
    Calculate Recall@K for a list of queries and specified values of k.
    Input:
        queries - a list of queries
        collection: a list of tuples (document_id, document_content)
        relevant_docs: a dictionary where the key is the query and the value is a set of relevant document IDs
        k_values: a list of k values for recall calculation
    Output: a dictionary where the key is the query and the value is a dictionary of recall values at each k
    """
    recall_results = {}

    for query in queries:
        if ranking_method=='bm25':
            results = get_k_urls(search_results[query][1])
        elif ranking_method=='tf-idf':
            results = search_results[query]
        relevant = relevant_docs[query]

        recall_values = {}
        for k in k_values:
            recall = recall_k(results, relevant, k)
            recall_values[k] = recall

        recall_results[query] = recall_values

    return recall_results


def calculate_average_recall(recall_results):
    """
    Calculate the average recall values per recall type from a dictionary of recall values.
    Input:
        recall_results: a dictionary where the key is the query and the value is a dictionary of recall values at each k
    Output: a dictionary where the key is the recall type and the value is the average recall value
    """
    average_recall = {}

    for query_recall in recall_results.values():
        for k, recall in query_recall.items():
            if k not in average_recall:
                average_recall[k] = 0.0
            average_recall[k] += recall

    query_count = len(recall_results)
    for k in average_recall:
        average_recall[k] /= query_count

    return average_recall

In [15]:
recalls = calculate_recall_at_k(question_list, all_results, relevant_docs, [1, 5, 10], 'tf-idf') # think of a metric similarity recall? 

In [16]:
calculate_average_recall(recalls)

{1: 0.007352941176470588, 5: 0.0428921568627451, 10: 0.07598039215686274}

In [19]:
predictions = []
for question in all_results.keys():
    urls = []
    for result in all_results[question]:
        urls.append(result)
    predictions.append(list(set(urls)))


true = []
for question in relevant_docs.keys():
    true.append(list(set(relevant_docs[question])))



import numpy as np
from irmetrics.topk import rr

# Calculate the Mean Reciprocal Rank for each question
mrr_values = []
for i in range(len(predictions)):
    true_values = true[i]
    mrr = rr(true_values, predictions[i])
    mrr_values.append(mrr)

# Calculate the average Mean Reciprocal Rank
average_mrr = np.mean(mrr_values)

print("Average Mean Reciprocal Rank:", average_mrr)
 

Average Mean Reciprocal Rank: 0.009435190005205622


In [21]:
import numpy as np
from irmetrics.topk import recall

# Calculate the Mean Reciprocal Rank for each question
mrr_values = []
for i in range(len(predictions)):
    true_values = true[i]
    mrr = recall(true_values, predictions[i], k=10)
    mrr_values.append(mrr)

# Calculate the average Mean Reciprocal Rank
average_recall = np.mean(mrr_values)

print("Average Mean Reciprocal Rank:", average_recall)

Average Mean Reciprocal Rank: 0.008849557522123894


#### Run to save results

In [9]:
import pickle # if save
with open ('data/results/tfidf_results.pickle', 'wb') as f:
    pickle.dump(all_results, f)