# Import the necessary libraries and modules and create the JSON encoder class

In [1]:
from Services.JSONHandler import JSONHandler
import json
from bson import ObjectId
from tqdm import tqdm  # Import tqdm for the progress



class JSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally knows how to serialize ``ObjectId`` values."""

    def default(self, obj):
        """Return a JSON-serializable replacement for *obj*.

        ``ObjectId`` instances are converted with ``str``; every other
        object is delegated to ``json.JSONEncoder.default``.
        """
        # Anything that is not an ObjectId is handled by the base class.
        if not isinstance(obj, ObjectId):
            return super().default(obj)
        return str(obj)

# Evaluation without any extras

Quora Evaluation

In [2]:
# Define file paths
from Services.SearchEngine import SearchEngine
from Services.SearchEvaluator import SearchEvaluator
from Services.TextProcessor import TextProcessor

# Quora dataset artifacts referenced by this evaluation cell.
dataset_path = './DataSets/Quora/JSON/quora-test-docs.json'
index_path = './DataSets/Quora/inverted-index.json'
tfidf_file = "./DataSets/Quora/tfidf-matrix.npz"
vectorizer_file = "./DataSets/Quora/vectorizer.pkl"
doc_ids_file_path = "./DataSets/Quora/doc_ids.json"
qrels_path = './DataSets/Quora/JSON/quora-test-qrels.json'

# Relevance judgements first, then the queries to evaluate.
qrels = SearchEvaluator.load_qrels(qrels_path)
queries = JSONHandler.convert_from_json('./DataSets/Quora/JSON/quora-test-queries.json')

# Per-query metrics keyed by query id, plus running totals for the averages.
results_dict = {}
mrr_quora_sum = map_quora_sum = recall_quora_sum = p10_quora_sum = 0

# Precomputed TF-IDF matrix, its document ids and the matching vectorizer.
tfidf_matrix, doc_ids = TextProcessor.load_tfidf_matrix_and_doc_ids(
    tfidf_file,
    doc_ids_file_path,
)
vectorizer = SearchEngine.load_vectorizer(vectorizer_file)

# Run every query through the search engine and score the result.
for query in tqdm(queries, desc="Processing Queries"):
    query_id, query_text = query['query_id'], query['text']
    # NOTE(review): 0.7 is passed as the final positional argument; its meaning
    # is defined by SearchEngine.search_without_embedding - confirm there.
    search_results = SearchEngine.search_without_embedding(
        query_text, tfidf_matrix, "quora", vectorizer, doc_ids, 0.7
    )
    results = results_dict[query_id] = SearchEvaluator.evaluate_search_engine(
        query_id, search_results, qrels
    )
    mrr_quora_sum += results["MRR"]
    map_quora_sum += results["MAP"]
    recall_quora_sum += results['R']
    p10_quora_sum += results['P@10']

# Average each accumulated metric over the number of queries.
query_count = len(queries)
mrr_quora_avg = mrr_quora_sum / query_count
map_quora_avg = map_quora_sum / query_count
p10_quora_avg = p10_quora_sum / query_count
recall_quora_avg = recall_quora_sum / query_count

print(f'AVG MRR : {mrr_quora_avg} ')
print(f'AVG MAP : {map_quora_avg} ')
print(f'AVG P@10 : {p10_quora_avg} ')
print(f'AVG R : {recall_quora_avg} ')

Processing Queries: 100%|██████████| 10000/10000 [29:17<00:00,  5.69it/s] 

AVG MRR : 0.6514920484862434 
AVG MAP : 0.6050879859964369 
AVG P@10 : 0.3664256349206288 
AVG R : 0.7215754823951575 



