In [1]:
from Services.JSONHandler import JSONHandler
from Services.SearchEngine import SearchEngine
from Services.SearchEvaluator import SearchEvaluator
import json

from bson import ObjectId
from tqdm import tqdm  

from Services.MilvusDB import MilvusDB
import fasttext

# Open the Milvus connection once, before any search is issued
# (the recorded cell output reports localhost:19530).
MilvusDB.connect()
# Load the fastText model from the local binary; `ft_model` is later handed
# to SearchEngine.search_with_embedding in the evaluation cell.
ft_model = fasttext.load_model('./Models/quora_model.bin')
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally knows how to serialise ``ObjectId`` values."""

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``.

        ``ObjectId`` instances are emitted as their ``str()`` form. Every
        other type is delegated to ``json.JSONEncoder.default``, which raises
        ``TypeError`` for objects it cannot serialise.
        """
        if not isinstance(obj, ObjectId):
            return super().default(obj)
        return str(obj)

Connected to Milvus at localhost:19530


In [3]:
# Evaluate embedding-based search against the Quora test queries and report
# the mean MRR, MAP, P@10 and recall over all queries.

# Define file paths
dataset_path = './DataSets/Quora/JSON/quora-test-docs.json'
index_path = './DataSets/Quora/inverted-index.json'
tfidf_file = "./DataSets/Quora/tfidf-matrix.npz"
vectorizer_file = "./DataSets/Quora/vectorizer.pkl"
doc_ids_file_path = "./DataSets/Quora/doc_ids.json"
qrels_path = './DataSets/Quora/JSON/quora-test-qrels.json'
queries_path = './DataSets/Quora/JSON/quora-test-queries.json'

# Fourth argument handed to SearchEngine.search_with_embedding.
# NOTE(review): presumably the number of hits requested per query — confirm
# against SearchEngine before changing it.
top_k = 10

# Load qrels
qrels = SearchEvaluator.load_qrels(qrels_path)

# Load queries
queries = JSONHandler.convert_from_json(queries_path)

# Fail early with a readable message instead of a ZeroDivisionError after
# the loop when the query file turns out to be empty.
num_queries = len(queries)
if num_queries == 0:
    raise ValueError(f"No queries loaded from {queries_path}; cannot compute averages.")

# Initialize results dictionary and running totals.
# The `antique` infix in these names does not match the Quora paths used in
# this cell; the names are kept unchanged in case other cells read them.
results_dict = {}
mrr_antique_sum = 0
map_antique_sum = 0
recall_antique_sum = 0
p10_antique_sum = 0

# Process each query and evaluate
for query in tqdm(queries, desc="Processing Queries"):
    query_id = query['query_id']
    query_text = query['text']
    search_results = SearchEngine.search_with_embedding(query_text, "quora", ft_model, top_k)
    results = SearchEvaluator.evaluate_search_engine_with_embedding(query_id, search_results, qrels)
    # Keep the per-query metrics so individual queries can be inspected later.
    results_dict[query_id] = results
    mrr_antique_sum += results["MRR"]
    map_antique_sum += results["MAP"]
    recall_antique_sum += results['R']
    p10_antique_sum += results['P@10']

# Average every metric over the number of queries.
mrr_antique_avg = mrr_antique_sum / num_queries
map_antique_avg = map_antique_sum / num_queries
p10_antique_avg = p10_antique_sum / num_queries
recall_antique_avg = recall_antique_sum / num_queries

print(f'AVG MRR : {mrr_antique_avg} ')
print(f'AVG MAP : {map_antique_avg} ')
print(f'AVG P@10 : {p10_antique_avg} ')
print(f'AVG R : {recall_antique_avg} ')

Processing Queries: 100%|██████████| 10000/10000 [14:01<00:00, 11.88it/s]

AVG MRR : 0.7414669404761915 
AVG MAP : 0.6746483894757138 
AVG P@10 : 0.4566000000000521 
AVG R : 0.8615967882155703 



