In [11]:
from pyserini.search import FaissSearcher, LuceneSearcher
from pyserini.search.faiss import AutoQueryEncoder
from pyserini.search.hybrid import HybridSearcher
from pyserini.search import get_topics, get_qrels
import json
from tqdm import tqdm

In [4]:
# Open the Lucene index 'index_relation_fb'. The cells below use this handle
# to look up the stored raw document for each hit's docid.
corpus = LuceneSearcher('index_relation_fb')

In [26]:
# BM25 search

In [5]:
# Searcher used for the sparse (BM25) queries, opened on 'index_relation_fb'.
# NOTE(review): this is the same index path that `corpus` opens; presumably a
# single LuceneSearcher handle could serve both roles — confirm before merging.
bm25_searcher = LuceneSearcher('index_relation_fb')

In [6]:
# Run one sample question through the BM25 searcher (top 20 hits) and print
# the JSON record stored in `corpus` for every returned docid.
question = 'newton per metre is the unit of measurement for surface tension in which system of measurement?'
hits = bm25_searcher.search(question, k=20)
for hit in hits:
    raw_record = corpus.doc(str(hit.docid)).raw()
    print(json.loads(raw_record))

{'id': '18202', 'contents': 'measurement unit surface tension unit tension in newtons per meter', 'rel_ori': 'measurement_unit.surface_tension_unit.tension_in_newtons_per_meter'}
{'id': '18203', 'contents': 'measurement unit surface tension unit measurement system', 'rel_ori': 'measurement_unit.surface_tension_unit.measurement_system'}
{'id': '20530', 'contents': 'measurement unit measurement system surface tension units', 'rel_ori': 'measurement_unit.measurement_system.surface_tension_units'}
{'id': '18525', 'contents': 'measurement unit unit of surface density measurement system', 'rel_ori': 'measurement_unit.unit_of_surface_density.measurement_system'}
{'id': '20509', 'contents': 'measurement unit measurement system surface density units', 'rel_ori': 'measurement_unit.measurement_system.surface_density_units'}
{'id': '17153', 'contents': 'measurement unit unit of volumetric flow rate rate in cubic metres per second', 'rel_ori': 'measurement_unit.unit_of_volumetric_flow_rate.rate_in_

In [29]:
# Contriever Search

In [7]:
# Build the dense retriever: a mean-pooled query encoder loaded from
# 'facebook/contriever', paired with the FAISS index 'freebase_contriever_index'.
# NOTE(review): 'facebook/contriever' looks like a Hugging Face model id, so
# loading it may attempt network access — confirm a local copy is available
# when running offline.
query_encoder = AutoQueryEncoder(
    encoder_dir='facebook/contriever',
    pooling='mean',
)
contriever_searcher = FaissSearcher(
    'freebase_contriever_index',
    query_encoder,
)

'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /facebook/contriever/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f46a3152b80>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 3b1afbd2-9dc5-4a48-a3a4-a6eba1a7ee9d)')' thrown while requesting HEAD https://huggingface.co/facebook/contriever/resolve/main/config.json
'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /facebook/contriever/resolve/main/tokenizer_config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f469ba01430>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 6bd4d6e3-13ce-4f47-afb3-1f2f1ede5839)')' thrown while requesting HEAD https://huggingface.co/facebook/contriever/resolve/main/tokenizer_config.json


In [8]:
# Run the same sample question through the Contriever (dense) searcher, top 20
# hits, and print the JSON record stored in `corpus` for every returned docid.
question = 'newton per metre is the unit of measurement for surface tension in which system of measurement?'
hits = contriever_searcher.search(question, k=20)
for hit in hits:
    raw_record = corpus.doc(str(hit.docid)).raw()
    print(json.loads(raw_record))

{'id': '18202', 'contents': 'measurement unit surface tension unit tension in newtons per meter', 'rel_ori': 'measurement_unit.surface_tension_unit.tension_in_newtons_per_meter'}
{'id': '20530', 'contents': 'measurement unit measurement system surface tension units', 'rel_ori': 'measurement_unit.measurement_system.surface_tension_units'}
{'id': '18525', 'contents': 'measurement unit unit of surface density measurement system', 'rel_ori': 'measurement_unit.unit_of_surface_density.measurement_system'}
{'id': '17162', 'contents': 'measurement unit unit of force force in newtons', 'rel_ori': 'measurement_unit.unit_of_force.force_in_newtons'}
{'id': '18526', 'contents': 'measurement unit unit of surface density density in kilograms per square meter', 'rel_ori': 'measurement_unit.unit_of_surface_density.density_in_kilograms_per_square_meter'}
{'id': '18203', 'contents': 'measurement unit surface tension unit measurement system', 'rel_ori': 'measurement_unit.surface_tension_unit.measurement_s

In [36]:
# Hybrid searcher that combines BM25 and Contriever

In [12]:
# Combine the dense (Contriever) and sparse (BM25) searchers into one hybrid
# searcher, query it with the sample question (top 20 hits), and print the
# JSON record stored in `corpus` for every returned docid.
hsearcher = HybridSearcher(contriever_searcher, bm25_searcher)
question = 'newton per metre is the unit of measurement for surface tension in which system of measurement?'
hits = hsearcher.search(question, k=20)
for hit in hits:
    raw_record = corpus.doc(str(hit.docid)).raw()
    print(json.loads(raw_record))

{'id': '18202', 'contents': 'measurement unit surface tension unit tension in newtons per meter', 'rel_ori': 'measurement_unit.surface_tension_unit.tension_in_newtons_per_meter'}
{'id': '20530', 'contents': 'measurement unit measurement system surface tension units', 'rel_ori': 'measurement_unit.measurement_system.surface_tension_units'}
{'id': '18203', 'contents': 'measurement unit surface tension unit measurement system', 'rel_ori': 'measurement_unit.surface_tension_unit.measurement_system'}
{'id': '18525', 'contents': 'measurement unit unit of surface density measurement system', 'rel_ori': 'measurement_unit.unit_of_surface_density.measurement_system'}
{'id': '20509', 'contents': 'measurement unit measurement system surface density units', 'rel_ori': 'measurement_unit.measurement_system.surface_density_units'}
{'id': '17153', 'contents': 'measurement unit unit of volumetric flow rate rate in cubic metres per second', 'rel_ori': 'measurement_unit.unit_of_volumetric_flow_rate.rate_in_