In [1]:
import os
import import_ipynb
import sys
sys.path.append('../../')  # Go up two folders to the project root

from typing import List, Tuple

from query_processing.MaxScore import Max_Score
from structures.PostingListHandler import Posting_List_Reader
from structures.LexiconRow import LexiconRow
from structures.InvertedIndex import Posting

import ipytest
ipytest.autoconfig()

import tempfile
import random

importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\query_processing\MaxScore.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\DocumentIndex.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\utilities\General_Utilities.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\DocumentIndexRow.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\Lexicon.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\LexiconRow.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\building_data_structures\CollectionStatistics.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\PostingLi

In [2]:
%%ipytest

def test_MaxScore():
    """Exercise the Max_Score helpers used by query processing.

    Sections, in order: open_all_posting_lists, reset_lists,
    initialize_and_sort_posting_lists, all_lists_exhausted, min_doc,
    update_heap (fixed cases plus a seeded randomized run) and
    close_all_posting_lists.

    The posting-list files are closed in a ``finally`` clause so that a
    failing assertion in the middle of the test does not leak open handles.

    NOTE(review): Max_Score() is created without arguments and real query
    terms are looked up, so this presumably requires the index files to be
    present on disk -- confirm.
    """

    def make_reader_entry(term, upper_bound):
        """Build one posting_readers entry around a reader for a bare LexiconRow."""
        return {
            "reader": Posting_List_Reader(LexiconRow(term), False, None, None, None),
            "term_upper_bound": upper_bound,
        }

    def stub_current_posting(entry, doc_id, frequency):
        """Make the entry's reader report a fixed posting (new object on every call)."""
        entry["reader"].get_current_posting = lambda: Posting(doc_id=doc_id, frequency=frequency)

    max_score_instance = Max_Score()
    max_score_instance.open_all_posting_lists()

    try:
        # #################### open_all_posting_lists
        assert max_score_instance.file_DocIds.readable() is True
        assert max_score_instance.file_Freq.readable() is True
        assert max_score_instance.file_blocks.readable() is True
        assert max_score_instance.file_lexicon.readable() is True

        # #################### reset_lists
        max_score_instance.reset_lists()
        assert len(max_score_instance.posting_readers) == 0
        assert len(max_score_instance.top_k_documents) == 0

        # #################### initialize_and_sort_posting_lists
        mock_tokens = ["cat", "happiness", "and", "dog"]
        max_score_instance.initialize_and_sort_posting_lists(mock_tokens, "bm25")

        # One reader per query token.
        readers = max_score_instance.posting_readers
        assert len(readers) == len(mock_tokens)

        # Readers must be sorted by term_upper_bound in ascending order.
        for lower, higher in zip(readers, readers[1:]):
            assert lower["term_upper_bound"] <= higher["term_upper_bound"]

        # Every reader must already be positioned on a posting.
        for entry in readers:
            assert entry["reader"].get_current_posting() is not None

        # TODO: cover the unsupported-scoring-function path once pytest is
        # imported in this notebook, e.g.
        #   with pytest.raises(ValueError, match="Not supported scoring function"):
        #       max_score_instance.initialize_and_sort_posting_lists(mock_tokens, "invalid_scoring_function")

        # #################### all_lists_exhausted with no readers
        max_score_instance.reset_lists()
        assert len(max_score_instance.posting_readers) == 0

        exhausted, current_docs = max_score_instance.all_lists_exhausted()
        assert exhausted is True
        assert current_docs == []

        # Mock posting readers whose current posting is stubbed below.
        mock_readers = [
            make_reader_entry("mock1", 0.5),
            make_reader_entry("mock2", 0.3),
            make_reader_entry("mock3", 0.7),
        ]
        max_score_instance.posting_readers = mock_readers

        # #################### all_lists_exhausted when every reader reports no posting
        for entry in mock_readers:
            entry["reader"].get_current_posting = lambda: None

        exhausted, current_docs = max_score_instance.all_lists_exhausted()
        assert exhausted is True
        assert current_docs == [None, None, None]

        # #################### all_lists_exhausted when every reader is still active
        for entry in mock_readers:
            stub_current_posting(entry, 1, 1)

        exhausted, current_docs = max_score_instance.all_lists_exhausted()
        assert exhausted is False
        assert all(doc is not None for doc in current_docs)

        # #################### min_doc with no readers
        max_score_instance.reset_lists()
        assert len(max_score_instance.posting_readers) == 0
        index_first_essential = 0
        result = max_score_instance.min_doc(index_first_essential)
        assert result == (-1, -1)

        # #################### min_doc with valid readers
        # (term, term_upper_bound, current doc_id, current frequency)
        reader_specs = [
            ("mock1", 5, 10, 3),
            ("mock2", 3, 8, 2),
            ("mock3", 8, 15, 5),
            ("mock4", 2, 12, 4),
            ("mock5", 6, 11, 1),
        ]
        posting_readers = []
        for term, upper_bound, doc_id, frequency in reader_specs:
            entry = make_reader_entry(term, upper_bound)
            stub_current_posting(entry, doc_id, frequency)
            posting_readers.append(entry)

        max_score_instance.posting_readers = posting_readers
        assert len(max_score_instance.posting_readers) == 5

        # Only readers from index_first_essential onwards are considered.
        index_first_essential = 1
        result = max_score_instance.min_doc(index_first_essential)
        assert result == (8, 2)

        index_first_essential = 2
        result = max_score_instance.min_doc(index_first_essential)
        assert result == (11, 1)

        # TODO: test the compute_score variants.
        # TODO: test nextGEQ.

        # #################### update_heap, fixed cases
        # Heap entries are (document_upper_bound, doc_id) tuples.
        max_score_instance.reset_lists()
        max_score_instance.top_k_documents = [(3, 1), (5, 2), (7, 3)]

        # Full heap, document_upper_bound greater than the lowest score in it.
        #  k = 3, document_upper_bound = 8, doc_to_process = 4, curr_threshold = 5
        result = max_score_instance.update_heap(3, 8, 4, 5)
        assert result == 8  # returned threshold
        assert set(max_score_instance.top_k_documents) == {(5, 2), (7, 3), (8, 4)}

        # Full heap, document_upper_bound lower than the lowest score in it.
        result = max_score_instance.update_heap(3, 4, 5, 8)
        assert result == 4  # returned threshold
        assert set(max_score_instance.top_k_documents) == {(5, 2), (7, 3), (8, 4)}

        # Heap not full yet: the entry is added and the threshold is unchanged.
        result = max_score_instance.update_heap(4, 4, 6, -1)
        assert result == -1  # returned threshold
        assert set(max_score_instance.top_k_documents) == {(7, 3), (8, 4), (5, 2), (4, 6)}

        # #################### update_heap, 100 pseudo-random insertions
        max_score_instance.reset_lists()
        k = 3
        curr_threshold = -1
        # Private, seeded generator: the run is reproducible and the global
        # random state is left untouched.
        rng = random.Random(0)
        max_score_instance.update_heap(k, 2.32, 101, curr_threshold)  # initialize
        for _ in range(100):
            doc_to_process = rng.randint(1, 100)  # random document
            document_upper_bound = rng.uniform(1.0, 10.0)  # random score

            # Snapshot the heap before the update. Scores are the first tuple
            # element, so compare score with score (not a score with a tuple).
            heap_before = list(max_score_instance.top_k_documents)
            lowest_score = min(score for score, _ in heap_before)
            below_minimum = len(heap_before) >= k and document_upper_bound < lowest_score

            curr_threshold = max_score_instance.update_heap(
                k, document_upper_bound, doc_to_process, curr_threshold
            )

            # Match on the whole entry: doc ids repeat across iterations, so a
            # doc-id-only lookup could hit an entry inserted earlier.
            added = (document_upper_bound, doc_to_process) in max_score_instance.top_k_documents

            # A full heap must not admit a score strictly below its minimum,
            # and it must never hold more than k entries.
            assert not (below_minimum and added)
            assert len(max_score_instance.top_k_documents) <= k
    finally:
        # Always release the file handles, even when an assertion above failed.
        max_score_instance.close_all_posting_lists()

    # #################### close_all_posting_lists
    assert max_score_instance.file_DocIds.closed is True
    assert max_score_instance.file_Freq.closed is True
    assert max_score_instance.file_blocks.closed is True
    assert max_score_instance.file_lexicon.closed is True

# NOTE(review): the %%ipytest magic at the top of this cell already collects
# and runs test_MaxScore, so this direct call appears to execute the test a
# second time -- confirm whether it is still needed.
test_MaxScore()

[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.01s[0m[0m
