In [1]:
# import_ipynb is imported first: the project modules imported below are
# loaded from .ipynb notebooks (see the "importing Jupyter notebook" log
# printed by this cell).
import import_ipynb
import sys
sys.path.append('../../')  # Go up two folders to the project root

# Project-local modules under test; resolvable only after the path tweak above.
from query_processing.DAAT import DAAT
from structures.LexiconRow import LexiconRow
from structures.PostingListHandler import Posting_List_Reader
from structures.InvertedIndex import Posting

# Set up ipytest for in-notebook test runs.
# NOTE(review): presumably this is what enables the %%ipytest cell magic used
# by the test cell - confirm against the ipytest documentation.
import ipytest
ipytest.autoconfig()

# Standard-library helpers used by the test cell.
import tempfile
import random

importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\query_processing\DAAT.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\DocumentIndex.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\utilities\General_Utilities.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\DocumentIndexRow.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\Lexicon.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\LexiconRow.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\building_data_structures\CollectionStatistics.ipynb
importing Jupyter notebook from C:\Users\gabri\Documents\GitHub\tests\Query_processing\../..\structures\PostingListHa

In [2]:
%%ipytest

def check_if_inserted(doc_id, real_action, my_heap) -> bool:
    """
    Check that the top-k heap does not hold a document that should have been rejected.

    Only the "must NOT be inserted" direction is verified: when the document
    was expected to be inserted, the check is skipped and True is returned.

    Args:
        doc_id: identifier of the document that was just offered to the heap.
        real_action: True if the document was expected to be inserted,
            False if it was expected to be rejected.
        my_heap: iterable of heap entries; the element at index 1 of each
            entry is compared against doc_id.

    Returns:
        False if real_action is False and doc_id appears anywhere in my_heap;
        True otherwise (including when my_heap is empty).
    """
    if real_action:
        # Expected insertion: nothing to refute here.
        return True
    # Scan every entry, not just the first one, so a wrongly inserted
    # document is detected regardless of its position in the heap.
    return all(entry[1] != doc_id for entry in my_heap)

def _stub_posting_reader(term, doc_id, freq):
    """Return a Posting_List_Reader for `term` whose current posting is fixed to Posting(doc_id, freq)."""
    reader = Posting_List_Reader(LexiconRow(term), False, None, None, None)
    # Replace the accessor with a stub so the reader always reports the same posting.
    reader.get_current_posting = lambda: Posting(doc_id, freq)
    return reader


def test_daat():
    """
    Exercise the DAAT query processor step by step.

    Covers, in order: opening/closing the posting-list files, reset_lists,
    initialize_posting_lists, update_heap (top-k maintenance with k=2),
    scoreQuery, min_doc and all_lists_exhausted. The sections share
    `daat_instance`, so their order matters.
    """
    daat_instance = DAAT()

    # Test open_all_posting_lists
    daat_instance.open_all_posting_lists()
    assert daat_instance.file_DocIds.readable() is True
    assert daat_instance.file_Freq.readable() is True
    assert daat_instance.file_blocks.readable() is True
    assert daat_instance.file_lexicon.readable() is True

    # Test reset_lists
    daat_instance.reset_lists()
    assert len(daat_instance.posting_readers) == 0
    assert len(daat_instance.top_k_documents) == 0

    # Test close_all_posting_lists
    daat_instance.close_all_posting_lists()
    assert daat_instance.file_DocIds.closed is True
    assert daat_instance.file_Freq.closed is True
    assert daat_instance.file_blocks.closed is True
    assert daat_instance.file_lexicon.closed is True

    # Test initialize_posting_lists (tokens is a list of strings)
    daat_instance.open_all_posting_lists()
    try:
        tokens = ["happiness", "home", "between"]
        daat_instance.initialize_posting_lists(tokens)
        assert len(daat_instance.posting_readers) == len(tokens)
    finally:
        # Close the files even if the assertion above fails.
        daat_instance.close_all_posting_lists()

    # Test update_heap
    scoring_function = "bm25"
    k = 2
    daat_instance.update_heap(scoring_function, 0, 3, k)  # check that insertion works
    assert len(daat_instance.top_k_documents) == 1
    daat_instance.update_heap(scoring_function, 1, 4, k)
    assert len(daat_instance.top_k_documents) == 2

    # Locally seeded generator: the test is reproducible and does not
    # disturb the global `random` state.
    rng = random.Random(0)
    for doc_id in range(2, 50):  # 48 further doc_ids (2..49)
        freq = rng.randint(1, 10)  # random term frequency
        score = daat_instance.scorer.choose_scoring_function(scoring_function, doc_id, freq)
        # Decide BEFORE updating whether the new score can enter the top-k.
        top = daat_instance.top_k_documents
        to_insert = (score >= top[0][0] or score >= top[1][0])
        daat_instance.update_heap(scoring_function, doc_id, freq, k)

        assert len(daat_instance.top_k_documents) == 2  # length must remain k
        # the heap must keep only the highest scores
        assert check_if_inserted(doc_id, to_insert, daat_instance.top_k_documents) == True

    # Test scoreQuery
    daat = DAAT()
    my_list = ["happiness", "home", "between"]
    result = daat.scoreQuery(3, "bm25", my_list, False)
    assert len(result) <= 3

    # Test min_doc
    daat_instance.reset_lists()
    assert daat_instance.min_doc() == (-1, -1)

    # (term, doc_id of the reader added, expected minimum after adding it)
    min_doc_cases = [
        ("a", 50, 50),
        ("b", 500, 50),
        ("c", 5, 5),
        ("d", 5000, 5),
        ("e", 23, 5),
        ("f", 789, 5),
    ]
    for term, current_doc_id, expected_min in min_doc_cases:
        daat_instance.posting_readers.append(_stub_posting_reader(term, current_doc_id, 3))
        min_doc, _ = daat_instance.min_doc()
        assert min_doc == expected_min

    # Test all_lists_exhausted
    daat_instance.reset_lists()
    exhausted, current_docs = daat_instance.all_lists_exhausted()
    assert exhausted is True
    assert all(doc is None for doc in current_docs)

    # Case in which at least one list is not exhausted
    daat_instance.posting_readers.append(_stub_posting_reader("x", 31, 9))
    daat_instance.posting_readers.append(_stub_posting_reader("y", 29, 18))

    exhausted, current_docs = daat_instance.all_lists_exhausted()
    assert exhausted is False
    assert all(doc is not None for doc in current_docs)
    assert len(current_docs) == 2

[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.03s[0m[0m
