In [4]:
# import_ipynb is never referenced by name below, so it is imported only for
# its side effect — presumably enabling notebooks (.ipynb) to be imported like
# regular modules. TODO confirm it is still required for the import further down.
import import_ipynb
import os
import sys
# The path is relative, so it is resolved against the current working
# directory: this assumes the notebook is run from its own folder.
sys.path.append('../../')  # Go up two folders to the project root


# Class under test; resolved through the project root added to sys.path above.
from structures.DocumentIndexRow import DocumentIndexRow

In [5]:
import pytest
import ipytest

# Apply ipytest's default configuration; the test cells below rely on the
# %%ipytest cell magic to run pytest on the functions they define.
ipytest.autoconfig()

In [6]:
%%ipytest

# TEST FOR "DOCUMENT INDEX ROW"
def test_document_index_row_structure():
    """Check construction, word counting and string form of DocumentIndexRow.

    Covers regular documents, the empty document, and the rejection of
    arguments of the wrong type. The negative cases use ``pytest.raises`` so
    the test fails if no ``ValueError`` is raised at all; a plain
    ``try/except`` would pass silently in that situation.
    """
    row = DocumentIndexRow(5, "Hello world")
    assert row.document_length == 2
    assert row.document_length == row.count_words("Hello world")
    assert row.doc_id == 5
    assert row.to_string() == "5 2"

    row = DocumentIndexRow(6, "Testing multiple words in a sentence.")
    assert row.document_length == 6
    assert row.to_string() == "6 6"

    # Empty document
    row = DocumentIndexRow(7, "")
    assert row.document_length == 0
    assert row.to_string() == "7 0"

    # Both constructor type checks are expected to report the same message.
    constructor_error = "doc_no must be an integer and text must be a string."

    # "Doc_id" must be an integer
    with pytest.raises(ValueError) as exc_info:
        DocumentIndexRow("ciao", "world")
    assert str(exc_info.value) == constructor_error

    # "Text" must be a string
    with pytest.raises(ValueError) as exc_info:
        DocumentIndexRow(8, 123)
    assert str(exc_info.value) == constructor_error

    # Parameter for "count_words" must be a string.
    # `row` is still the valid empty-document row built above.
    with pytest.raises(ValueError) as exc_info:
        row.count_words(56)
    assert str(exc_info.value) == "text must be a string."

    assert row.count_words("") == 0

[32m.[0m[32m                                                                                            [100%][0m
[32m[32m[1m1 passed[0m[32m in 0.02s[0m[0m


In [7]:
%%ipytest

def test_write_doc_index_row_on_disk():
    """Write two DocumentIndexRow records to disk and inspect the raw bytes.

    Checks that ``write_debug`` returns the offset right after the record it
    wrote, that each record occupies ``SIZE_DOC_INDEX_ROW`` bytes, and that
    writing a second record leaves the first one intact.

    The scratch file is removed in a ``finally`` clause so a failing assertion
    does not leave it behind for later tests.
    """
    file_path = "prova.bin"

    d_ind_row = DocumentIndexRow(3,"what do you doing")
    assert d_ind_row.doc_id == 3
    assert d_ind_row.document_length == 4

    # Start from a clean slate in case a previous run left the file behind.
    if os.path.exists(file_path):
        os.remove(file_path)

    try:
        # write in position 0
        new_free_offset = d_ind_row.write_debug(file_path, 0)

        # Read it again and check all fields are correctly present in binary format.
        with open(file_path, 'rb') as file:
            binaryData = file.read()

        assert len(binaryData) == d_ind_row.SIZE_DOC_INDEX_ROW
        # Only single bytes are inspected: doc_id at byte 0, length at byte 4.
        # NOTE(review): presumably two fixed-width fields whose low byte comes
        # first — confirm against write_debug.
        assert binaryData[0]   == 3
        assert binaryData[4]   == 4
        assert new_free_offset == d_ind_row.SIZE_DOC_INDEX_ROW

        d_ind_row2 = DocumentIndexRow(67,"One Ring to rule them all, One Ring to find them, One Ring to bring them all, and in the darkness bind them.")

        # Write a second block in the position returned from previous method.
        new_free_offset = d_ind_row2.write_debug(file_path, new_free_offset)

        # Read everything back: the previous block must still be present and
        # the new block must be stored correctly.
        with open(file_path, 'rb') as file:
            binaryData = file.read()

        assert len(binaryData) == d_ind_row.SIZE_DOC_INDEX_ROW*2

        assert binaryData[0]   == 3
        assert binaryData[4]   == 4

        assert new_free_offset == d_ind_row.SIZE_DOC_INDEX_ROW*2

        # Second record: byte offsets 8 and 12 assume the record layout
        # sketched above — TODO confirm.
        assert binaryData[8] == 67
        assert binaryData[12] == 23
    finally:
        # Guarded so a failure before the file was created does not mask the
        # original error with FileNotFoundError.
        if os.path.exists(file_path):
            os.remove(file_path)


def test_read_doc_index_row_on_disk():
    """Write two records, then read them back into an existing row object.

    Checks that ``read_debug`` overwrites ``doc_id`` and ``document_length``
    of the row it is called on with the values stored at the given offset,
    and that reading at an offset past the end of the file returns ``None``.

    The scratch file is deleted before the test (so stale content cannot
    influence it) and again in a ``finally`` clause, even when an assertion
    fails.
    """
    file_path = "prova.bin"

    # Start from a clean slate in case a previous run left the file behind.
    if os.path.exists(file_path):
        os.remove(file_path)

    try:
        d_ind_row = DocumentIndexRow(3,"what do you doing")
        d_ind_row2 = DocumentIndexRow(67,"One Ring to rule them all, One Ring to find them, One Ring to bring them all, and in the darkness bind them.")

        new_offset = d_ind_row.write_debug(file_path, 0)
        d_ind_row2.write_debug(file_path, new_offset)

        # A row with unrelated content, to prove read_debug replaces its fields.
        new_doc_index_row = DocumentIndexRow(50, "I am gonna make him an offer he can't refuse.")

        assert new_doc_index_row.doc_id == 50
        assert new_doc_index_row.document_length == 10

        # First record, stored at offset 0.
        new_doc_index_row.read_debug(file_path, 0)

        assert new_doc_index_row.doc_id == 3
        assert new_doc_index_row.document_length == 4

        # Second record, stored right after the first one.
        new_doc_index_row.read_debug(file_path, new_offset)

        assert new_doc_index_row.doc_id == 67
        assert new_doc_index_row.document_length == 23

        # Offset 800 is well beyond the two records written above.
        assert new_doc_index_row.read_debug(file_path, 800) is None
    finally:
        # Guarded so a failure before the file was created does not mask the
        # original error with FileNotFoundError.
        if os.path.exists(file_path):
            os.remove(file_path)

[32m.[0m[32m.[0m[32m                                                                                           [100%][0m
[32m[32m[1m2 passed[0m[32m in 0.08s[0m[0m
