# Tests of Subsection source handling

### Imports

In [1]:
from pprint import pprint
import random
from buffered_iterator import BufferedIterator

from sections import SectionBreak, Section
from sections import Rule, RuleSet, ProcessingMethods

### Logging

In [2]:
import logging
# Every record shows the logger name (padded to 20 characters), the level
# and the message.
logging.basicConfig(
    format='%(name)-20s - %(levelname)s: %(message)s',
)

# Notebook logger.  Replace INFO with DEBUG below for more detailed output.
logger = logging.getLogger('Line Count Tests')
logger.setLevel(logging.INFO)


### Source as Sequence of Integers

In [3]:
# Sizing for the demonstration sources.
# NOTE(review): these names are rebound by the cells that follow.
buffer_size, num_items = 5, 10

# One source yielding the numbers as strings, one yielding them as integers.
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

# Test BufferedIterator Item Count

In [4]:
# Shared sizing used by the item-count tests below.
buffer_size, num_items = 5, 12


In [5]:
# Fresh string and integer sources built from the current sizing values.
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)


### test_initial_count_values
Before iteration starts BufferedIterator.item_count=0.


In [6]:
# Show the private counter and the public property, one per line, before any
# item has been requested from the source.
print(str_source._item_count, str_source.item_count, sep='\n')


0
0


### test_count_value_tracking
BufferedIterator.item_count should be one greater than the source item index 
for all items.


In [7]:
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)
# The source yields 0, 1, 2, ... so value + 1 is the number of items read so
# far; print it next to the iterator's own counter for comparison.
for value in int_source:
    expected_count = value + 1
    print(expected_count, int_source.item_count)


1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12


### test_post_iteration_count_value
After iteration completes BufferedIterator.item_count should be the 
total number of items in the source.


In [8]:
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

# Exhaust the source; the items themselves are not needed.
for _unused in int_source:
    pass

# Expected total alongside the counter reported after iteration.
print(num_items, int_source.item_count)


12 12


### test_backup_count
When backup is called on a BufferedIterator iterator,
BufferedIterator.item_count should decrease by the corresponding amount.


In [9]:
str_source = BufferedIterator(
    (str(n) for n in range(num_items - 1)),
    buffer_size=buffer_size,
)

# Pick how far to read, then how far to back up.  The backup distance is kept
# below both the number of items read and the buffer size.
fwd = random.randint(2, num_items - 1)
back_limit = min(fwd - 1, buffer_size - 1)
back = random.randint(1, back_limit)
print(f'Moving forward {fwd} steps; backing up {back} steps')

for _step in range(fwd):
    next(str_source)
str_source.backup(back)

# Expected count next to the actual count, then the last retained item next
# to the index implied by the count.
expected_count = fwd - back
print(expected_count, str_source.item_count)
last_item = int(str_source.previous_items[-1])
print(last_item, str_source.item_count - 1)


Moving forward 5 steps; backing up 2 steps
3 3
2 2


### test_advance_count
When advance is called on a BufferedIterator iterator,
BufferedIterator.item_count should increase by the corresponding amount.


In [10]:
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

# Pick how far to read with next(), then how far to advance().  The advance
# distance is capped by the remaining items and by the buffer size.
fwd = random.randint(0, num_items - 1)
adv_limit = min(num_items - fwd, buffer_size)
adv = random.randint(1, adv_limit)
print(f'Moving forward {fwd} steps; advancing {adv} more steps')

for _step in range(fwd):
    next(int_source)
int_source.advance(adv)

# Expected count next to the actual count, then the last retained item next
# to the index implied by the count.
expected_count = fwd + adv
print(expected_count, int_source.item_count)
last_item = int_source.previous_items[-1]
print(last_item, int_source.item_count - 1)

Moving forward 11 steps; advancing 1 more steps
12 12
11 11


# Test BufferedIterator.goto_item


In [11]:
# Sizing used by the goto_item tests below.
buffer_size, num_items = 5, 12

In [12]:

# Fresh string and integer sources for the goto_item tests.
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)

int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)


### test_goto_forward
`BufferedIterator.goto_item(n)` should make the next call to `__next__()`
return the $n^{th}$ item in the sequence.


In [13]:
int_source = BufferedIterator((i for i in range(num_items)),
                              buffer_size=buffer_size)

# Stop at least two items short of the end so that there is always at least
# one valid target beyond the current position.
fwd = random.randint(1, num_items-2)
# Exclusive upper bound for the target.  The source holds items
# 0 .. num_items-1, so the target must stay below num_items; otherwise the
# next() call after goto_item() would run past the end of the source.
# The jump is also kept within the buffer size.
max_adv = fwd + min(num_items-fwd, buffer_size)
item_choices = list(range(fwd+1, max_adv))
target_item = random.choice(item_choices)
print(f'Moving forward {fwd} steps; going to item {target_item}')

for _step in range(fwd):
    next(int_source)
int_source.goto_item(target_item)

# Target next to the reported count, then the item actually returned next to
# the index implied by the count after that read.
print(target_item, int_source.item_count)
print(next(int_source), int_source.item_count-1)


Moving forward 7 steps; going to item 11
11 11
11 11


### test_goto_backwards

`BufferedIterator.goto_item(n)` should make the next call to `__next__()`
return the $n^{th}$ item in the sequence.  When moving backwards, `n` cannot be
smaller than `item_count - buffer_size`, because only the items held in
`previous_items` can be revisited.


In [14]:
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

# Read `fwd` items, then choose a target among the most recently read items.
fwd = random.randint(2, num_items)
item_idx = fwd - 1                      # index of the last item read
buffer_len = min(buffer_size, fwd)      # cannot hold more items than were read
max_back = item_idx - buffer_len + 1    # exclusive lower bound for the target
item_choices = list(range(item_idx, max_back, -1))
target_item = random.choice(item_choices)
print(f'Moving forward {fwd} steps; going to item {target_item}')

for _step in range(fwd):
    next(int_source)
int_source.goto_item(target_item)

# Target next to the reported count, then the item actually returned next to
# the index implied by the count after that read.
print(target_item, int_source.item_count)
print(next(int_source), int_source.item_count - 1)


Moving forward 8 steps; going to item 4
4 4
4 4


### test_goto_beginning
BufferedIterator.goto_item(0) should restart the iterator.

- In order to move to the beginning, the current location must not exceed
the buffer size.


In [15]:
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)

# Read no more items than the buffer size before jumping back to item 0.
fwd_limit = min(buffer_size, num_items)
fwd = random.randint(1, fwd_limit)
print(f'Moving forward {fwd} steps; going to item 0')
for _step in range(fwd):
    next(str_source)

# Show the iterator state before the jump, then the count after it.
print(repr(str_source))
str_source.goto_item(0)

print(str_source.item_count)

Moving forward 5 steps; going to item 0
BufferedIterator(source=<generator object <genexpr> at 0x000002AE7DFF10B0>, buffer_size=5)
	BufferedIterator.previous_items = deque(['0', '1', '2', '3', '4'], maxlen=5)
	BufferedIterator.future_items = deque([], maxlen=5)
	BufferedIterator._step_back = 0
0


# Track source items used by processor

The process method should track the number of source items used for each processed item.

The processor records `source.item_count` for each output item in a sequence (`section.source_index`):
- `len(section.source_index)` = # processed items
- `section.source_index[-1]` = # source items (includes skipped source items)
- Property `item_count` returns the length of that sequence
- Property `source_item_count` returns the last value in that sequence


## Tests

### Before source initialized
- Section.source_index is None
- Section.source_item_count is 0
- Section.item_count is 0

In [16]:
# A section that has never been given a source.
empty_section = Section(section_name='empty')

source_index = empty_section.source_index
source_items = empty_section.source_item_count
item_count = empty_section.item_count

summary = (f'Section index is: {repr(source_index)}  \t'
           f'Section Count {item_count}  \t'
           f'Section Source Index: {source_items}')
print(summary)


Section index is: None  	Section Count 0  	Section Source Index: 0


### At beginning of source
- Section.source_index is empty list
- Section.source_item_count is 0
- Section.item_count is 0

In [17]:
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

# Attach the source to the section without reading anything from it.
not_started_section = Section(section_name='Not Started')
not_started_section.source = int_source

source_index = not_started_section.source_index
source_items = not_started_section.source_item_count
item_count = not_started_section.item_count

summary = (f'Section index is: {repr(source_index)}  \t'
           f'Section Count {item_count}  \t'
           f'Section Source Index: {source_items}')
print(summary)


Section index is: []  	Section Count 0  	Section Source Index: 0


### 1-to-1 match
- `range(n)` as source
- processor just returns item
- for each section item: 
  - item+1 = source.item_count
  - source.item_count = section.source_item_count 
  - source.item_count = section.item_count

In [18]:
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

# Section with only a name: no breaks and no processor are supplied.
section_1_1 = Section(section_name='1-to-1 match')

# After each processed item, report the source counter and both section counters.
for item in section_1_1.process(int_source):
    source_count = int_source.item_count
    source_items = section_1_1.source_item_count
    item_count = section_1_1.item_count
    report = (f'Item: {item}  \tSource Count: {source_count}  \t'
              f'Section Count {item_count}  \t'
              f'Section Source Index: {source_items}')
    print(report)


Item: 0  	Source Count: 1  	Section Count 1  	Section Source Index: 1
Item: 1  	Source Count: 2  	Section Count 2  	Section Source Index: 2
Item: 2  	Source Count: 3  	Section Count 3  	Section Source Index: 3
Item: 3  	Source Count: 4  	Section Count 4  	Section Source Index: 4
Item: 4  	Source Count: 5  	Section Count 5  	Section Source Index: 5
Item: 5  	Source Count: 6  	Section Count 6  	Section Source Index: 6
Item: 6  	Source Count: 7  	Section Count 7  	Section Source Index: 7
Item: 7  	Source Count: 8  	Section Count 8  	Section Source Index: 8
Item: 8  	Source Count: 9  	Section Count 9  	Section Source Index: 9
Item: 9  	Source Count: 10  	Section Count 10  	Section Source Index: 10
Item: 10  	Source Count: 11  	Section Count 11  	Section Source Index: 11
Item: 11  	Source Count: 12  	Section Count 12  	Section Source Index: 12


### 2-to-1 match
- range(n) as source
- processor converts 2 successive source items into a tuple of length 2
- for each section item: 
  - item = (source.item_count - 2, source.item_count - 1) 
  - source.item_count = section.source_item_count
  - source.item_count = section.item_count * 2


In [19]:
# Fresh integer source for the 2-to-1 example.
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

def pairs(source):
    """Group successive items from ``source`` into 2-tuples.

    Args:
        source: An iterator.  It must be its own iterator, because the
            second item of each pair is pulled with ``next(source)`` while
            the ``for`` loop is iterating over the same object.

    Yields:
        ``(first, second)`` tuples of consecutive items.

    If ``source`` holds an odd number of items, the final unpaired item is
    consumed and discarded and the generator ends normally.
    """
    for item in source:
        try:
            partner = next(source)
        except StopIteration:
            # A StopIteration escaping a generator body is converted into
            # RuntimeError (PEP 479), so end the generator explicitly.
            return
        yield (item, partner)

# The section name previously read '1-to-1 match', copied from the preceding
# example; it now matches what this section does.
section_2_1 = Section(
    section_name='2-to-1 match',
    processor=[pairs]
    )

# After each processed pair, report the source counter and both section counters.
for item in section_2_1.process(int_source):
    source_count = int_source.item_count
    source_items = section_2_1.source_item_count
    item_count = section_2_1.item_count
    print(f'Item: {item}    \tsource.item_count: {source_count}  \t'
          f'section.source_item_count: {source_items}  \t'
          f'section.item_count {item_count}')


Item: (0, 1)    	source.item_count: 2  	section.source_item_count: 2  	section.item_count 1
Item: (2, 3)    	source.item_count: 4  	section.source_item_count: 4  	section.item_count 2
Item: (4, 5)    	source.item_count: 6  	section.source_item_count: 6  	section.item_count 3
Item: (6, 7)    	source.item_count: 8  	section.source_item_count: 8  	section.item_count 4
Item: (8, 9)    	source.item_count: 10  	section.source_item_count: 10  	section.item_count 5
Item: (10, 11)    	source.item_count: 12  	section.source_item_count: 12  	section.item_count 6


### Skipped First Source Item
- (str(i) for i in range(n)) as source
- start_section='1', offset='Before' 
- no processor; items are returned unchanged
- for each section item: 
  - source.item_count = int(item) + 1
  - source.item_count = section.source_item_count 
  - source.item_count = section.item_count + 1

In [20]:
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)

# Start break on the item '1' with offset 'Before'.
start_before_one = SectionBreak('1', break_offset='Before')
section_skip_0 = Section(
    section_name='Skipped First Source Item',
    start_section=start_before_one,
)

# After each processed item, report the source counter and both section counters.
for item in section_skip_0.process(str_source):
    source_count = str_source.item_count
    source_items = section_skip_0.source_item_count
    item_count = section_skip_0.item_count
    report = (f'Item: {item}  \tsource.item_count: {source_count}  \t'
              f'section.source_item_count: {source_items}  \t'
              f'section.item_count {item_count}')
    print(report)


Item: 1  	source.item_count: 2  	section.source_item_count: 2  	section.item_count 1
Item: 2  	source.item_count: 3  	section.source_item_count: 3  	section.item_count 2
Item: 3  	source.item_count: 4  	section.source_item_count: 4  	section.item_count 3
Item: 4  	source.item_count: 5  	section.source_item_count: 5  	section.item_count 4
Item: 5  	source.item_count: 6  	section.source_item_count: 6  	section.item_count 5
Item: 6  	source.item_count: 7  	section.source_item_count: 7  	section.item_count 6
Item: 7  	source.item_count: 8  	section.source_item_count: 8  	section.item_count 7
Item: 8  	source.item_count: 9  	section.source_item_count: 9  	section.item_count 8
Item: 9  	source.item_count: 10  	section.source_item_count: 10  	section.item_count 9
Item: 10  	source.item_count: 11  	section.source_item_count: 11  	section.item_count 10
Item: 11  	source.item_count: 12  	section.source_item_count: 12  	section.item_count 11


### Skipped First 2 Items
- (str(i) for i in range(n)) as source
- start_section='1', offset='After' 
- no processor; items are returned unchanged
- for each section item: 
  - source.item_count = int(item) + 1
	- source.item_count = section.source_item_count
	- source.item_count = section.item_count + 2

In [21]:
str_source = BufferedIterator((str(i) for i in range(num_items)),
                              buffer_size=buffer_size)

# Start break on the item '1' with offset 'After'.  The section name
# previously read 'Skipped First Source Item', copied from the preceding
# example; it now matches this example.
section_skip_2 = Section(
    section_name='Skipped First 2 Items',
    start_section=SectionBreak('1', break_offset='After')
    )

# After each processed item, report the source counter and both section counters.
for item in section_skip_2.process(str_source):
    source_count = str_source.item_count
    source_items = section_skip_2.source_item_count
    item_count = section_skip_2.item_count
    print(f'Item: {item}  \tsource.item_count: {source_count}  \t'
          f'section.source_item_count: {source_items}  \t'
          f'section.item_count {item_count}')


Item: 2  	source.item_count: 3  	section.source_item_count: 3  	section.item_count 1
Item: 3  	source.item_count: 4  	section.source_item_count: 4  	section.item_count 2
Item: 4  	source.item_count: 5  	section.source_item_count: 5  	section.item_count 3
Item: 5  	source.item_count: 6  	section.source_item_count: 6  	section.item_count 4
Item: 6  	source.item_count: 7  	section.source_item_count: 7  	section.item_count 5
Item: 7  	source.item_count: 8  	section.source_item_count: 8  	section.item_count 6
Item: 8  	source.item_count: 9  	section.source_item_count: 9  	section.item_count 7
Item: 9  	source.item_count: 10  	section.source_item_count: 10  	section.item_count 8
Item: 10  	source.item_count: 11  	section.source_item_count: 11  	section.item_count 9
Item: 11  	source.item_count: 12  	section.source_item_count: 12  	section.item_count 10


### Don't Count Dropped Items
- range(n) as source
- processor drops even items and yields odd items
- for each section item: 
  - item = source.item_count - 1
  - source.item_count = section.source_item_count
  - source.item_count = section.item_count * 2


In [22]:
# Fresh integer source for the dropped-items example.
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

def odd_nums(source):
    """Yield only the items of ``source`` whose integer value is odd.

    Each item is converted with ``int()`` for the parity check only; the
    item itself is yielded unchanged.
    """
    for entry in source:
        if int(entry) % 2 != 1:
            continue
        yield entry

# Section whose processor keeps only the odd-valued items.
section_odd = Section(section_name='Odd Numbers', processor=[odd_nums])

# After each yielded item, report the source counter and both section counters.
for item in section_odd.process(int_source):
    source_count = int_source.item_count
    source_items = section_odd.source_item_count
    item_count = section_odd.item_count
    report = (f'Item: {item}  \tsource.item_count: {source_count}  \t'
              f'section.source_item_count: {source_items}  \t'
              f'section.item_count {item_count}')
    print(report)


Item: 1  	source.item_count: 2  	section.source_item_count: 2  	section.item_count 1
Item: 3  	source.item_count: 4  	section.source_item_count: 4  	section.item_count 2
Item: 5  	source.item_count: 6  	section.source_item_count: 6  	section.item_count 3
Item: 7  	source.item_count: 8  	section.source_item_count: 8  	section.item_count 4
Item: 9  	source.item_count: 10  	section.source_item_count: 10  	section.item_count 5
Item: 11  	source.item_count: 12  	section.source_item_count: 12  	section.item_count 6


### Completed section
- range(n) as source
- processor drops even items and yields odd items
- after section.read(source):
  - source.item_count = section.source_item_count = n
  - section.item_count = n // 2

In [23]:
# Fresh integer source for the completed-section example.
int_source = BufferedIterator(
    (n for n in range(num_items)),
    buffer_size=buffer_size,
)

def odd_nums(source):
    """Yield only the items of ``source`` whose integer value is odd.

    Each item is converted with ``int()`` for the parity check only; the
    item itself is yielded unchanged.
    """
    for entry in source:
        if int(entry) % 2 != 1:
            continue
        yield entry

# Section whose processor keeps only the odd-valued items.
section_odd = Section(section_name='Odd Numbers', processor=[odd_nums])

# Read the whole section in one call and show the resulting items.
item_list = section_odd.read(int_source)
pprint(item_list)

# Counters once reading has finished.
source_count = int_source.item_count
source_items = section_odd.source_item_count
item_count = section_odd.item_count
summary = (f'source.item_count: {source_count}  \t'
           f'section.source_item_count: {source_items}  \t'
           f'section.item_count {item_count}')
print(summary)


[1, 3, 5, 7, 9, 11]
source.item_count: 12  	section.source_item_count: 12  	section.item_count 6


### Partial Source Completed section
- (str(i) for i in range(n)) as source
- Random start_section and end_section 
- after section.read(source):
  - source.item_count = section.source_item_count
  - source.item_count = end_num
  - section.item_count = end_num - start_num


In [24]:
str_source = BufferedIterator((str(i) for i in range(num_items)),
                              buffer_size=buffer_size)

# Random start and end markers; the end is always after the start.
start_num = random.randint(1, num_items-2)
end_num = random.randint(start_num + 1, num_items)
print(f'Starting at {start_num}; going to {end_num}.')

# The section name previously read '2-to-1 match', copied from another
# example; this section has no pairing processor.
part_section = Section(
    section_name='Partial Source',
    start_section=str(start_num),
    end_section=str(end_num)
    )

# Read the whole section in one call and show the resulting items.
item_list = part_section.read(str_source)
pprint(item_list)

# Counters once reading has finished, next to the expected item count.
source_count = str_source.item_count
source_items = part_section.source_item_count
item_count = part_section.item_count
print(f'source.item_count: {source_count}  \t'
      f'section.source_item_count: {source_items}  \t'
      f'section.item_count {item_count}  \t'
      f'Expected Count {end_num - start_num}')


Starting at 3; going to 5.
['3', '4']
source.item_count: 5  	section.source_item_count: 5  	section.item_count 2  	Expected Count 2


### Completed Section With End Before
- `(str(i) for i in range(n))` as source
- end_section='2', offset='Before' 
- after section.read(source):
  - source.item_count = section.source_item_count = section.item_count = 2

In [25]:
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)

# End break on the item '2' with offset 'Before'.
end_before_two = SectionBreak('2', break_offset='Before')
section_end_before = Section(
    section_name='End Before',
    end_section=end_before_two,
)

# Read the whole section in one call and show the resulting items.
item_list = section_end_before.read(str_source)
pprint(item_list)

# Counters once reading has finished.
source_count = str_source.item_count
source_items = section_end_before.source_item_count
item_count = section_end_before.item_count
summary = (f'source.item_count: {source_count}  \t'
           f'section.source_item_count: {source_items}  \t'
           f'section.item_count {item_count}  \t')
print(summary)


['0', '1']
source.item_count: 2  	section.source_item_count: 2  	section.item_count 2  	


### Completed Section With End After
- (str(i) for i in range(n)) as source
- end_section='2', offset='After' 
- after section.read(source):
	- source.item_count = section.source_item_count = section.item_count = 3

In [26]:
str_source = BufferedIterator((str(i) for i in range(num_items)),
                              buffer_size=buffer_size)

# End break on the item '2' with offset 'After'.  The variable and section
# name previously said "End Before" (copied from the preceding example),
# which contradicted the break offset and rebound that example's variable.
section_end_after = Section(
    section_name='End After',
    end_section=SectionBreak('2', break_offset='After')
    )

# Read the whole section in one call and show the resulting items.
item_list = section_end_after.read(str_source)
pprint(item_list)

# Counters once reading has finished.
source_count = str_source.item_count
source_items = section_end_after.source_item_count
item_count = section_end_after.item_count
print(f'source.item_count: {source_count}  \t'
      f'section.source_item_count: {source_items}  \t'
      f'section.item_count {item_count}')


['0', '1', '2']
source.item_count: 3  	section.source_item_count: 3  	section.item_count 3


# Update() Tests


# Misc. examples

In [27]:
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)

# Random start and end markers; the end is always after the start.
start_num = random.randint(1, num_items - 2)
end_num = random.randint(start_num + 1, num_items)
print(f'Starting at {start_num}; going to {end_num}.')

def pairs(source):
    """Group successive items from ``source`` into 2-tuples.

    Args:
        source: An iterator.  It must be its own iterator, because the
            second item of each pair is pulled with ``next(source)`` while
            the ``for`` loop is iterating over the same object.

    Yields:
        ``(first, second)`` tuples of consecutive items.

    If ``source`` holds an odd number of items, the final unpaired item is
    consumed and discarded and the generator ends normally.
    """
    for item in source:
        try:
            partner = next(source)
        except StopIteration:
            # A StopIteration escaping a generator body is converted into
            # RuntimeError (PEP 479), so end the generator explicitly.
            return
        yield (item, partner)

# Pairing section bounded by the randomly chosen start and end markers.
section_2_1 = Section(
    section_name='2-to-1 match',
    start_section=str(start_num),
    end_section=str(end_num),
    processor=[pairs],
)

# After each processed pair, report the source counter and both section counters.
for item in section_2_1.process(str_source):
    source_count = str_source.item_count
    source_items = section_2_1.source_item_count
    item_count = section_2_1.item_count
    report = (f'Item: {item}  \tsource.item_count: {source_count}  \t'
              f'section.source_item_count: {source_items}  \t'
              f'section.item_count {item_count}')
    print(report)


Starting at 6; going to 9.
Item: ('6', '7')  	source.item_count: 8  	section.source_item_count: 8  	section.item_count 1


In [28]:
# Fresh string source for the subsection example.
str_source = BufferedIterator(
    (str(n) for n in range(num_items)),
    buffer_size=buffer_size,
)

def pairs(source):
    """Group successive items from ``source`` into 2-tuples.

    Args:
        source: An iterator.  It must be its own iterator, because the
            second item of each pair is pulled with ``next(source)`` while
            the ``for`` loop is iterating over the same object.

    Yields:
        ``(first, second)`` tuples of consecutive items.

    If ``source`` holds an odd number of items, the final unpaired item is
    consumed and discarded and the generator ends normally.
    """
    for item in source:
        try:
            partner = next(source)
        except StopIteration:
            # A StopIteration escaping a generator body is converted into
            # RuntimeError (PEP 479), so end the generator explicitly.
            return
        yield (item, partner)
        
# Subsection with a start break on '2', offset 'After'.
subsection_start = SectionBreak('2', break_offset='After')
subsection = Section(
    section_name='subsection',
    start_section=subsection_start,
)

# Outer pairing section: starts on '2' (offset 'Before'), ends on '3'
# (offset 'After'), and contains the subsection defined above.
outer_start = SectionBreak('2', break_offset='Before')
outer_end = SectionBreak('3', break_offset='After')
section_2_1 = Section(
    section_name='2-to-1 match',
    start_section=outer_start,
    end_section=outer_end,
    processor=[pairs],
    subsections=subsection,
)

# After each processed item, report the source counter and both section counters.
for item in section_2_1.process(str_source):
    source_count = str_source.item_count
    source_items = section_2_1.source_item_count
    item_count = section_2_1.item_count
    report = (f'Item: {item}  \tsource.item_count: {source_count}  \t'
              f'section.source_item_count: {source_items}  \t'
              f'section.item_count {item_count}')
    print(report)


Item: ('2', '3')  	source.item_count: 4  	section.source_item_count: 4  	section.item_count 1


In [29]:
int_source = BufferedIterator((i for i in range(num_items)),
                              buffer_size=buffer_size)

section_1_1 = Section(
    section_name='1-to-1 match',
    )

# Show the section's internal source index after every processed item.
# Only the index is displayed, so the counters that were previously read
# into unused variables on each iteration are no longer fetched.
# NOTE(review): this reads the private `_source_index`; other cells read the
# public `source_index` property - confirm whether it can be used here too.
for item in section_1_1.process(int_source):
    pprint(section_1_1._source_index)


[1]
[1, 2]
[1, 2, 3]
[1, 2, 3, 4]
[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5, 6]
[1, 2, 3, 4, 5, 6, 7]
[1, 2, 3, 4, 5, 6, 7, 8]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
