# Tests of Subsection source handling

### Imports

In [1]:
from typing import List
from pathlib import Path
from pprint import pprint
import re
import sys

import pandas as pd
import xlwings as xw

from buffered_iterator import BufferedIterator
import text_reader as tp
from sections import Rule, RuleSet, SectionBreak, ProcessingMethods, Section


### Logging

In [2]:
import logging

# Notebook-wide logger.  INFO keeps the output quiet; change the level to
# logging.DEBUG here when detailed tracing is wanted.
logger = logging.getLogger('Text Processing')
logger.setLevel(logging.INFO)

# Root handler format: logger name left-aligned in 20 columns, then the
# level name and the message.
logging.basicConfig(format='%(name)-20s - %(levelname)s: %(message)s')


### Function to compare context for two sections.

In [3]:
def compare_context(section1, section2):
    """Print the context of two sections side by side.

    One row is printed for every key found in either context.  Each row
    holds the key followed by the value from ``section1`` and the value
    from ``section2``; a key missing from one context shows as an empty
    column.  Keys and values are converted with ``str`` before formatting.

    Args:
        section1: Object whose ``context`` attribute is a mapping.
        section2: Object whose ``context`` attribute is a mapping.

    Returns:
        None.  The comparison is written to standard output.
    """
    ctx_template = '{key:16s}:\t{item1:16s}\t{item2:16s}'
    context_1 = section1.context
    context_2 = section2.context
    # Set iteration order is arbitrary (and varies between runs for string
    # keys), so sort on the string form to get a stable, readable listing
    # that also copes with keys of mixed types.
    all_keys = sorted(set(context_1.keys()) | set(context_2.keys()), key=str)
    for key in all_keys:
        item1 = context_1.get(key, '')
        item2 = context_2.get(key, '')
        print(ctx_template.format(key=str(key),
                                  item1=str(item1),
                                  item2=str(item2)))

### Source as Sequence of Integers

In [4]:
# Number of items in each test source.
n = 12
# The same 0..n-1 sequence, once as strings and once as integers.
str_source = [str(i) for i in range(n)]
int_source = list(range(n))

### Even and Odd Tests
> `evens = [x for x in range(10) if x%2 == 0]`
> 
> `evens = [x for x in range(10) if x&1 == 0]`
> 
> `odds = [x for x in range(10) if x&1 != 0]`
> 
> `odds = [x for x in range(10) if x&1 == 1]`


In [5]:
def is_even(x):
    """Return True when the lowest bit of the integer *x* is clear."""
    return x & 1 == 0


def is_odd(x):
    """Return True when the lowest bit of the integer *x* is set."""
    return x & 1 == 1

## Define Sections


In [6]:
# Baseline: a Section built with no arguments.  Reading the integer source
# through it returns every item unchanged (see the cell output).
blank_section = Section()
blank_section.read(int_source)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

In [7]:
# Section with only a start break: it triggers on an odd item with
# break_offset='After'.  No end break or processor is supplied.  On the
# integer source the result begins at 2, the item after the first odd value.
sub_section = Section(
    name='SubSection',
    start_section=SectionBreak(is_odd, break_offset='After', name='Odd_start')
    )

In [8]:
# Read the full integer source through sub_section and pretty-print the result.
pprint(sub_section.read(int_source))

[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]


In [9]:
# Outer section: start break on an odd item ('Before'), end break on an
# even item ('After'); the items it selects are passed to sub_section.
full_section = Section(
    name='Full',
    start_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Start FullSection',
    ),
    end_section=SectionBreak(
        is_even,
        break_offset='After',
        name='Even End FullSection',
    ),
    processor=sub_section,
)
pprint(full_section.read(int_source))

[[2]]


In [10]:
# Redefine the outer section so that both breaks trigger on odd items:
# start 'Before' an odd item, end 'After' an odd item.  sub_section still
# processes whatever the outer section selects.
full_section = Section(
    name='Full',
    start_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Start FullSection',
    ),
    end_section=SectionBreak(
        is_odd,
        break_offset='After',
        name='Odd End FullSection',
    ),
    processor=sub_section,
)
pprint(full_section.read(int_source))

[[2, 3]]


In [11]:
# Wrap the current full_section in a section that has no breaks of its own.
# The cell output shows full_section being applied repeatedly along the
# source, giving one nested result per occurrence.
multi_section = Section(name='Multi',
    processor=full_section
    )
pprint(multi_section.read(int_source))

[[[2, 3]], [[6, 7]], [[10, 11]]]


In [12]:
# Redefine the outer section again: both breaks trigger on odd items and
# both use break_offset='Before'.  sub_section remains the processor.
full_section = Section(
    name='Full',
    start_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Start FullSection',
    ),
    end_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Before End FullSection',
    ),
    processor=sub_section,
)
pprint(full_section.read(int_source))

[[2]]


In [13]:
# Rebuild the break-less wrapper around the latest full_section (odd start,
# odd 'Before' end).  The cell output shows one nested result per
# repetition of full_section over the source.
multi_section = Section(name='Multi',
    processor=full_section
    )
pprint(multi_section.read(int_source))

[[[2]], [[4]], [[6]], [[8]], [[10]]]


In [14]:
# Two complementary sections: each starts 'Before' an item of its own
# parity and ends 'Before' an item of the opposite parity.  The end breaks
# carry their own names ('Odd_end' / 'Even_end') so they are not confused
# with the start breaks when break names are reported.
odd_section = Section(
    name='OddSection',
    start_section=SectionBreak(is_odd, break_offset='Before',
                               name='Odd_start'),
    end_section=SectionBreak(is_even, break_offset='Before',
                             name='Odd_end'),
    )

even_section = Section(
    name='EvenSection',
    start_section=SectionBreak(is_even, break_offset='Before',
                               name='Even_start'),
    end_section=SectionBreak(is_odd, break_offset='Before',
                             name='Even_end'),
    )

# A list processor applies odd_section then even_section; the cell output
# shows the source split into [odd, even] pairs.
all_sections = Section(processor=[odd_section, even_section])
all_sections.read(int_source)

[[[1], [2]], [[3], [4]], [[5], [6]], [[7], [8]], [[9], [10]], [[11], []]]

In [15]:
# Single-item variants: each section starts 'Before' an item of its parity
# and ends 'After' an item of the same parity, with end_on_first_item=True.
# The end breaks carry their own names ('Odd_end' / 'Even_end') so they are
# not confused with the start breaks when break names are reported.
odd_section = Section(
    name='OddSection',
    start_section=SectionBreak(is_odd, break_offset='Before',
                               name='Odd_start'),
    end_section=SectionBreak(is_odd, break_offset='After',
                             name='Odd_end'),
    end_on_first_item=True,
    )

even_section = Section(
    name='EvenSection',
    start_section=SectionBreak(is_even, break_offset='Before',
                               name='Even_start'),
    end_section=SectionBreak(is_even, break_offset='After',
                             name='Even_end'),
    end_on_first_item=True,
    )

# Even section first this time; the cell output shows [even, odd] pairs
# covering the whole source.
all_sections = Section(processor=[even_section, odd_section])
all_sections.read(int_source)

[[[0], [1]], [[2], [3]], [[4], [5]], [[6], [7]], [[8], [9]], [[10], [11]]]

In [16]:
# Outer section bounded by odd items on both sides (both breaks 'Before'),
# processed by the list [odd_section, even_section] as currently defined.
full_section = Section(
    name='Full',
    start_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Start FullSection',
    ),
    end_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Before End FullSection',
    ),
    processor=[odd_section, even_section],
)
pprint(full_section.read(int_source))

[[[1], [2]]]


In [17]:
# Same outer section, but the end break now uses break_offset='After' on
# an odd item; the processor is still [odd_section, even_section].
full_section = Section(
    name='Full',
    start_section=SectionBreak(
        is_odd,
        break_offset='Before',
        name='Odd Start FullSection',
    ),
    end_section=SectionBreak(
        is_odd,
        break_offset='After',
        name='Odd End FullSection',
    ),
    processor=[odd_section, even_section],
)
pprint(full_section.read(int_source))

[[[1], [2]], [[3], []]]


In [18]:
# Rule-based filtering instead of section breaks.  odd_rule tests each item
# with is_odd, using pass_method='Original' and fail_method='None';
# presumably failing (even) items become blanks that tp.drop_blanks then
# removes -- the cell output contains only the odd values.
# NOTE: this rebinds odd_section, replacing the break-based definition.
odd_rule = Rule(is_odd, pass_method='Original', fail_method='None')
odd_section = Section(processor=[odd_rule, tp.drop_blanks])
odd_section.read(int_source)

[1, 3, 5, 7, 9, 11]