# Subsections Issue

## Setup

### Imports

In [1]:
from typing import List
from pathlib import Path
from pprint import pprint
import re
import sys

import pandas as pd
import xlwings as xw

from buffered_iterator import BufferedIterator
import text_reader as tp
from sections import Rule, RuleSet, SectionBreak, ProcessingMethods, Section

### Logging

In [2]:
import logging

# Each record is rendered as: logger name (left-justified in a 20-character
# field), the level name, then the message.
logging.basicConfig(
    format='%(name)-20s - %(levelname)s: %(message)s',
)

logger = logging.getLogger('Two Line SubSection Tests')
# INFO is the working level for this notebook.  For verbose output use
# logging.DEBUG here instead (or pass level=logging.DEBUG to basicConfig).
logger.setLevel(logging.INFO)

### Display Functions

In [3]:
#%%writefile display_functions.py

In [4]:
# %% Function to compare context for two sections.
def compare_context(section1, section2):
    """Print a side-by-side comparison of the context of two sections.

    One row is printed for every key found in either context.  A key that
    is missing from one context is shown as an empty string in that column.
    Rows are ordered by the string form of the key so the output is the
    same from run to run.

    Args:
        section1: Object with a dict-like ``context`` attribute.
        section2: Object with a dict-like ``context`` attribute.

    Returns:
        None.  The comparison is written to standard output.
    """
    ctx_template = '{key:16s}:\t{item1:16s}\t{item2:16s}'
    context_1 = section1.context
    context_2 = section2.context
    # A plain set has no stable iteration order (string hashing is randomized
    # per process), so sort the union.  Sorting on str(key) also copes with
    # keys of mixed types that cannot be compared directly.
    all_keys = sorted(set(context_1.keys()) | set(context_2.keys()), key=str)
    for key in all_keys:
        item1 = context_1.get(key, '')
        item2 = context_2.get(key, '')
        print(ctx_template.format(key=str(key),
                                  item1=str(item1),
                                  item2=str(item2)))
        
        
# %% Compare Buffered Iterator contents
def buffered_iterator_compare(iter1, iter2=None, iter3=None,
                              label1='From Iterator',
                              label2='To Iterator', label3=''):
    """Build a text table comparing the state of up to three iterators.

    For each supplied iterator the attributes ``previous_items``,
    ``future_items``, ``item_count``, ``_step_back`` and ``buffer_size`` are
    read and placed in that iterator's column.  An iterator passed as
    ``None`` gets empty cells.

    Args:
        iter1: First iterator to report on.
        iter2: Optional second iterator.
        iter3: Optional third iterator.
        label1: Column heading for ``iter1``.
        label2: Column heading for ``iter2``.
        label3: Column heading for ``iter3``.

    Returns:
        str: A heading row followed by one row per attribute.  Every row
        starts with a tab and ends with a newline.
    """
    def extract_attrs(buf_obj, requested_item, as_list=True):
        # Only a missing iterator (None) yields an empty cell.  A truthiness
        # test would also blank out a real iterator that happens to be falsy
        # (for example one defining __len__ that is currently empty).
        if buf_obj is None:
            return ''
        value = getattr(buf_obj, requested_item)
        if as_list:
            # Materialise sequence-like attributes so they print as a list.
            value = list(value)
        return str(value)

    def extract_attr_text(requested_item, iter1, iter2=None, iter3=None,
                          as_list=True):
        # Column number -> cell text for one attribute row.
        return {
            column: extract_attrs(buf_obj, requested_item, as_list)
            for column, buf_obj in enumerate((iter1, iter2, iter3), start=1)
        }

    row_template = ''.join([
        '\t{Label:<20s}',
        '{first_iter_item:<35s}',
        '{second_iter_item:<35s}',
        '{third_iter_item:<35s}\n'
        ])
    # Row label -> (attribute name, whether to display the value as a list).
    attr_group = {
        'Previous Items': ('previous_items', True),
        'Future Items': ('future_items', True),
        'Item Count': ('item_count', False),
        'Step Back': ('_step_back', False),
        'Buffer Size': ('buffer_size', False)
        }

    # Heading row: empty label cell followed by the three column labels.
    row_list = [
        row_template.format(
            Label='',
            first_iter_item=label1,
            second_iter_item=label2,
            third_iter_item=label3)
        ]

    for label, (requested_item, as_list) in attr_group.items():
        text_group = extract_attr_text(requested_item, iter1, iter2, iter3,
                                       as_list)
        row_list.append(row_template.format(
            Label=label,
            first_iter_item=text_group[1],
            second_iter_item=text_group[2],
            third_iter_item=text_group[3]))

    return ''.join(row_list)

### Initialize 2-line Section Tests

#### 2-line Section Source

In [5]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection Name: A',
    'EndSection Name: A',
    'StartSection Name: B',
    'EndSection Name: B',
    'More text to be ignored',
    ]

## Iterator Options

### Two line section *StartSection*
- Start *Before* `StartSection`
- End *After* `EndSection`

In [6]:
# Section used by the two iterator examples that follow.  The start break is
# placed 'Before' the 'StartSection' sentinel and the end break 'After' the
# 'EndSection' sentinel, so both sentinel lines are part of the section (see
# the recorded outputs of the next cells).
two_line_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After')
    )

#### Using a regular iterator

In [7]:
test_iter = iter(GENERIC_TEST_TEXT)
pprint(two_line_section.read(test_iter))

pprint(two_line_section.read(test_iter))

['StartSection Name: A', 'EndSection Name: A']
[]


![Good](../examples/Valid.png) A regular iterator is missing the buffer, so *Step Back* calls can lose future items.
- The second call to `pprint(two_line_section.read(test_iter))` returns an empty list because the second *StartSection* line is lost.
  
|Expected|Actual|
|-|-|
|`['StartSection Name: A', 'EndSection Name: A']`|`['StartSection Name: A', 'EndSection Name: A']`|
|`[]`|`[]`|

#### Using a *BufferedIterator* source.

In [8]:
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(two_line_section.read(test_iter))
pprint(two_line_section.read(test_iter))
pprint(two_line_section.read(test_iter))

['StartSection Name: A', 'EndSection Name: A']
['StartSection Name: B', 'EndSection Name: B']
[]


![Good](../examples/Valid.png) Supplying a *BufferedIterator* preserves the *Step Back* item.
- The third call to `pprint(two_line_section.read(test_iter))` returns an empty list because a third *StartSection* is not found.

|Expected|Actual|
|-|-|
|`['StartSection Name: A', 'EndSection Name: A']`|`['StartSection Name: A', 'EndSection Name: A']`|
|`['StartSection Name: B', 'EndSection Name: B']`|`['StartSection Name: B', 'EndSection Name: B']`|
|`[]`|`[]`|

# Simple sections experimenting with start and end settings

#### 2-line Section Source

In [9]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection Name: A',
    'EndSection Name: A',
    'StartSection Name: B',
    'EndSection Name: B',
    'More text to be ignored',
    ]

## `end_on_first_item` Tests

The `end_on_first_item` parameter in a section definition determines whether the 
`end_section` break tests are applied to the first line in a section.
`end_on_first_item=True` does not *force* the section to complete after a single 
line.  It only makes it *possible* to stop after the first line.

The value of this parameter is that repeat sections do not require distinct
starting and ending sentinels. With `end_on_first_item=False` (the default) the 
same SectionBreak can be applied to both `start_section` and `end_section`.  
In this case, the second occurrence of the section begins immediately at the 
end of the previous section.

In [10]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',
    'EndSection B',
    'More text to be ignored',
    ]

#### Setting `end_on_first_item=False` (the default)
- Using identical `start_section` and `end_section`:
    > `start_section=SectionBreak('StartSection', break_offset='Before')`<br>
    > `end_section=SectionBreak('StartSection', break_offset='Before')`<br>

- Do not test first line of section (the default).
    > `end_on_first_item=False` 

In [11]:
start_sub_section = Section(
    section_name='StartSubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('StartSection', break_offset='Before'),
    end_on_first_item=False
    )

pprint(start_sub_section.read(GENERIC_TEST_TEXT))

['StartSection A', 'EndSection A']


![Good](../examples/Valid.png) The first section is returned as a list.

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
          ['StartSection A', 'EndSection A']
        </code></td>
        <td><code>
          ['StartSection A', 'EndSection A']
        </code></td></tr>
</table>

#### Defining a top section with a repeating subsection
- Using identical `start_section` and `end_section`:
    > `start_section=SectionBreak('StartSection', break_offset='Before')`<br>
    > `end_section=SectionBreak('StartSection', break_offset='Before')`<br>

- Do not test first line of section (the default).
    > `end_on_first_item=False` 

In [12]:
# NOTE(review): the text above this cell describes identical start and end
# breaks (both on 'StartSection', offset 'Before'), but the end break defined
# here is 'EndSection' with break_offset='After' -- confirm which is intended.
start_sub_section = Section(
    section_name='StartSubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    end_on_first_item=False
    )

# Top section: no start break is given, it ends 'Before' the
# 'More text to be ignored' line, and start_sub_section is supplied as its
# processor.  The recorded output is one list per subsection found.
repeating_section = Section(
    section_name='Top Section',
    end_section=SectionBreak('More text to be ignored', break_offset='Before'),
    processor=start_sub_section
    )
pprint(repeating_section.read(GENERIC_TEST_TEXT))

[['StartSection A', 'EndSection A'], ['StartSection B', 'EndSection B']]


![Good](../examples/Valid.png) Both subsections are returned as a list of lists.

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
          [<br>
          ['StartSection A', 'EndSection A'],<br>
           ['StartSection B', 'EndSection B']<br>
          ]
        </code></td>
        <td><code>
          [<br>
          ['StartSection A', 'EndSection A'],<br>
           ['StartSection B', 'EndSection B']<br>
          ]
        </code></td></tr>
</table>

- Using the same section definition, except allow testing of the first line of 
  the section.
    > `end_on_first_item=True` 

In [13]:
start_sub_section = Section(
    section_name='StartSubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('StartSection', break_offset='Before'),
    end_on_first_item=True
    )

pprint(start_sub_section.read(GENERIC_TEST_TEXT))

[]


![Good](../examples/Valid.png) The ending SectionBreak triggers on the same 
item that triggers the start of the section.  This will always result in an 
empty section.

|Expected|Actual|
|-|-|
|[]|[]|

#### Single Line Section.
- Using the same sentinel *('EndSection')*, but different `break_offset`.
    > `start_section=SectionBreak('EndSection', break_offset='Before')`<br>
    > `end_section=SectionBreak('EndSection', break_offset='After')`<br>

- Allow testing of the first line of section.
    > `end_on_first_item=True` 

In [14]:
end_sub_section = Section(
    section_name='EndSubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    end_on_first_item=True
    )

pprint(end_sub_section.read(GENERIC_TEST_TEXT))

['EndSection A']


![Good](../examples/Valid.png) Single line section.
- Starts *Before* **EndSection** 
- Ends *After* **EndSection** (the same line)

|Expected|Actual|
|-|-|
|['EndSection A']|['EndSection A']|

#### Same section definition as above except with `end_on_first_item=False` 

In [15]:
end_sub_section = Section(
    section_name='EndSubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    end_on_first_item=False
    )

pprint(end_sub_section.read(GENERIC_TEST_TEXT))

['EndSection A', 'StartSection B', 'EndSection B']


![Good](../examples/Valid.png) Section continues until **After** next 
*EndSection* is found

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
          ['EndSection A',<br>
          'StartSection B',<br>
          'EndSection B']
        </code></td>
        <td><code>
          ['EndSection A',<br>
          'StartSection B',<br>
          'EndSection B']
        </code></td></tr>
</table>

## Multiple Single line sections
### Single line section *StartSection*
> - Start *Before* `StartSection`
> - End *Before* `EndSection`

In [16]:
sub_section1 = Section(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='Before')
    )

test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section1.read(test_iter))
pprint(sub_section1.read(test_iter))
pprint(sub_section1.read(test_iter))

['StartSection A']
['StartSection B']
[]


![Good](../examples/Valid.png) Single line `StartSections` are returned

|Expected|Actual|
|-|-|
|`['StartSection A']`|`['StartSection A']`|
|`['StartSection B']`|`['StartSection B']`|
|`[]`|`[]`|

### Defining a section that calls `sub_section1`

In [17]:
full_section = Section(
    section_name='Full',
    end_section=SectionBreak('ignored', break_offset='Before'),
    processor=[sub_section1]
    )
pprint(full_section.read(GENERIC_TEST_TEXT))

[['StartSection A'], ['StartSection B']]


![Good](../examples/Valid.png) Both *StartSection* lines are found.

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
        [<br>
          ['StartSection A'],<br>
          ['StartSection B'],<br>
        ]</code></td>
        <td><code>
        [<br>
          ['StartSection A'],<br>
          ['StartSection B'],<br>
        ]</code></td></tr>
</table>

### Multiple Attempts to define Single line *EndSection* 
#### Attempt #1. Copy *StartSection* design
> - Start *Before* `EndSection`
> - End *Before* `StartSection`


In [18]:
sub_section2a = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak('StartSection', break_offset='Before')
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2a.read(test_iter))
pprint(sub_section2a.read(test_iter))
pprint(sub_section2a.read(test_iter))

['EndSection A']
['EndSection B', 'More text to be ignored']
[]


![Good](../examples/Valid.png) The result is expected, but not desired.
- This doesn't work well because there isn't another *StartSection* after the last *EndSection*.
- As a result, it includes the unwanted text after the last *EndSection*.
 
|Expected|Actual|
|-|-|
|`['EndSection A']`|`['EndSection A']`|
|`['EndSection B', 'More text to be ignored']`|`['EndSection B', 'More text to be ignored']`|
|`[]`|`[]`|

#### Attempt #2
> - Start *Before* `EndSection`
> - End *After* `EndSection`

- This doesn't work because the first line is not tested, so it only breaks after the second occurrence of *EndSection*.

In [19]:
sub_section2b = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2b.read(test_iter))
pprint(sub_section2b.read(test_iter))
pprint(sub_section2b.read(test_iter))

['EndSection A', 'StartSection B', 'EndSection B']
[]
[]


![Good](../examples/Valid.png) 

|Expected|Actual|
|-|-|
|`['EndSection A', 'StartSection B', 'EndSection B']`|`['EndSection A', 'StartSection B', 'EndSection B']`|
|`[]`|`[]`|
|`[]`|`[]`|


#### Attempt #3
> - Start *Before* `EndSection`
> - End *After* `EndSection`
> - Enable testing of first item

- This should work because the first line should be tested and trigger a break.

In [20]:
sub_section2c = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    end_on_first_item=True,
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2c.read(test_iter))
pprint(sub_section2c.read(test_iter))
pprint(sub_section2c.read(test_iter))

['EndSection A']
['EndSection B']
[]


![Good](../examples/Valid.png) 

|Expected|Actual|
|-|-|
|`['EndSection A']`|`['EndSection A']`|
|`['EndSection B']`|`['EndSection B']`|
|`[]`|`[]`|


#### Attempt #3.5
> - Start *Before* `EndSection`
> - End *Before* `EndSection`
> - Enable testing of first item

- This should __<u>not</u>__ work because the first line should be tested and 
  trigger a break before returning anything.

In [21]:
sub_section2cc = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='Before'),
    end_on_first_item=True,
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2cc.read(test_iter))
pprint(sub_section2cc.read(test_iter))
pprint(sub_section2cc.read(test_iter))

[]
[]
[]


![Good](../examples/Valid.png) 

|Expected|Actual|
|-|-|
|`[]`|`[]`|
|`[]`|`[]`|
|`[]`|`[]`|


#### Attempt #4
> - Start *Before* `EndSection`
> - End is `True` (Always Break)
> - Enable testing of first item

- This should work because the first line should be tested and always trigger a break.

In [22]:
sub_section2d = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='After'),
    end_on_first_item=True,
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2d.read(test_iter))
pprint(sub_section2d.read(test_iter))
pprint(sub_section2d.read(test_iter))

['EndSection A']
['EndSection B']
[]


![Good](../examples/Valid.png) 

|Expected|Actual|
|-|-|
|`['EndSection A']`|`['EndSection A']`|
|`['EndSection B']`|`['EndSection B']`|
|`[]`|`[]`|

#### Attempt #5
> - Start *Before* `EndSection`
> - End is `True` (Always Break)
> - Don't enable testing of first item

- This should <u>not</u> work because the break should trigger on the second line.

In [23]:
# NOTE(review): this definition is identical to sub_section2f in Attempt #6
# (explicit break_offset='After').  The Attempt #5 description does not
# mention an offset, so this cell may have been meant to leave break_offset
# at its default -- confirm.
sub_section2e = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='After')
    )
# Read the same buffered source three times; each call continues from where
# the previous read stopped (see the recorded output below).
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2e.read(test_iter))
pprint(sub_section2e.read(test_iter))
pprint(sub_section2e.read(test_iter))

['EndSection A', 'StartSection B']
['EndSection B', 'More text to be ignored']
[]


![Good](../examples/Valid.png) 

|Expected|Actual|
|-|-|
|`['EndSection A', 'StartSection B']`|`['EndSection A', 'StartSection B']`|
|`['EndSection B', 'More text to be ignored']`|`['EndSection B', 'More text to be ignored']`|
|`[]`|`[]`|

**`end_section=SectionBreak(True)` defaults to `break_offset='Before'`**

#### Attempt #6
> - Start *Before* `EndSection`
> - End *After* ___`True`___ (Always Break)
> - Don't enable testing of first item

- This should <u>not</u> work because the break should trigger *After* the second line.

In [24]:
sub_section2f = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='After')
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2f.read(test_iter))
pprint(sub_section2f.read(test_iter))
pprint(sub_section2f.read(test_iter))

['EndSection A', 'StartSection B']
['EndSection B', 'More text to be ignored']
[]


![Good](../examples/Valid.png)

|Expected|Actual|
|-|-|
|`['EndSection A', 'StartSection B']`|`['EndSection A', 'StartSection B']`|
|`['EndSection B', 'More text to be ignored']`|`['EndSection B', 'More text to be ignored']`|
|`[]`|`[]`|

#### Attempt #7
> - Start *Before* `EndSection`
> - Enable testing of first item
> - No End setting

- This should start with the first *EndSection* line and not stop until the end of the iterator.

In [25]:
sub_section2g = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_on_first_item=True,
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2g.read(test_iter))
pprint(sub_section2g.read(test_iter))
pprint(sub_section2g.read(test_iter))

['EndSection A', 'StartSection B', 'EndSection B', 'More text to be ignored']
[]
[]


![Good](../examples/Valid.png)

|Expected|Actual|
|-|-|
|`['EndSection A', 'StartSection B', 'EndSection B', 'More text to be ignored']`|`['EndSection A', 'StartSection B', 'EndSection B', 'More text to be ignored']`|
|`[]`|`[]`|
|`[]`|`[]`|


#### Single line section *EndSection* __Final Design__.
> - Start *Before* `EndSection`
> - End *Before* ___`True`___ (Always Break)
> - Don't enable testing of first item

- This should work because the break should trigger *Before* the second line.

In [26]:
sub_section2 = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before')
    )
test_iter = BufferedIterator(GENERIC_TEST_TEXT)
pprint(sub_section2.read(test_iter))
pprint(sub_section2.read(test_iter))
pprint(sub_section2.read(test_iter))

['EndSection A']
['EndSection B']
[]


![Good](../examples/Valid.png) End breaks *Before* second line. 

|Expected|Actual|
|-|-|
|`['EndSection A']`|`['EndSection A']`|
|`['EndSection B']`|`['EndSection B']`|
|`[]`|`[]`|

In [27]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',
    'EndSection B', 
    'More text to be ignored',
    ]

In [28]:
start_sub_section = Section(
    section_name='StartSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='Before')
    )
end_sub_section = Section(
    section_name='EndSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before')
    )


### Combined Start and End subsections Single line section 
***start_sub_section***
> - Start *Before* `StartSection`
> - End *Before* `EndSection`

***end_sub_section*** 
> - Start *Before* `EndSection`
> - End *Before* ___`True`___ (Always Break)
> - Don't enable testing of first item

`processor=[[start_sub_section, end_sub_section]]`

In [29]:

# No start or end break is given for the top section.  The two subsections
# are passed together as a nested list; the recorded output below has one
# dictionary per group, keyed by each subsection's section_name.
full_section = Section(
    section_name='Full',
    processor=[[start_sub_section, end_sub_section]]
    )
pprint(full_section.read(GENERIC_TEST_TEXT))

[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection B'], 'StartSection': ['StartSection B']}]


![Good](../examples/Valid.png) List of dictionaries. 
One dictionary for each section group.
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr>
            <td><code>
                [<br>  
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},<br>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']}<br>
                ]
            </code></td>
            <td><code>
                [<br>  
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},<br>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']}<br>
                ]
            </code></td>
        </tr>
</table>

### Defining a Top Section that calls two single line subsections.

#### Two Subsection definitions 

In [30]:
start_sub_section = Section(
    section_name='StartSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before')
    )
end_sub_section = Section(
    section_name='EndSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before')
    )


#### Defining ***top_section*** 
- Contains an ending break:
    > `end_section=SectionBreak('ignored', break_offset='Before')`.

In [31]:

top_section = Section(
    section_name='Full',
    end_section=SectionBreak('ignored', break_offset='Before'),
    processor=[[start_sub_section, end_sub_section]]
    )
pprint(top_section.read(GENERIC_TEST_TEXT))

[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection B'], 'StartSection': ['StartSection B']}]


![Good](../examples/Valid.png) List of dictionaries. 
One dictionary for each section group.
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr>
            <td><code>
                [<br>  
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},<br>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']}<br>
                ]
            </code></td>
            <td><code>
                [<br>  
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},<br>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']}<br>
                ]
            </code></td>
        </tr>
</table>

#### Adding subsection C and subsection D

In [32]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',
    'EndSection B', 
    'StartSection C',
    'EndSection C',
    'StartSection D',
    'EndSection D',
    'More text to be ignored',
    ]

pprint(top_section.read(GENERIC_TEST_TEXT))

[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection B'], 'StartSection': ['StartSection B']},
 {'EndSection': ['EndSection C'], 'StartSection': ['StartSection C']},
 {'EndSection': ['EndSection D'], 'StartSection': ['StartSection D']}]


![Good](../examples/Valid.png) List of dictionaries. 
One dictionary for each section group.
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr><td><code>[</code></td><td><code>[</code></td></tr>
        <tr>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection C'],<br>
                'EndSection':   ['EndSection C']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection C'],<br>
                'EndSection':   ['EndSection C']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection D'],<br>
                'EndSection':   ['EndSection D']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection D'],<br>
                'EndSection':   ['EndSection D']},
            </code></td>
        </tr>
        <tr><td><code>]</code></td><td><code>]</code></td></tr>
</table>

### Including unwanted text in between subsections

In [33]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',  # Missing 'EndSection B',
    'StartSection C',
    'EndSection C',
    'More text to be ignored',   # 'ignored' triggers end of top section
    'StartSection D',
    'EndSection D',
    'More text to be ignored',
    ]

pprint(top_section.read(GENERIC_TEST_TEXT))


[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection C'], 'StartSection': ['StartSection B']}]


![Good](../examples/Valid.png) List of dictionaries stops after
 _'More text to be ignored'_ line.
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr><td><code>[</code></td><td><code>[</code></td></tr>
        <tr>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection C']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection C']},
            </code></td>
        </tr>
        <tr><td><code>]</code></td><td><code>]</code></td></tr>
</table>

#### Including unwanted text in between the start and end of subsection C

In [34]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',
    'EndSection B', 
    'StartSection C',
    'More text to be ignored',   # 'ignored' triggers end of top section
    'EndSection C',
    'StartSection D',
    'EndSection D',
    'More text to be ignored',
    ]

pprint(top_section.read(GENERIC_TEST_TEXT))

[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection B'], 'StartSection': ['StartSection B']},
 {'StartSection': ['StartSection C']}]


![Good](../examples/Valid.png) List of dictionaries stops after
 _'More text to be ignored'_ line.
 For Section __C__, only _'StartSection'_ is in the dictionary.
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr><td><code>[</code></td><td><code>[</code></td></tr>
        <tr>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection C']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection C']},
            </code></td>
        </tr>
        <tr><td><code>]</code></td><td><code>]</code></td></tr>
</table>

#### Dropping the corresponding *EndSection* for a *StartSection*.

In [35]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',  # Missing 'EndSection B',
    'StartSection C',
    'EndSection C',
    'StartSection D',
    'EndSection D',
    'More text to be ignored',
    ]

pprint(top_section.read(GENERIC_TEST_TEXT))


[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection C'], 'StartSection': ['StartSection B']},
 {'EndSection': ['EndSection D'], 'StartSection': ['StartSection D']}]


![Good](../examples/Valid.png) _'StartSection B'_ is read, then section ends. 
`top_section` then ignores all lines until start of next `end_sub_section`:
_'EndSection C'_.
<style type="text/css">
.hl{font-weight:bold;color:cyan;font-size: 110%;font-family: serif}
</style>
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr><td><code>[</code></td><td><code>[</code></td></tr>
        <tr>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection <Span class="hl">B</Span>'],<br>
                'EndSection':   ['EndSection <Span class="hl">C</Span>']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection <Span class="hl">B</Span>'],<br>
                'EndSection':   ['EndSection <Span class="hl">C</Span>']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection D'],<br>
                'EndSection':   ['EndSection D']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection D'],<br>
                'EndSection':   ['EndSection D']},
            </code></td>
        </tr>
        <tr><td><code>]</code></td><td><code>]</code></td></tr>
</table>

#### Dropping the corresponding *StartSection* for an *EndSection*.

In [36]:
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection A',
    'EndSection A',
    'StartSection B',  
    'EndSection B',  # Missing 'StartSection C',
    'EndSection C',
    'StartSection D',
    'EndSection D',
    'More text to be ignored',
    ]

pprint(top_section.read(GENERIC_TEST_TEXT))


[{'EndSection': ['EndSection A'], 'StartSection': ['StartSection A']},
 {'EndSection': ['EndSection B'], 'StartSection': ['StartSection B']},
 {'EndSection': ['EndSection D'], 'StartSection': ['StartSection D']}]


![Good](../examples/Valid.png) The entire _Section C_ is skipped because it 
will not look for an _EndSection_ until a _StartSection_ is found.
<table>
    <thead><tr><th>Expected</th><th>Actual</th></tr></thead>
        <tr><td><code>[</code></td><td><code>[</code></td></tr>
        <tr>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection A'],<br>
                'EndSection':   ['EndSection A']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection B'],<br>
                'EndSection':   ['EndSection B']},
            </code></td>
        </tr><tr>
            <td><code>
                {'StartSection': ['StartSection D'],<br>
                'EndSection':   ['EndSection D']},
            </code></td>
            <td><code>
                {'StartSection': ['StartSection D'],<br>
                'EndSection':   ['EndSection D']},
            </code></td>
        </tr>
        <tr><td><code>]</code></td><td><code>]</code></td></tr>
</table>

## Check *Source* status
- Check ___Source___ status after section read.

In [37]:
# Subsection whose start and end breaks are given explicit names.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='Before', name='SubSectionStart'),
    end_section=SectionBreak('EndSection', break_offset='After', name='SubSectionEnd')
    )
# Top section that ends 'Before' the 'ignored' sentinel and uses sub_section
# as its processor.
full_section = Section(
    section_name='Full',
    end_section=SectionBreak('ignored', break_offset='Before'),
    processor=sub_section
    )
# The read result is kept but not displayed; this cell only inspects the
# state of the section's source after the read.
a = full_section.read(GENERIC_TEST_TEXT)
print('previous_items')
pprint(list(full_section.source.previous_items))
print()
print('future_items')
pprint(list(full_section.source.future_items ))

previous_items
['EndSection B', 'EndSection C', 'StartSection D', 'EndSection D']

future_items
['More text to be ignored']


#### Expected source values
<table>
<thead>
  <tr>
    <th>Buffer</th>
    <th>Expected</th>
    <th>Actual</th>
    <th>Test</th>
  </tr>
</thead>
<tr><td><b>previous_items</b></td>
<td><code>['EndSection B', 'EndSection C',<br>
           'StartSection D', 'EndSection D']</code></td>
<td><code>['EndSection B', 'EndSection C',<br>
           'StartSection D', 'EndSection D']</code></td>
<td><img src="../examples/Valid.png" alt="Good"/></td></tr>
<tr><td><b>future_items</b></td>
<td><code>['More text to be ignored']</code></td>
<td><code>['More text to be ignored']</code></td>
<td><img src="../examples/Valid.png" alt="Good"/></td></tr>
</table>