# Subsections Issue

## Setup

### Imports

In [2]:
from typing import List
from pathlib import Path
from pprint import pprint
import re
import sys

import pandas as pd
import xlwings as xw

from buffered_iterator import BufferedIterator
import text_reader as tp
from sections import Rule, RuleSet, SectionBreak, ProcessingMethods, Section

### Function to compare context for two sections.

In [3]:
def compare_context(section1, section2):
    """Print the context of two sections side by side, one row per key.

    Args:
        section1: Object with a dict-like ``context`` attribute; its values
            fill the first value column.
        section2: Object with a dict-like ``context`` attribute; its values
            fill the second value column.

    Returns:
        None. One formatted line per key is written to stdout.

    Keys present in only one context get an empty string in the other
    column. Rows are emitted in sorted order of ``str(key)`` so the printed
    table is identical from one kernel run to the next; iterating the raw
    set union gives an arbitrary order for string keys.
    """
    ctx_template = '{key:16s}:\t{item1:16s}\t{item2:16s}'
    context_1 = section1.context
    context_2 = section2.context
    # Union of both key sets so that no entry from either side is dropped.
    all_keys = set(context_1.keys()) | set(context_2.keys())
    # Sort on the string form: keys need not be mutually comparable.
    for key in sorted(all_keys, key=str):
        item1 = context_1.get(key, '')
        item2 = context_2.get(key, '')
        # Everything is converted with str() because the ':16s' format
        # spec only accepts strings.
        print(ctx_template.format(key=str(key), item1=str(item1), item2=str(item2)))

In [4]:
# Two two-line sections (A and B) with unrelated text before, between and
# after them.
GENERIC_TEST_TEXT = [
    'Text to be ignored',
    'StartSection Name: A', 'EndSection Name: A',
    'Text between sections',
    'StartSection Name: B', 'EndSection Name: B',
    'More text to be ignored',
]

In [5]:
# Three back-to-back start/end pairs; pair C is interrupted by a line that
# contains the word 'ignored'.
GENERIC_TEST_TEXT1 = [
    'Text to be ignored',
    'StartSection A', 'EndSection A',
    'StartSection B', 'EndSection B',
    'StartSection C',
    'More text to be ignored',   # 'ignored' triggers end of top section
    'EndSection C',
    'Even more text to be ignored',
]

In [6]:
# Same as GENERIC_TEST_TEXT1, except that 'EndSection B' has been removed so
# that 'StartSection B' has no matching end line.
GENERIC_TEST_TEXT1a = [
    'Text to be ignored',
    'StartSection A', 'EndSection A',
    'StartSection B',            # Missing 'EndSection B'
    'StartSection C',
    'More text to be ignored',   # 'ignored' triggers end of top section
    'EndSection C',
    'Even more text to be ignored',
]

In [7]:
# Three three-line sections (Start / Middle / End); only A and B are
# separated by an unwanted line.
GENERIC_TEST_TEXT2 = [
    'Text to be ignored',
    'StartSection A', 'MiddleSection A', 'EndSection A',
    'Unwanted text between sections',
    'StartSection B', 'MiddleSection B', 'EndSection B',
    'StartSection C', 'MiddleSection C', 'EndSection C',
    'Even more text to be ignored',
]

# Simple sections experimenting with start and end settings

### Defining a Top Section that calls two single line subsections.

#### Two Subsection definitions 

In [8]:
# Two single-line subsections: each starts just before the line holding its
# keyword and uses an always-true end break.
start_sub_section = Section(
    section_name='StartSubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before'),
)
end_sub_section = Section(
    section_name='EndSubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before'),
)

#### Including unwanted text in between the start and end of subsection C

#### Setting `keep_partial=True`

In [9]:
# Top section: no start break, ends before the first line containing
# 'ignored', and keeps partially completed subsection groups.
top_section = Section(
    section_name='Top Section',
    end_section=SectionBreak('ignored', break_offset='Before'),
    subsections=[
        start_sub_section,
        end_sub_section,
    ],
    keep_partial=True,
)

pprint(top_section.read(GENERIC_TEST_TEXT1))

[[['StartSection A'], ['EndSection A']],
 [['StartSection B'], ['EndSection B']],
 [['StartSection C'], []]]


![Bad](../examples/error.png) Expecting `['StartSection C']`, 
but not the following empty list.
- That may not be a bad thing.
- Just need to understand why it happens

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
          [<br>
            [['StartSection A'], ['EndSection A']],<br>
            [['StartSection B'], ['EndSection B']]<br>
            [['StartSection C']]<br>
          ]
        </code></td>
        <td><code>
          [<br>
            [['StartSection A'], ['EndSection A']],<br>
            [['StartSection B'], ['EndSection B']]<br>
            [['StartSection C'], []]<br>
          ]
        </code></td></tr>
</table>

#### Dropping the corresponding *EndSection* for a *StartSection*.

In [10]:
# Same top-section definition as before, this time read against the text in
# which 'EndSection B' is missing.
top_section = Section(
    section_name='Top Section',
    end_section=SectionBreak('ignored', break_offset='Before'),
    subsections=[
        start_sub_section,
        end_sub_section,
    ],
    keep_partial=True,
)

pprint(top_section.read(GENERIC_TEST_TEXT1a))

[[['StartSection A'], ['EndSection A']], [['StartSection B'], []]]


![Bad](../examples/error.png) Expecting `['StartSection B']`, 
but not the following empty list.
- 3rd section group never starts because **ignored** *Top Section* break line 
  occurs before next **EndSection**, so 2nd never finishes.

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
          [<br>
            [['StartSection A'], ['EndSection A']],<br>
            [['StartSection B']]<br>
          ]
        </code></td>
        <td><code>
          [<br>
            [['StartSection A'], ['EndSection A']],<br>
            [['StartSection B'], []]<br>
          ]
        </code></td></tr>
</table>

## Three line sections

**********

# DONE TO HERE

### Run with two-line sections:
> `[`<br>
> `'Text to be ignored',`<br>
> `'StartSection Name: A',`<br>
> `'EndSection Name: A',`<br>
> `'Text between sections',`<br>
> `'StartSection Name: B',`<br>
> `'EndSection Name: B',`<br>
> `'More text to be ignored'`<br>
> `]`<br>

- Section start **Before** *StartSection*
- Section end **After** *EndSection*
- SubSection start **After** *StartSection*

```python
sub_section = Section(section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='After')
    )

full_section = Section(section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section] 
    )
```

In [11]:
# SubSection starts after the 'StartSection' line and has no end break.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='After'),
)
# Full runs from before 'StartSection' to after 'EndSection'; note that the
# subsection is passed as a bare Section here, not wrapped in a list.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=sub_section,
)
# Multi has no breaks of its own; it only wraps Full.
multi_section = Section(section_name='Multi', subsections=full_section)

In [12]:
# Read the two-line test text through the nested Multi -> Full -> SubSection
# definition and pretty-print whatever comes back.
pprint(multi_section.read(GENERIC_TEST_TEXT))

[]


- ![Bad](../examples/error.png) Results in an empty list instead of one sub-list per section.
<table><thead><th>Expected</th><th>Actual</th></thead>
  <td><code>
    [<br>
      [['EndSection Name: A']],<br>
      [['EndSection Name: B']]<br>
    ]</code></td>
  <td><code>[]</code></td></tr>
</table>

In [13]:
# SubSection starts before 'EndSection'. end_on_first_item, keep_partial and
# an always-true end break are all left unset in this variant.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
)

# Full runs from before 'StartSection' to after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=sub_section,
)

# Multi only wraps Full; end_on_first_item is left unset here as well.
multi_section = Section(section_name='Multi', subsections=full_section)

pprint(multi_section.read(GENERIC_TEST_TEXT2))

[]


In [14]:
# Read the two-line test text with full_section on its own, i.e. without the
# Multi wrapper used in the previous cell.
pprint(full_section.read(GENERIC_TEST_TEXT))

[]


- ![Bad](../examples/error.png) Results in an empty list.
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>[]</code></td></tr>
</table>

### Add start to SubSection Definition
> `start_section=SectionBreak('EndSection', break_offset='Before')`

In [15]:
# SubSection: only a start break (before 'EndSection') is set.
# Left unset in this variant: end_on_first_item, keep_partial, end_section.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
)

# Full: neither start_section nor end_section is set; it only wraps SubSection.
full_section = Section(section_name='Full', subsections=[sub_section])
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A',
  'Text between sections',
  'StartSection Name: B',
  'EndSection Name: B',
  'More text to be ignored']]


- ![Bad](../examples/Error.png) - ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>
[['EndSection Name: A',<br>
  'Text between sections',<br>
  'StartSection Name: B',<br>
  'EndSection Name: B',<br>
  'More text to be ignored']]
  </code></td></tr>
</table>

### Add *Start After* to Section Definition
> `start_section=SectionBreak('StartSection', break_offset='After')`

In [16]:
# SubSection: no breaks or options at all in this variant
# (start_section, end_on_first_item, keep_partial, end_section all unset).
sub_section = Section(section_name='SubSection')

# Full: starts after 'StartSection'; end_section is left unset.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A',
  'Text between sections',
  'StartSection Name: B',
  'EndSection Name: B',
  'More text to be ignored']]


- ![Good](../examples/Valid.png) Results in one line section
- ![Bad](../examples/Error.png) - ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>
[['EndSection Name: A',<br>
  'Text between sections',<br>
  'StartSection Name: B',<br>
  'EndSection Name: B',<br>
  'More text to be ignored']]
  </code></td></tr>
</table>

### Add *Start After StartSection* to Section Definition and *Start Before End Section* to SubSection Definition
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='After')`
> 
> **SubSection Definition**<br>
> `start_section=SectionBreak('EndSection', break_offset='Before')`
    

In [17]:
# SubSection: starts before 'EndSection'.
# Left unset in this variant: end_on_first_item, keep_partial, end_section.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
)

# Full: starts after 'StartSection'; end_section is left unset.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A',
  'Text between sections',
  'StartSection Name: B',
  'EndSection Name: B',
  'More text to be ignored']]


- ![Bad](../examples/error.png) - ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>
[['EndSection Name: A',<br>
  'Text between sections',<br>
  'StartSection Name: B',<br>
  'EndSection Name: B',<br>
  'More text to be ignored']]
  </code></td></tr>
</table>

### Add *Start After StartSection* to Section Definition and *Start Before End Section* to SubSection Definition and set *End On First Line* for SubSection
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='After')`
> 
> **SubSection Definition**<br>
> `start_section=SectionBreak('EndSection', break_offset='Before'),`
> `end_on_first_item=True,`
    

In [18]:
# SubSection: starts before 'EndSection' with end_on_first_item switched on.
# Left unset in this variant: keep_partial, end_section.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
    end_on_first_item=True,
)

# Full: starts after 'StartSection'; end_section is left unset.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A',
  'Text between sections',
  'StartSection Name: B',
  'EndSection Name: B',
  'More text to be ignored']]


- ![Bad](../examples/Error.png) - ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>
[['EndSection Name: A',<br>
  'Text between sections',<br>
  'StartSection Name: B',<br>
  'EndSection Name: B',<br>
  'More text to be ignored']]
  </code></td></tr>
</table>

### Add *Start After StartSection* to Section Definition, and for SubSection Definition, set *Start* to  *Before EndSection* and *End* to *`True` (Always Break)*
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='After')`
> 
> **SubSection Definition**<br>
> `start_section=SectionBreak('EndSection', break_offset='Before'),`
> `end_section=SectionBreak(True),`
    

In [19]:
# SubSection: starts before 'EndSection' and has an always-true end break.
# Left unset in this variant: end_on_first_item, keep_partial.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
    end_section=SectionBreak(True),
)

# Full: starts after 'StartSection'; end_section is left unset.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A'], ['EndSection Name: B']]


- ![Good](../examples/Valid.png) Results in one line section
- ![Bad](../examples/Error.png) - ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>
[['EndSection Name: A'], ['EndSection Name: B']]
  </code></td></tr>
</table>

### Add *Start After StartSection* and *End After EndSection* to Section Definition, and for SubSection Definition, set *Start* to *Before EndSection* and *End* to *`True` (Always Break)*
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='After'),`
> `end_section=SectionBreak('EndSection', break_offset='After'),`
    
> 
> **SubSection Definition**<br>
> `start_section=SectionBreak('EndSection', break_offset='Before'),`
> `end_section=SectionBreak(True),`
    

In [20]:
# SubSection: starts before 'EndSection' and has an always-true end break.
# Left unset in this variant: end_on_first_item, keep_partial.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
    end_section=SectionBreak(True),
)

# Full: starts after 'StartSection' and ends after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='After'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A']]


- ![Good](../examples/Valid.png) Results in one line section
- ![Bad](../examples/Error.png) - ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>
[['EndSection Name: A']]
  </code></td></tr>
</table>

### Add *Start __Before__ StartSection* and *End After EndSection* to Section Definition, and for SubSection Definition, set *Start* to *Before EndSection* and *End* to *`True` (Always Break)*
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='Before'),`
> `end_section=SectionBreak('EndSection', break_offset='After'),`
    
> 
> **SubSection Definition**<br>
> `start_section=SectionBreak('EndSection', break_offset='Before'),`
> `end_section=SectionBreak(True),`
    

In [21]:
# SubSection: starts before 'EndSection' and has an always-true end break.
# Left unset in this variant: end_on_first_item, keep_partial.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
    end_section=SectionBreak(True),
)

# Full: starts before 'StartSection' and ends after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[]


- ![Bad](../examples/Error.png) Results in an empty list
- ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>[]</code></td></tr>
</table>

### Add *Start __Before__ StartSection* and *End After EndSection* to Section Definition, and for SubSection Definition, set *Start* to *Before EndSection*, *End* to *`True` (Always Break)* and *Keep Partial* to *`True`*
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='Before'),`
> `end_section=SectionBreak('EndSection', break_offset='After'),`    
> 
> **SubSection Definition**<br>
> `start_section=SectionBreak('EndSection', break_offset='Before'),`<br>
> `end_section=SectionBreak(True),`<br>
> `keep_partial=True,`
    

In [22]:
# SubSection: starts before 'EndSection', keeps partial results and has an
# always-true end break. Only end_on_first_item is left unset.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),  # Added to use alone
    keep_partial=True,
    end_section=SectionBreak(True),
)

# Full: starts before 'StartSection' and ends after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[]


- ![Bad](../examples/Error.png) Results in an empty list
- ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code></code></td>
<td><code>[]</code></td></tr>
</table>

### Add *Start __Before__ StartSection* and *End After EndSection* to Section Definition, and don't set any SectionBreaks for SubSection Definition
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='Before'),`
> `end_section=SectionBreak('EndSection', break_offset='After'),`    
    

In [23]:
# SubSection: no breaks or options at all in this variant
# (start_section, end_on_first_item, keep_partial, end_section all unset).
sub_section = Section(section_name='SubSection')

# Full: starts before 'StartSection' and ends after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['StartSection Name: A', 'EndSection Name: A']]


- ![Good](../examples/Valid.png) ![Bad](../examples/Error.png) ??? Is this incorrect or is this expected???
  
<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code>[['StartSection Name: A', 'EndSection Name: A']]</code></td>
<td><code>[['StartSection Name: A', 'EndSection Name: A']]</code></td></tr>
</table>

### Add *Start __After__ StartSection* and *End After EndSection* to Section Definition, and don't set any SectionBreaks for SubSection Definition
> **Section Definition**<br> 
> `start_section=SectionBreak('StartSection', break_offset='After'),`
> `end_section=SectionBreak('EndSection', break_offset='After'),`    
    

In [24]:
# SubSection: no breaks or options at all in this variant
# (start_section, end_on_first_item, keep_partial, end_section all unset).
sub_section = Section(section_name='SubSection')

# Full: starts after 'StartSection' and ends after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='After'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section],
)
pprint(full_section.read(GENERIC_TEST_TEXT))

[['EndSection Name: A',
  'Text between sections',
  'StartSection Name: B',
  'EndSection Name: B']]


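- ![Good](../examples/Valid.png) ![Bad](../examples/Error.png) ??? Is this incorrect or is this expected???
- The result does not stop at the first *EndSection* line; it runs on through the second one.

<table><thead><th>Expected</th><th>Actual</th></thead>
<tr><td><code>[['EndSection Name: A']]</code></td>
<td><code>[['EndSection Name: A',<br>
  'Text between sections',<br>
  'StartSection Name: B',<br>
  'EndSection Name: B']]</code></td></tr>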
</table>

## Three line sections

In [25]:
# Three three-line sections; section B has an extra unrelated line between
# its Start and Middle lines.
GENERIC_TEST_TEXT3 = [
    'Text to be ignored',
    'StartSection A', 'MiddleSection A', 'EndSection A',
    'Unwanted text between sections',
    'StartSection B',
    'Random text in the middle of a section',
    'MiddleSection B', 'EndSection B',
    'StartSection C', 'MiddleSection C', 'EndSection C',
    'Even more text to be ignored',
]

#### Single line subsection definitions 

In [26]:
# Three single-line subsections, one per keyword. Each starts just before
# the line holding its keyword and uses an always-true end break.
start_sub_section = Section(
    section_name='StartSubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before'),
)
middle_sub_section = Section(
    section_name='MiddleSubSection',
    start_section=SectionBreak('MiddleSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before'),
)
end_sub_section = Section(
    section_name='EndSubSection',
    start_section=SectionBreak('EndSection', break_offset='Before'),
    end_section=SectionBreak(True, break_offset='Before'),
)

#### Defining ***three_part_section*** 
- Contains an ending break:
    > `end_section=SectionBreak('ignored', break_offset='Before')`.

- Contains 3 subsections:
    > `[start_sub_section, middle_sub_section, end_sub_section]`

In [27]:
# Top section made of the three single-line subsections; it ends before the
# first line containing 'ignored'.
three_part_section = Section(
    section_name='Top Section',
    end_section=SectionBreak('ignored', break_offset='Before'),
    subsections=[
        start_sub_section,
        middle_sub_section,
        end_sub_section,
    ],
)
pprint(three_part_section.read(GENERIC_TEST_TEXT3))

[[['StartSection A'], ['MiddleSection A'], ['EndSection A']],
 [['StartSection B'], ['MiddleSection B'], ['EndSection B']],
 [['StartSection C'], ['MiddleSection C'], ['EndSection C']]]


![Good](../examples/Valid.png) All 3 sections and subsections are completed. 

<table>
    <thead><th>Expected</th><th>Actual</th></thead>
    <tr>
        <td><code>
          [<br>
            [
              ['StartSection A'],<br> 
              ['MiddleSection A'],<br> 
              ['EndSection A']
            ],<br>
            [
              ['StartSection B'],<br> 
              ['MiddleSection B'],<br> 
              ['EndSection B']
            ],<br>
            [
              ['StartSection C'],<br> 
              ['MiddleSection C'],<br> 
              ['EndSection C']
            ]<br>
          ]
        </code></td>
        <td><code>
          [<br>
            [
              ['StartSection A'],<br> 
              ['MiddleSection A'],<br> 
              ['EndSection A']
            ],<br>
            [
              ['StartSection B'],<br> 
              ['MiddleSection B'],<br> 
              ['EndSection B']
            ],<br>
            [
              ['StartSection C'],<br> 
              ['MiddleSection C'],<br> 
              ['EndSection C']
            ]<br>
          ]
        </code></td></tr>
</table>

- Section start **Before** *StartSection*
- Section end **After** *EndSection*
- SubSection **End On First**

```python
sub_section = Section(section_name='SubSection',
    end_on_first_item=True,
    )

full_section = Section(section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section] 
    )
```

In [28]:
# SubSection: only end_on_first_item is set.
# Left unset in this variant: start_section, keep_partial, end_section.
sub_section = Section(section_name='SubSection', end_on_first_item=True)

# Full: starts before 'StartSection' and ends after 'EndSection'.
full_section = Section(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    subsections=[sub_section],
)

pprint(full_section.read(GENERIC_TEST_TEXT3))

[['StartSection A', 'MiddleSection A', 'EndSection A']]


In [29]:
# Top section with only the Start and End subsections (no Middle subsection
# and no keep_partial), read against the three-line test text.
top_section = Section(
    section_name='Top Section',
    end_section=SectionBreak('ignored', break_offset='Before'),
    subsections=[
        start_sub_section,
        end_sub_section,
    ],
)
pprint(top_section.read(GENERIC_TEST_TEXT3))

[[['StartSection A'], ['EndSection A']],
 [['StartSection B'], ['EndSection B']],
 [['StartSection C'], ['EndSection C']]]


In [30]:
# SubSection whose start and end breaks both key on 'StartSection':
# start before the matching line, end after it.
sub_section = Section(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('StartSection', break_offset='After'),
)

In [31]:
# Parent section with no breaks of its own; it wraps the bare sub_section
# and keeps partial results.
full_section = Section(
    section_name='Full',
    subsections=sub_section,
    keep_partial=True,
)

In [32]:
# Wrap the test text in a BufferedIterator so that its previous_items and
# future_items can be inspected once the read has finished.
test_iter = BufferedIterator(GENERIC_TEST_TEXT3)
# The result is kept in read_1 rather than displayed in this cell.
read_1 = full_section.read(test_iter)

In [33]:
# Show what the iterator holds in previous_items after the read
# (presumably the most recently consumed lines; verify against BufferedIterator).
pprint(list(test_iter.previous_items))

['EndSection B',
 'StartSection C',
 'MiddleSection C',
 'EndSection C',
 'Even more text to be ignored']


In [34]:
# Show what the iterator holds in future_items after the read
# (presumably lines queued to be re-read; verify against BufferedIterator).
pprint(list(test_iter.future_items))

[]


In [35]:
# Print the context of the parent section and of its subsection side by
# side, as left behind by the read above.
compare_context(full_section, sub_section)

Break           :	                	SectionBreak    
Current Section :	Full            	SubSection      
Status          :	End of Source   	End of Source   
Event           :	                	StartSection    
Skipped Lines   :	[]              	['Random text in the middle of a section', 'MiddleSection B', 'EndSection B']


In [63]:
# Source for the experiment: the strings '0' .. '9' behind a BufferedIterator
# created with buffer_size=5.
buffer_size, num_items = 5, 10

str_source = BufferedIterator(
    (str(number) for number in range(num_items)),
    buffer_size=buffer_size,
)

def pairs(source):
    """Yield consecutive, non-overlapping 2-tuples from *source*.

    Args:
        source: Any iterable or iterator. Items are consumed two at a time.

    Yields:
        ``(first, second)`` tuples made of successive items.

    An odd trailing item is consumed and dropped. Calling ``next(source)``
    directly inside the generator body instead turns the end of an
    odd-length source into a ``RuntimeError`` (PEP 479), and fails with
    ``TypeError`` when *source* is an iterable that is not its own iterator.
    """
    # Feeding the same iterator to zip twice makes it draw two successive
    # items for every tuple it builds.
    items = iter(source)
    yield from zip(items, items)
        
# Subsection that starts after the item '1' and has no end break.
subsection = Section(
    section_name='subsection',
    start_section=SectionBreak('1', break_offset='After'),
)

# Parent section from before '1' to after '6'; its processor list holds the
# pairs generator, and it hands the bare subsection on as its subsections.
section_2_1 = Section(
    section_name='2-to-1 match',
    start_section=SectionBreak('1', break_offset='Before'),
    end_section=SectionBreak('6', break_offset='After'),
    processor=[pairs],
    subsections=subsection,
)

section_2_1.read(str_source)

[[('4', '5')]]