# SubSection Management

In [6]:
from pprint import pprint
from single_section import SingleSection
from single_section import SectionBreak
from buffered_iterator import BufferedIterator
from single_section import set_method
from single_section import StandardProcessingMethods

In [7]:
# Sample source: three Start/Middle/End groups (A, B, C) mixed with lines
# that belong to no group.
test_text = [
            'Text to be ignored',
            'StartSection A',
            'MiddleSection A',
            'EndSection A',
            'Unwanted text between sections',
            'StartSection B',
            'MiddleSection B',
            'EndSection B',
            'StartSection C',
            'MiddleSection C',
            'EndSection C',
            'Even more text to be ignored',
            ]

# A section defined with a name only (no start or end break given).
all_sections = SingleSection(section_name='AllSections')
all_sections.read(test_text)

# A section bounded by break text.  With break_offset 'Before' on the start
# break and 'After' on the end break, both boundary lines appear in the read
# result (see the recorded cell output).
single_section = SingleSection(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After')
    )
# Reading straight from the list returns only the first group (A).
single_section.read(test_text)

['StartSection A', 'MiddleSection A', 'EndSection A']

In [8]:
# Same sample source as the previous cell.
test_text = [
            'Text to be ignored',
            'StartSection A',
            'MiddleSection A',
            'EndSection A',
            'Unwanted text between sections',
            'StartSection B',
            'MiddleSection B',
            'EndSection B',
            'StartSection C',
            'MiddleSection C',
            'EndSection C',
            'Even more text to be ignored',
            ]


# The source is wrapped in a BufferedIterator and the same iterator is passed
# to every read() call; the recorded output shows groups A, B and C being
# returned in turn.
test_iter = BufferedIterator(test_text)

single_section = SingleSection(
    section_name='SubSection',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After')
    )

# Keep reading until the section reports that the source is finished.  The
# status is tested before each read, so the last read (the one that reaches
# 'End of Source' and returns an empty list) is still printed.
while True:
    if single_section.scan_status in ['Scan Complete', 'End of Source']:
        break
    print(single_section.read(test_iter))
    # The section context after each read, including any 'Skipped Lines'.
    pprint(single_section.context)


['StartSection A', 'MiddleSection A', 'EndSection A']
{'Break': 'SectionBreak',
 'Current Section': 'SubSection',
 'Event': 'EndSection',
 'Skipped Lines': ['Text to be ignored'],
 'Status': 'Break Triggered'}
['StartSection B', 'MiddleSection B', 'EndSection B']
{'Break': 'SectionBreak',
 'Current Section': 'SubSection',
 'Event': 'EndSection',
 'Skipped Lines': ['Unwanted text between sections'],
 'Status': 'Break Triggered'}
['StartSection C', 'MiddleSection C', 'EndSection C']
{'Break': 'SectionBreak',
 'Current Section': 'SubSection',
 'Event': 'EndSection',
 'Skipped Lines': [],
 'Status': 'Break Triggered'}
[]
{'Current Section': 'SubSection',
 'Skipped Lines': ['Even more text to be ignored'],
 'Status': 'End of Source'}


# Multiple SubSections

## Single line subsections

In [9]:
# Sample source: two groups, each made of a start line, a content line and
# an end line.
multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]


# End break of True: the recorded output shows this section returning
# exactly one line.
name_section = SingleSection(
    section_name='Name',
    end_section=SectionBreak(True, name='NameEnd')
    )
# Ends 'Before' the 'EndSection' line, so that line is left for the next
# section to read.
content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent')
    )
# Another single-line section; it picks up the 'EndSection' line.
end_section = SingleSection(
    section_name='End',
    end_section=SectionBreak(True, name='EndEnd')
    )

test_iter = BufferedIterator(multi_section_text)

# The three sections read from the same iterator one after the other, so
# each one starts where the previous one stopped.
print(name_section.read(test_iter))
print(content_section.read(test_iter))
print(end_section.read(test_iter))

['StartSection Name:A']
['A Content1:a']
['EndSection Name:A']


## Context updating
**External context is not being modified**

In [22]:
# Three sections that together describe one Name / Content / End group.
# None of them has a processor that writes to the context.
name_section = SingleSection(
    section_name='Name',
    end_section=SectionBreak(True, name='NameEnd')
    )
content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent')
    )
end_section = SingleSection(
    section_name='End',
    end_section=SectionBreak(True, name='EndEnd')
    )


multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)

section_list = [name_section, content_section, end_section]

# External context handed to every read() call.
test_context = {'dummy': 'Test'}

# Each pass of the outer loop reads one group, storing each section's result
# under that section's name.
while True:
    section_group = {}
    for sub_section in section_list:
        sub_section_item = sub_section.read(test_iter, context = test_context)
        section_group[sub_section.section_name] = sub_section_item
    pprint(section_group)
    # sub_section is the variable left over from the for loop, i.e. the last
    # section in section_list (end_section).
    if sub_section.scan_status in ['Scan Complete', 'End of Source']:
        break


print()
# The recorded output shows test_context unchanged after all the reads.
pprint(test_context)

{'Content': ['A Content1:a'],
 'End': ['EndSection Name:A'],
 'Name': ['StartSection Name:A']}
{'Content': ['A Content2:a'],
 'End': ['EndSection Name:B'],
 'Name': ['StartSection Name:B']}

{'dummy': 'Test'}


Context __is__ being modified

In [11]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item


name_section = SingleSection(
    section_name='Name',
    end_section=SectionBreak(True, name='NameEnd')
    )
content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent')
    )
# Only the last section is given the context-writing processor.
end_section = SingleSection(
    section_name='End',
    end_section=SectionBreak(True, name='EndEnd'),
    processor=set_context
    )


multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)

section_list = [name_section, content_section, end_section]

test_context = {'dummy': 'Test'}


# Each pass of the outer loop reads one group, storing each section's result
# under that section's name.
while True:
    section_group = {}
    for sub_section in section_list:
        sub_section_item = sub_section.read(test_iter, context = test_context)
        section_group[sub_section.section_name] = sub_section_item
        #pprint(sub_section.context)
        #print()
    pprint(section_group)
    # sub_section is the last section in section_list (end_section).
    if sub_section.scan_status in ['Scan Complete', 'End of Source']:
        break

    
print()
# Context held by the last section read.
pprint(sub_section.context)

print()
# The recorded output shows 'This Item' added to the external context.
pprint(test_context)

{'Content': ['A Content1:a'],
 'End': ['EndSection Name:A'],
 'Name': ['StartSection Name:A']}
{'Content': ['A Content2:a'],
 'End': ['EndSection Name:B'],
 'Name': ['StartSection Name:B']}

{'Current Section': 'End',
 'Skipped Lines': [],
 'Status': 'End of Source',
 'This Item': 'EndSection Name:B',
 'dummy': 'Test'}

{'This Item': 'EndSection Name:B', 'dummy': 'Test'}


**This Works**
test_context is being updated

In [12]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item


test_context = {'dummy': 'Test'}

# Baseline check: call the function directly, with no section machinery.
set_context('A', test_context)

# Last expression of the cell, so the notebook displays the updated dict.
test_context

{'dummy': 'Test', 'This Item': 'A'}

**This Works**
test_context is being updated

In [13]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item


# Wrap set_context with set_method; the result is called below with
# (item, context), the same way as the original function.
use_function = set_method(set_context, method_type='Process')

test_context = {'dummy': 'Test'}

use_function('A', test_context)

# Last expression of the cell, so the notebook displays the updated dict.
test_context

{'dummy': 'Test', 'This Item': 'A'}

**This Works**
test_context is being updated

In [14]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}

# Use the processing class on its own, without a section around it.
processor = StandardProcessingMethods([set_context])

# The recorded output shows every source line returned, with 'This Item'
# left holding the last line.
pprint(processor.read(test_iter, test_context))

test_context

['StartSection Name:A',
 'A Content1:a',
 'EndSection Name:A',
 'StartSection Name:B',
 'A Content2:a',
 'EndSection Name:B']


{'dummy': 'Test', 'This Item': 'EndSection Name:B'}

**This Works**
test_context is being updated

In [15]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}

content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent'),
    processor=set_context
    )

# Call the section's processor directly on the raw source; scan() is not
# used here, and the recorded output shows every source line returned
# rather than just the section's lines.
section_proc = content_section.processor.reader(test_iter, context = test_context)
pprint([i for i in section_proc])

test_context



['StartSection Name:A',
 'A Content1:a',
 'EndSection Name:A',
 'StartSection Name:B',
 'A Content2:a',
 'EndSection Name:B']


{'dummy': 'Test', 'This Item': 'EndSection Name:B'}

**This Works**
test_context is being updated

In [16]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}


content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent'),
    processor=set_context
    )


# Chain the two stages by hand: scan() limits the source to the section and
# the processor's reader() then runs over that limited iterator.
section_iter = content_section.scan(test_iter, context = test_context)
read_iter = content_section.processor.reader(section_iter, context = test_context)

# The recorded output stops before the first 'EndSection' line.
pprint([i for i in read_iter])

test_context

['StartSection Name:A', 'A Content1:a']


{'dummy': 'Test', 'This Item': 'A Content1:a'}

**This Works**
test_context is being propagated even when the second processing function 
doesn't take context.

In [17]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

# Processing function that takes no context argument and returns the item
# unchanged.
def do_nothing(item):
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}

# Two processing functions in sequence; only the first accepts a context.
processor = StandardProcessingMethods([set_context, do_nothing])

pprint(processor.read(test_iter, test_context))

# The recorded output shows 'This Item' still set by set_context.
test_context

['StartSection Name:A',
 'A Content1:a',
 'EndSection Name:A',
 'StartSection Name:B',
 'A Content2:a',
 'EndSection Name:B']


{'dummy': 'Test', 'This Item': 'EndSection Name:B'}

**This Works**
test_context is being updated when the list() function is called

In [18]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}


content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent'),
    processor=set_context
    )


# Chain scan() and the processor's reader() by hand.
section_iter = content_section.scan(test_iter, context = test_context)
read_iter = content_section.processor.reader(section_iter, context = test_context)

#pprint([i for i in read_iter])
# Consume the iterator with list() instead of a comprehension; the result
# is discarded because only the effect on test_context is of interest.
list(read_iter)

test_context

{'dummy': 'Test', 'This Item': 'A Content1:a'}

**This Works**
test_context is being updated when the assemble() function is called

In [19]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}


content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent'),
    processor=set_context
    )


# Chain scan() and the processor's reader() by hand, then let the section's
# assemble() consume the resulting iterator.
section_iter = content_section.scan(test_iter, context = test_context)
read_iter = content_section.processor.reader(section_iter, context = test_context)

section_assembled = content_section.assemble(read_iter, context=test_context)

# Show the assembled result, the external context and the section's own
# context for comparison.
pprint(section_assembled)
print('\n')
pprint(test_context)
print('\n')
pprint(content_section.context)


['StartSection Name:A', 'A Content1:a']


{'This Item': 'A Content1:a', 'dummy': 'Test'}


{'Break': 'EndContent',
 'Current Section': 'Content',
 'Event': 'EndSection',
 'Skipped Lines': [],
 'Status': 'Break Triggered',
 'This Item': 'A Content1:a',
 'dummy': 'Test'}


**This Works**
test_context is being updated

In [23]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item

multi_section_text = [
    'StartSection Name:A',
    'A Content1:a',
    'EndSection Name:A',
    'StartSection Name:B',
    'A Content2:a',
    'EndSection Name:B'
    ]

test_iter = BufferedIterator(multi_section_text)
test_context = {'dummy': 'Test'}


content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent'),
    processor=set_context
    )


# Same chain as the earlier cells, but going through the section's own
# process() method instead of calling processor.reader() directly.
section_iter = content_section.scan(test_iter, context = test_context)
read_iter = content_section.process(section_iter, context = test_context)

# Show the processed items, the external context and the section's own
# context for comparison.
pprint([i for i in read_iter])
print('\n')
pprint(test_context)
print('\n')
pprint(content_section.context)

['StartSection Name:A', 'A Content1:a']


{'This Item': 'A Content1:a', 'dummy': 'Test'}


{'Break': 'EndContent',
 'Current Section': 'Content',
 'Event': 'EndSection',
 'Skipped Lines': [],
 'Status': 'End of Source',
 'This Item': 'A Content1:a',
 'dummy': 'Test'}


# Done To Here

In [24]:
# Processing function that stores the item it receives in the context under
# 'This Item' and returns the item unchanged.
def set_context(item, context):
    context['This Item'] = item
    return item


# Processing function that pretty-prints the context it is given and returns
# the item unchanged.
def print_context(item, context):
    pprint(context)
    return item


# The first two sections write to the context; the last one prints the
# context it receives.
name_section = SingleSection(
    section_name='Name',
    end_section=SectionBreak(True, name='NameEnd'),
    processor=set_context
    )
content_section = SingleSection(
    section_name='Content',
    end_section=SectionBreak('EndSection', break_offset='Before', name='EndContent'),
    processor=set_context
    )
end_section = SingleSection(
    section_name='End',
    end_section=SectionBreak(True, name='EndEnd'),
    processor=print_context
    )


In [None]:
# multi_section_text is not defined in this cell; it is reused from an
# earlier cell of the notebook.
test_iter = BufferedIterator(multi_section_text)

section_list = [name_section, content_section, end_section]

test_context = {'dummy': 'Test'}



# Each pass of the outer loop reads one group, printing every section's
# context as it goes and then the grouped results.
while True:
    section_group = {}
    for sub_section in section_list:
        sub_section_item = sub_section.read(test_iter, context = test_context)
        section_group[sub_section.section_name] = sub_section_item
        pprint(sub_section.context)
        print()
    pprint(section_group)
    # sub_section is the last section in section_list (end_section).
    if sub_section.scan_status in ['Scan Complete', 'End of Source']:
        break
pprint(sub_section.context)


In [None]:
# A section whose processor is a list of other sections: the Start/End
# breaks bound the outer section and the three section instances are
# supplied as its processor.
full_section = SingleSection(
    section_name='Full',
    start_section=SectionBreak('StartSection', break_offset='Before'),
    end_section=SectionBreak('EndSection', break_offset='After'),
    processor=[name_section, content_section, end_section]
    )

In [None]:
# NOTE(review): this cell is a fragment; `context`, `source` and `self` are
# not defined by any cell visible here, so it cannot run on its own.

# Work on a copy of the section context so that the subsection cannot
# change section context items that it shouldn't.
s_context = context.copy()

# The source is wrapped in a BufferedIterator and linked to the section's
# own source so that the subsection will return the appropriate indexing.
buf_source = BufferedIterator(source)
buf_source.link(self.source)

## set_subsection_reader
`set_subsection_reader(self, processing_def)`

Converts section instances to functions that call the section reader.

Args:<br>
> **processing_def (ProcessMethodDef):** A processing method.

Raises:<br>
>  **ValueError:** For processing instruction items which are lists
        containing a mix of Section instances and other items.

Returns:<br>
>  **ProcessMethodDef:** If processing_def is a section object, or a list
    of section objects, return a partial function that calls the
    section(s) read_subsections method.
    Otherwise returns processing_def.

In [None]:
def true_iterable(variable) -> bool:
    '''Indicate if the variable is a non-string type iterable.

    Arguments:
        variable {Any} -- The variable to test.
    Returns:
        True if variable is an iterable other than a str, otherwise False.
    '''
    # Imported here because the notebook's import cell does not provide
    # Iterable; without it the function fails with a NameError.
    from collections.abc import Iterable

    return not isinstance(variable, str) and isinstance(variable, Iterable)


In [None]:
def is_sections(func_list, section_class=None):
    '''Flag which items in a list of processing methods are sections.

    Arguments:
        func_list {Iterable[Any]} -- The processing methods to test.
        section_class {type} -- The class that counts as a section.
            Defaults to SingleSection, the section class this notebook
            imports.
    Returns:
        A list of bool, one per item in func_list, True where the item is
        an instance of section_class.
    '''
    # The original test used self.__class__, but a stand-alone function has
    # no self, so the section class is taken as an argument instead.
    if section_class is None:
        section_class = SingleSection
    return [isinstance(sub_rdr, section_class) for sub_rdr in func_list]


In [None]:
def is_all_sections(func, section_class=None):
    '''Indicate whether every item in a list of processing methods is a section.

    Arguments:
        func {Iterable[Any]} -- The processing methods to test.
        section_class {type} -- The class that counts as a section.
            Defaults to SingleSection, the section class this notebook
            imports.
    Returns:
        True if all items in func are instances of section_class (also True
        for an empty iterable), otherwise False.
    '''
    # The original test used self.__class__, but a stand-alone function has
    # no self, so the section class is taken as an argument instead.
    if section_class is None:
        section_class = SingleSection
    return all(isinstance(sub_rdr, section_class) for sub_rdr in func)


In [None]:
def section_naming(func):
    '''Make sure a group of subsections does not share section names.

    The read results of a subsection group are stored in a dictionary keyed
    by section name, so duplicate names would overwrite each other.

    Arguments:
        func -- The group of sections; each item needs a section_name
            attribute.
    Returns:
        func itself when all names are already unique.  Otherwise a new list
        of the same section objects, where every section (not only the
        duplicates) has had its position in the group appended to its
        section_name.  The sections are renamed in place.
    '''
    names = [section.section_name for section in func]
    if len(set(names)) == len(names):
        # Nothing clashes, hand the group back untouched.
        return func

    renamed = []
    for position, section in enumerate(func):
        section.section_name = section.section_name + str(position)
        renamed.append(section)
    return renamed


In [None]:
def set_subsection_reader(self, processing_def):
    '''Convert section instances into functions that read them as subsections.

    Arguments:
        processing_def -- A single processing method, or a non-string
            iterable of them.
    Raises:
        ValueError -- If processing_def is an iterable in which some, but
            not all, items are sections.
    Returns:
        A partial of Section.read_subsections bound to this section when
        processing_def is a section or an iterable made up entirely of
        sections.  Otherwise processing_def is returned unchanged.
    '''
    section_type = self.__class__

    # A lone section is treated as a subsection group with one member.
    if isinstance(processing_def, section_type):
        return partial(Section.read_subsections, self,
                       subsections=[processing_def])

    # Anything that is not a non-string iterable is passed through as is.
    if not true_iterable(processing_def):
        return processing_def

    # An iterable inside the processing methods should be a group of
    # sections; find out which of its items actually are.
    section_flags = is_sections(processing_def)
    if all(section_flags):
        unique_sections = section_naming(processing_def)
        return partial(Section.read_subsections, self,
                       subsections=unique_sections)
    if any(section_flags):
        raise ValueError('If an individual processing function is a '
                         'list all items in the list must be of type '
                         'Section.')
    return processing_def


In [None]:
def read_subsections(self, source: SectionGen, context: ContextType,
                         subsections: List[Section])->ProcessOutput:
        '''Read a single subsection or a group of subsections.

        This method is used for section instances supplied as processor items to
        this section definition. It calls the Section.read method on each
        subsection.  The subsections are given a copy of this section's
        context and a BufferedIterator wrapped around this section's source,
        rather than the originals, so that the subsection's status does not
        mix with this section's status.

        This section's context attribute is updated after all subsections have
        been read, and this section's source is then moved to the item count
        reached by the subsection source.

        Nothing is yielded if this section's scan_status is already
        'Scan Complete' or 'End of Source'.

        Arguments:
            source (SectionGen): This section's processor iterator.
            context (ContextType): This section's context.
            subsections (List[Section]): The subsections to be read.

        Yields:
            ProcessOutput:
                If subsections contains exactly one Section instance:
                    The result from calling that subsection's read().
                If subsections contains several Section instances:
                    A dictionary where the keys are the subsection names and the
                    values are the results from calling subsection.read().
                Empty (falsy) results are not yielded.
        '''
        def read_section(subsections, buf_source, s_context):
            # Read the first (only) subsection once and merge its context
            # into the working context.
            done_read = False
            subsection = subsections[0]
            read_itm = subsection.read(buf_source, context=s_context,
                                       start_search=True)
            s_context.update(subsection.context)
            if subsection.scan_status in ['End of Source']:
                done_read = True  # Break if end of source reached
            return read_itm, s_context, done_read

        def read_group(subsections, buf_source, s_context):
            # Read each subsection in turn, collecting the results in a
            # dictionary keyed by section name.
            read_items = dict()
            done_read = False
            for sub_sec in subsections:
                read_itm = sub_sec.read(buf_source, context=s_context,
                                                start_search=True)
                s_context.update(sub_sec.context)
                if sub_sec.scan_status in ['End of Source']:
                    done_read = True  # Break if end of source reached
                    # NOTE(review): the break below only happens when the
                    # final read is not empty; after an empty read the loop
                    # carries on with the remaining subsections.  Confirm
                    # this is intended.
                    if not is_empty(read_itm):  # Don't return empty read results.
                        read_items[sub_sec.section_name] = read_itm
                        break
                else:
                    # Always store read result if subsection did not close
                    read_items[sub_sec.section_name] = read_itm
            return read_items, s_context, done_read

        # Prepare for Subsection Read
        # Test for end of source
        if self.scan_status in ['Scan Complete', 'End of Source']:
            return  # Break if end of source reached
        done_read = False

        # This isolates the subsection context from the section context to
        # protect the section context items that shouldn't be changed by the
        # subsection.
        s_context = context.copy()

        # The source is wrapped in a BufferedIterator here so that the
        # subsection will return the appropriate indexing.
        buf_source = BufferedIterator(source)
        buf_source.link(self.source)

        # Select single subsection or subsection group
        if len(subsections) == 1:
            # Reading single subsection
            logger.debug(f'Process single sub-section '
                         f'{subsections[0].section_name} in: '
                         f'{self.section_name}')
            sub_reader = read_section
        else:
            sub_reader = read_group

        # Subsection Reading
        # Keep reading until a subsection reports 'End of Source'.
        while not done_read:
            read_items, s_context, done_read = sub_reader(subsections,
                                                          buf_source, s_context)
            if read_items:  # Don't return empty read results.
                yield read_items

        # Wrap up after subsection(s) read
        # This updates the relevant items in the section context
        self.context.update(s_context)

        # re-align section source with subsection source
        source_pointer = buf_source.item_count
        logger.debug(f'Moving section source to item #{source_pointer}')
        self.source.goto_item(source_pointer, buffer_overrun=True)

In [None]:
# Replace Section objects in the processing definition with functions that
# call the section reader; other items are kept as returned by
# set_subsection_reader.
# NOTE(review): this cell is a fragment; `processing_def` and `self` are not
# defined by any cell visible here.
cleaned_processing_def = list()
for func in processing_def:
    clean_func = self.set_subsection_reader(func)
    cleaned_processing_def.append(clean_func)