### Imports

In [1]:
from functools import partial
from pprint import pprint

import sections as sec
import text_reader as tr
from buffered_iterator import BufferedIterator


### Sub-Section Section
> **Section contents:**
>```
>Multi Combined Group Section
>Section Name:D3
>D3 Content1:a
>D3 Content2:b
>D3 Content3:c
>Section Name:D4
>D4 Content1:d
>...
>D6 Content3:l
>End Section
>```
- Section Starts with *Multi Combined Group Section*
- Sub-section Starts **and Ends** with *Section Name*
- Section Ends with *End Section*
- Second column starts after ***:***

In [2]:
# Raw lines of one 'Multi Combined Group Section' that holds four
# sub-sections (D3 to D6), each introduced by a 'Section Name' line.
# NOTE: the following cell assigns `multi_group_source` again, so this value
# is replaced as soon as that cell runs.
multi_group_source = '''\
Multi Combined Group Section
Section Name:D3
D3 Content1:a
D3 Content2:b
D3 Content3:c
Section Name:D4
D4 Content1:d
D4 Content2:e
D4 Content3:f
Section Name:D5
D5 Content1:g
D5 Content2:h
D5 Content3:i
Section Name:D6
D6 Content1:j
D6 Content2:k
D6 Content3:l
End Section'''.splitlines()

In [3]:
# Raw lines of one 'Multi Combined Group Section' in which the D3-D6 group of
# sub-sections appears twice before the closing 'End Section' line.
# NOTE(review): the second 'Section Name:D3' line sits between
# 'D6 Content2:k' and 'D6 Content3:l', i.e. inside the first D6 group --
# confirm that this ordering is intentional and not a paste slip.
multi_group_source = '''\
Multi Combined Group Section
Section Name:D3
D3 Content1:a
D3 Content2:b
D3 Content3:c
Section Name:D4
D4 Content1:d
D4 Content2:e
D4 Content3:f
Section Name:D5
D5 Content1:g
D5 Content2:h
D5 Content3:i
Section Name:D6
D6 Content1:j
D6 Content2:k
Section Name:D3
D6 Content3:l
D3 Content1:a
D3 Content2:b
D3 Content3:c
Section Name:D4
D4 Content1:d
D4 Content2:e
D4 Content3:f
Section Name:D5
D5 Content1:g
D5 Content2:h
D5 Content3:i
Section Name:D6
D6 Content1:j
D6 Content2:k
D6 Content3:l
End Section'''.splitlines()

### Define a subsection

In [4]:
# Parser built by text_reader with ':' as the column delimiter.
# NOTE(review): the exact meaning of the first positional argument and of
# `skipinitialspace` is defined in text_reader -- confirm there.
delimiter_parser = tr.define_csv_parser(
    'delimiter_parser',
    delimiter=':',
    skipinitialspace=True
    )

# Processing steps passed to the section as its `processor`: the ':' parser
# followed by two text_reader helpers (presumably whitespace trimming and
# removal of blank items -- verify in text_reader).
delimiter_section_reader = [
    delimiter_parser,
    tr.trim_items,
    tr.drop_blanks
    ]

# Sub-section whose start break and end break both use the 'Section Name'
# sentinel with break_offset='Before'; the parsed lines are aggregated with
# tr.to_dict (default_value=None).
sub_delimiter_section = sec.Section(
    section_name='Sub Section',
    start_section=sec.SectionBreak('Section Name',
                                   break_offset='Before'),
    end_section=sec.SectionBreak('Section Name',
                                   break_offset='Before'),
    processor=delimiter_section_reader,
    aggregate=partial(tr.to_dict, default_value=None)
    )

In [5]:
# Read the sub-section directly from the flat list of source lines; the
# recorded output below contains only the first sub-section (D3).
sub_delimiter_section.read(multi_group_source)

Text Processing      - DEBUG: Entered sub-section processor for: Sub Section
Text Processing      - DEBUG: No sub-sections in: Sub Section
Text Processing      - DEBUG: Resetting source for: Sub Section.
Text Processing      - DEBUG: Advancing to start of Sub Section.
Buffered Iterator    - DEBUG: Getting item: Multi Combined Group Section	 from source
Text Processing      - DEBUG: In:	Sub Section	Got item:	Multi Combined Group Section
Text Processing      - DEBUG: Break Status:	Scan In Progress
Text Processing      - DEBUG: Checking Trigger: SectionBreak
Text Processing      - DEBUG: in section_break.check
Buffered Iterator    - DEBUG: Getting item: Section Name:D3	 from source
Text Processing      - DEBUG: In:	Sub Section	Got item:	Section Name:D3
Text Processing      - DEBUG: Break Status:	Scan In Progress
Text Processing      - DEBUG: Checking Trigger: SectionBreak
Text Processing      - DEBUG: in section_break.check
Text Processing      - DEBUG: Break triggered by Section Name
Tex

{'Section Name': 'D3',
 'D3 Content1': 'a',
 'D3 Content2': 'b',
 'D3 Content3': 'c'}

### Aggregate definitions

In [6]:
def print_list(parsed_lines):
    '''Pretty-print each item and gather the items in a list.

    Arguments:
        parsed_lines: Iterable of items to display and collect.

    Returns:
        A list holding every item, in the order it was received.
    '''
    collected = []
    for entry in parsed_lines:
        # Print before storing so items are shown as soon as they are read.
        pprint(entry)
        collected.append(entry)
    return collected


def make_list(parsed_lines):
    '''Collect the truthy items of an iterable into a list.

    Arguments:
        parsed_lines: Iterable of items to collect.

    Returns:
        A list of the items that evaluate as true, in their original order;
        empty or otherwise falsy items are left out.
    '''
    return [entry for entry in parsed_lines if entry]

### Define Section with sub-sections

In [7]:
# Outer section running from 'Multi Combined Group Section' to 'End Section'.
# It contains `sub_delimiter_section` as its only sub-section and gathers the
# sub-section results with `make_list`.
# NOTE(review): the effect of end_on_first_item=True is defined in the
# sections package -- confirm there.
repeat_group_section = sec.Section(
    section_name='Repeated Sub-Section Groups',
    start_section='Multi Combined Group Section',
    end_section='End Section',
    subsections=[sub_delimiter_section],
    end_on_first_item=True,
    aggregate=make_list
    )

# FIXME: Sub-section D4 of the first group is missing from the output below

In [8]:
# Read every sub-section of the group; the recorded output below lacks the
# first D4 sub-section.
repeat_group_section.read(multi_group_source)

Text Processing      - DEBUG: Entered sub-section processor for: Repeated Sub-Section Groups
Text Processing      - DEBUG: Process repeated sub-section Sub Section in: Repeated Sub-Section Groups
Text Processing      - DEBUG: Entered sub-section processor for: Sub Section
Text Processing      - DEBUG: No sub-sections in: Sub Section
Text Processing      - DEBUG: Setting new source for: Sub Section.
Text Processing      - DEBUG: Advancing to start of Sub Section.
Text Processing      - DEBUG: Resetting source for: Repeated Sub-Section Groups.
Text Processing      - DEBUG: Advancing to start of Repeated Sub-Section Groups.
Buffered Iterator    - DEBUG: Getting item: Multi Combined Group Section	 from source
Text Processing      - DEBUG: In:	Repeated Sub-Section Groups	Got item:	Multi Combined Group Section
Text Processing      - DEBUG: Break Status:	Scan In Progress
Text Processing      - DEBUG: Checking Trigger: SectionBreak
Text Processing      - DEBUG: in section_break.check
Text Proc

[{'Section Name': 'D3',
  'D3 Content1': 'a',
  'D3 Content2': 'b',
  'D3 Content3': 'c'},
 {'Section Name': 'D5',
  'D5 Content1': 'g',
  'D5 Content2': 'h',
  'D5 Content3': 'i'},
 {'Section Name': 'D6', 'D6 Content1': 'j', 'D6 Content2': 'k'},
 {'Section Name': 'D3',
  'D6 Content3': 'l',
  'D3 Content1': 'a',
  'D3 Content2': 'b',
  'D3 Content3': 'c'},
 {'Section Name': 'D4',
  'D4 Content1': 'd',
  'D4 Content2': 'e',
  'D4 Content3': 'f'},
 {'Section Name': 'D5',
  'D5 Content1': 'g',
  'D5 Content2': 'h',
  'D5 Content3': 'i'},
 {'Section Name': 'D6',
  'D6 Content1': 'j',
  'D6 Content2': 'k',
  'D6 Content3': 'l'}]

### Fixed Width Section
> **Section contents:**
>```
>012345678901234567890123456789
>
>Single Fixed Width Section
>Section Name    F1
>F1 Content1     d
>F1 Content2     e
>F1 Content3     f
>End Section
>
>012345678901234567890123456789
>```
- Starts with *Single Fixed Width Section*
- Second column starts at ***16***
- Ends with *End Section*

In [5]:
# Raw lines of a single fixed-width section; the second column of every data
# line starts at character index 16.
fixed_width_source = '''\
Single Fixed Width Section
Section Name    F1
F1 Content1     d
F1 Content2     e
F1 Content3     f
End Section'''.splitlines()

In [6]:
# Processing steps for the fixed-width section: a fixed-width parser created
# with widths=16, followed by three text_reader helpers.
# NOTE(review): the helper names suggest trimming, blank removal and numeric
# conversion -- verify their behaviour in text_reader.
fixed_width_reader = [
    tr.define_fixed_width_parser(widths=16),
    tr.trim_items,
    tr.drop_blanks,
    tr.convert_numbers
    ]

# Section that starts at 'Single Fixed Width Section' (break_offset='After')
# and ends at 'End Section'; parsed lines are aggregated with tr.to_dict
# (default_value=None).
fixed_width_section = sec.Section(
    section_name='Fixed Width Section',
    start_section=sec.SectionBreak('Single Fixed Width Section', 
                                   break_offset='After'),
    end_section='End Section',
    processor=fixed_width_reader,
    aggregate=partial(tr.to_dict, default_value=None)
    )

In [7]:
# Read the fixed-width section; the recorded output below is a dictionary
# keyed by the first column of each line.
fixed_width_section.read(fixed_width_source)

{'Section Name': 'F1',
 'F1 Content1': 'd',
 'F1 Content2': 'e',
 'F1 Content3': 'f'}

### Delimiter Section
> **Section contents:**
>```
>Single Delimiter Section
>Section Name:D2
>D2 Content1:m
>D2 Content2:n
>D2 Content3:o
>End Section
>```
- Starts with *Single Delimiter Section*
- Second column starts after ***:***
- Ends with *End Section*

In [8]:
# Raw lines of a single ':'-delimited section (named D2).
delimiter_source = '''\
Single Delimiter Section
Section Name:D2
D2 Content1:m
D2 Content2:n
D2 Content3:o
End Section'''.splitlines()

In [9]:
# NOTE: `delimiter_parser` and `delimiter_section_reader` are assigned here
# with the same expressions as in the earlier 'Define a subsection' cell; this
# cell rebinds both names.
# Parser built by text_reader with ':' as the column delimiter.
delimiter_parser = tr.define_csv_parser(
    'delimiter_parser',
    delimiter=':',
    skipinitialspace=True
    )

# Processing steps passed to the section as its `processor`, in list order.
delimiter_section_reader = [
    delimiter_parser,
    tr.trim_items,
    tr.drop_blanks
    ]

# Section that starts at 'Single Delimiter Section' (break_offset='After') and
# ends at 'End Section'; parsed lines are aggregated with tr.to_dict
# (default_value=None).
# NOTE(review): unlike the other sections in this notebook, no `section_name`
# is passed here -- confirm the default name is acceptable.
delimiter_section = sec.Section(
    start_section=sec.SectionBreak('Single Delimiter Section', 
                                   break_offset='After'),
    end_section='End Section',
    processor=delimiter_section_reader,
    aggregate=partial(tr.to_dict, default_value=None)
    )

In [10]:
# Read the delimiter section; the recorded output below is a dictionary keyed
# by the text before the ':' on each line.
delimiter_section.read(delimiter_source)

{'Section Name': 'D2',
 'D2 Content1': 'm',
 'D2 Content2': 'n',
 'D2 Content3': 'o'}

### Source Definition
A list of strings

In [None]:
# Combined source: two fixed-width sections (F1, F2), one delimiter section
# (D2) and one multi-group section (D3 to D6), separated by blank lines and
# filler text that belongs to no section.
test_source = '''\
Single Fixed Width Section
Section Name    F1
F1 Content1     d
F1 Content2     e
F1 Content3     f
End Section

Text to be ignored

More Text to be ignored

Single Delimiter Section
Section Name:D2
D2 Content1:m
D2 Content2:n
D2 Content3:o
End Section

Even More Text to be ignored

Single Fixed Width Section
Section Name    F2
F2 Content1     p
F2 Content2     q
F2 Content3     r
End Section

Final Text to be ignored

Multi Combined Group Section
Single Section
Section Name:D3
D3 Content1:a
D3 Content2:b
D3 Content3:c
Single Section
Section Name:D4
D4 Content1:d
D4 Content2:e
D4 Content3:f
Single Section
Section Name:D5
D5 Content1:g
D5 Content2:h
D5 Content3:i
Single Section
Section Name:D6
D6 Content1:j
D6 Content2:k
D6 Content3:l
End Section'''.splitlines()

### Expected Results

In [3]:
# Expected parse results, keyed by a label for each section under test.
# NOTE(review): `test_source` in this notebook has no 'D1' lines (its
# delimiter section is named D2 with values m, n, o), so the 'Section D1'
# entry does not match that source -- confirm which one is stale.
# NOTE(review): test_group_section_read looks up 'Test Group Section' in
# `self.test_result`; no such key exists in this dictionary -- confirm whether
# the tests are meant to use this object.
test_result = {
    'Section D1': {
        'Section Name':'D1',
        'D1 Content1': 'a',
        'D1 Content2': 'b',
        'D1 Content3': 'c'
        },
    'Section F1': {
        'Section Name':'F1',
        'F1 Content1': 'd',
        'F1 Content2': 'e',
        'F1 Content3': 'f'
        },
    # One dictionary per sub-section of the multi-group section.
    'Test Multi Group Section': [
        {'Section Name':'D3',
            'D3 Content1': 'a',
            'D3 Content2': 'b',
            'D3 Content3': 'c'
        },
        {'Section Name':'D4',
            'D4 Content1': 'd',
            'D4 Content2': 'e',
            'D4 Content3': 'f'
        },
        {'Section Name':'D5',
            'D5 Content1': 'g',
            'D5 Content2': 'h',
            'D5 Content3': 'i'
        },
        {'Section Name':'D6',
            'D6 Content1': 'j',
            'D6 Content2': 'k',
            'D6 Content3': 'l'
        }
        ]
    }

In [None]:
# Outer section running from 'Multi Combined Group Section' to 'End Section'
# with `sub_delimiter_section` as its only sub-section; results are gathered
# with `make_list`. Compared with `repeat_group_section` it uses a different
# section_name and does not pass end_on_first_item.
multi_group_section = sec.Section(
    section_name='Group Section',
    start_section='Multi Combined Group Section',
    end_section='End Section',
    subsections=[sub_delimiter_section],
    aggregate=make_list
    )


### Reader definitions

### SectionBreak definitions


In [None]:

# Section boundaries used by the combined-source examples.
# The sections package is imported as `sec` in this notebook
# (`import sections as sec`), so the breaks are built through that alias; the
# bare name `sections` is never bound and would raise NameError.
# NOTE(review): other cells pass the sentinel positionally; the `name` and
# `sentinel` keywords are assumed to match SectionBreak's signature -- confirm.

# Break triggered by the 'End Section' line.
section_end = sec.SectionBreak(
    name='Single Section',
    sentinel='End Section'
    )
# Break triggered by 'Combined Group Section', with break_offset='After'.
group_section_start = sec.SectionBreak(
    name='Combined Group Section',
    sentinel='Combined Group Section',
    break_offset='After'
    )
# Break triggered by 'Multi Combined Group Section', with break_offset='After'.
multi_group_section_start = sec.SectionBreak(
    name='Multi Combined Group Section',
    sentinel='Multi Combined Group Section',
    break_offset='After'
    )
# Break triggered by 'Done Combined Group Section', with break_offset='Before'.
group_section_end = sec.SectionBreak(
    name='End Group Section',
    sentinel='Done Combined Group Section',
    break_offset='Before'
    )


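### Section definitions

The cells below hold `unittest`-style test methods: each takes `self` and uses `self.assertDictEqual` / `self.subTest`, so it needs a `unittest.TestCase`-like instance (providing the sections, `test_source`, `test_result` and `context` attributes) to be called.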


In [None]:

def test_delimiter_sub_section_read(self):
    '''Check the delimiter section read against the 'Section D1' result.

    Reads `self.delimiter_section` from a BufferedIterator over
    `self.test_source` (with start_search=True and `self.context`) and
    asserts that the returned dictionary equals
    `self.test_result['Section D1']`.
    '''
    section = self.delimiter_section
    lines = BufferedIterator(self.test_source)
    actual = section.read(lines, start_search=True, context=self.context)
    self.assertDictEqual(actual, self.test_result['Section D1'])


In [None]:

def test_fixed_width_sub_section_read(self):
    '''Check the fixed-width section read against the 'Section F1' result.

    Reads `self.fixed_width_section` from a BufferedIterator over
    `self.test_source` (with start_search=True and `self.context`) and
    asserts that the returned dictionary equals
    `self.test_result['Section F1']`.
    '''
    section = self.fixed_width_section
    lines = BufferedIterator(self.test_source)
    actual = section.read(lines, start_search=True, context=self.context)
    self.assertDictEqual(actual, self.test_result['Section F1'])


In [None]:

def test_group_section_read(self):
    '''Compare each sub-section of the first group with its expected value.

    Reads `self.group_section` from a BufferedIterator over
    `self.test_source`, then pairs the items of the first element of the
    result with `self.test_result['Test Group Section']` and asserts, in one
    sub-test per pair, that the two dictionaries are equal.
    '''
    section = self.group_section
    lines = BufferedIterator(self.test_source)
    actual = section.read(lines, start_search=True, context=self.context)
    expected = self.test_result['Test Group Section']
    paired = zip(actual[0], expected)
    for count, (section_output, expected_section_output) in enumerate(paired):
        with self.subTest(section=count):
            self.assertDictEqual(section_output, expected_section_output)


In [None]:

def test_multi_group_section_read(self):
    '''Compare every sub-section of every group with its expected value.

    Reads `self.multi_group_section` from a BufferedIterator over
    `self.test_source`, pairs each group of the result with the matching
    entry of `self.test_result['Test Multi Group Section']`, then pairs the
    items inside each group and asserts, in one sub-test per item labelled
    '<group>.<item>', that the two dictionaries are equal.

    NOTE(review): the inner pairing assumes each group is itself a sequence
    of dictionaries -- confirm against the shape of the expected results.
    '''
    section = self.multi_group_section
    lines = BufferedIterator(self.test_source)
    actual = section.read(lines, start_search=True, context=self.context)
    expected = self.test_result['Test Multi Group Section']
    groups = zip(actual, expected)
    for section_count, (actual_group, expected_group) in enumerate(groups):
        items = zip(actual_group, expected_group)
        for count, (s_output, e_output) in enumerate(items):
            subsection = f'{section_count}.{count}'
            with self.subTest(subsection=subsection):
                self.assertDictEqual(s_output, e_output)



