# Identify Empty read results.
Handle empty objects of different types.

## Setup

### Imports

In [1]:
from typing import List, Any

import re
import logging
import pprint
from pathlib import Path

#### Packages

In [2]:
import numpy as np
import pandas as pd

#### Local Imports

### Testing Functions

In [3]:
def test_bool(item_list: List[Any]):
    """Print the result of ``bool()`` for every object in ``item_list``.

    One report is printed per object, followed by a blank line:

    * ``bool(<repr>) is <result>`` when the conversion succeeds;
    * ``<exception class> raised by`` plus the object's repr when
      ``bool()`` raises ``ValueError``.

    Only ``ValueError`` is handled; any other exception propagates.
    Nothing is returned.
    """
    for candidate in item_list:
        try:
            truthy = bool(candidate)
        except ValueError as exc:
            report = f'{exc.__class__!s} raised by\n{candidate!r}\n'
        else:
            report = f'bool({candidate!r}) is {truthy!s}\n'
        print(report)

In [4]:
def test_len(item_list: List[Any]):
    """Print the result of ``len()`` for every object in ``item_list``.

    One report is printed per object, followed by a blank line:

    * ``len(<repr>) is <length>`` when the object supports ``len()``;
    * ``<exception class> raised by`` plus the object's repr when
      ``len()`` raises ``TypeError``.

    Only ``TypeError`` is handled; any other exception propagates.
    Nothing is returned.
    """
    for candidate in item_list:
        try:
            size = len(candidate)
        except TypeError as exc:
            report = f'{exc.__class__!s} raised by\n{candidate!r}\n'
        else:
            report = f'len({candidate!r}) is {size!s}\n'
        print(report)

In [5]:
def test_empty(item_list: List[Any]):
    """Print the ``.empty`` attribute of every object in ``item_list``.

    One report is printed per object, followed by a blank line:

    * ``<repr>.empty is <value>`` when the attribute exists;
    * ``<exception class> raised by`` plus the object's repr when the
      attribute lookup raises ``AttributeError``.

    Only ``AttributeError`` is handled; any other exception propagates.
    Nothing is returned.
    """
    for candidate in item_list:
        try:
            flag = candidate.empty
        except AttributeError as exc:
            report = f'{exc.__class__!s} raised by\n{candidate!r}\n'
        else:
            report = f'{candidate!r}.empty is {flag!s}\n'
        print(report)

In [6]:
def test_nan(item_list: List[Any]):
    """Print ``np.isnan(obj).all()`` for every object in ``item_list``.

    One report is printed per object, followed by a blank line:

    * ``np.isnan(<repr>).all() is <result>`` when the call succeeds;
    * ``<exception class> raised by`` plus the object's repr when it
      raises ``TypeError``.

    The value returned by ``.all()`` is printed as-is, so it is not
    necessarily a single boolean (the result depends on the type that
    ``np.isnan`` hands back for the object).

    Only ``TypeError`` is handled; any other exception propagates.
    Nothing is returned.
    """
    for candidate in item_list:
        try:
            nan_mask = np.isnan(candidate)
            all_nan = nan_mask.all()
        except TypeError as exc:
            report = f'{exc.__class__!s} raised by\n{candidate!r}\n'
        else:
            report = f'np.isnan({candidate!r}).all() is {all_nan!s}\n'
        print(report)

## Introduction

In general we do not want to return empty `subsection.read` results.  However, identifying an _empty_ result is not straightforward.

Some possible **Empty Object** Examples are given below:
- _None_ will always be considered empty. `None` 
- _Empty string_ `''`
- _Empty list_ `[]`
- _Empty dictionary_ `{}`
- _List containing empty string_ `['']`
- _DataFrame containing no values_ `pd.DataFrame()`
- _Series containing no values_ `pd.Series(dtype=object)`
- _DataFrame containing all NaN_ `pd.DataFrame(index=range(2), columns=range(3))` (listed for completeness; it is not included in the test list below)
- _Numpy array of length 0_ `np.array([])`
- _Numpy array containing all NaN_ `np.full((2, 3), np.nan, dtype=float)`


In [7]:
# Objects that a read could return and that should be treated as "empty".
empty_read_itm_possibilities = [
    None,                                  # no value at all
    '',                                    # empty string
    [],                                    # empty list
    {},                                    # empty dictionary
    [''],                                  # list holding only an empty string
    pd.DataFrame(),                        # DataFrame with no rows or columns
    pd.Series(dtype=object),               # Series with no values
    np.array([]),                          # zero-length array
    np.full((2, 3), np.nan, dtype=float),  # array containing only NaN
]

# Show how each candidate prints.
for itm in empty_read_itm_possibilities:
    print(itm)

None

[]
{}
['']
Empty DataFrame
Columns: []
Index: []
Series([], dtype: object)
[]
[[nan nan nan]
 [nan nan nan]]


Some possible **Non-Empty Objects** that might be mistaken for empty are given below:
- _Numerical `0` value_ `0`
- _String of spaces_ `' '`
- _List containing string of spaces_ `[' ']`
- _Dictionary with only `None` as a key_ `{None:0}`
- _Series containing all zeros_ `pd.Series([0])`
- _DataFrame containing some, but not all `NaN` values_ `pd.DataFrame([{0:0}, {None:1}], index=range(2), columns=range(3))`
- _Numpy Array containing some, but not all `NaN` values_ `np.array([[0,np.nan],[np.nan, np.nan]])`
- _Numpy Array containing all zeros_ `np.full((2, 3), 0, dtype=float)`
- _Numpy Array containing all `None`_ `np.empty((2, 3), dtype=object)`


In [8]:
# Objects that hold real content but could be mistaken for "empty".
non_empty_read_itm_possibilities = [
    0,                                           # numeric zero
    ' ',                                         # string of spaces
    [' '],                                       # list holding a string of spaces
    {None: 0},                                   # dictionary keyed only by None
    pd.Series([0]),                              # Series containing only zero
    pd.DataFrame(                                # DataFrame that is mostly NaN
        [{0: 0}, {None: 1}],
        index=range(2),
        columns=range(3),
    ),
    np.array([[0, np.nan], [np.nan, np.nan]]),   # array that is mostly NaN
    np.full((2, 3), 0, dtype=float),             # array of zeros
    np.empty((2, 3), dtype=object),              # object array filled with None
]

# Show how each candidate prints.
for itm in non_empty_read_itm_possibilities:
    print(itm)

0
 
[' ']
{None: 0}
0    0
dtype: int64
     0   1   2
0  0.0 NaN NaN
1  NaN NaN NaN
[[ 0. nan]
 [nan nan]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[None None None]
 [None None None]]


## Tests

`bool(obj)` produces a valid result for the following types:
> `None`, `''`, `[]`, `{}`

For items that produce a valid result, `False` indicates _empty_ and `True` indicates _not empty_, with two exceptions: **numerical values of `0`** and **lists containing an empty string**.
Numerical values of `0` will register as _empty_.
Lists containing an empty string will register as _not empty_.

In [9]:
test_bool(empty_read_itm_possibilities)

bool(None) is False

bool('') is False

bool([]) is False

bool({}) is False

bool(['']) is True

<class 'ValueError'> raised by
Empty DataFrame
Columns: []
Index: []

<class 'ValueError'> raised by
Series([], dtype: object)

bool(array([], dtype=float64)) is False

<class 'ValueError'> raised by
array([[nan, nan, nan],
       [nan, nan, nan]])



  has_value = bool(itm)


In [10]:
test_bool(non_empty_read_itm_possibilities)

bool(0) is False

bool(' ') is True

bool([' ']) is True

bool({None: 0}) is True

<class 'ValueError'> raised by
0    0
dtype: int64

<class 'ValueError'> raised by
     0   1   2
0  0.0 NaN NaN
1  NaN NaN NaN

<class 'ValueError'> raised by
array([[ 0., nan],
       [nan, nan]])

<class 'ValueError'> raised by
array([[0., 0., 0.],
       [0., 0., 0.]])

<class 'ValueError'> raised by
array([[None, None, None],
       [None, None, None]], dtype=object)



`len(obj)` produces a valid result for the following types:
> string, list, dict, pd.DataFrame, pd.Series, np.array

For items that produce a valid result, `0` indicates _empty_ and `>0` indicates _not empty_.  
The cases to note are **lists containing a string of spaces** and **lists containing an empty string**. 
Both of these will register as _not empty_.

In addition `len()` is not valid for type `None`, but this should be caught.


In [11]:
test_len(empty_read_itm_possibilities)

<class 'TypeError'> raised by
None

len('') is 0

len([]) is 0

len({}) is 0

len(['']) is 1

len(Empty DataFrame
Columns: []
Index: []) is 0

len(Series([], dtype: object)) is 0

len(array([], dtype=float64)) is 0

len(array([[nan, nan, nan],
       [nan, nan, nan]])) is 2



In [12]:
test_len(non_empty_read_itm_possibilities)

<class 'TypeError'> raised by
0

len(' ') is 1

len([' ']) is 1

len({None: 0}) is 1

len(0    0
dtype: int64) is 1

len(     0   1   2
0  0.0 NaN NaN
1  NaN NaN NaN) is 2

len(array([[ 0., nan],
       [nan, nan]])) is 2

len(array([[0., 0., 0.],
       [0., 0., 0.]])) is 2

len(array([[None, None, None],
       [None, None, None]], dtype=object)) is 2



- _DataFrame_ and _Series_ have an attribute _empty_ which is `True` if it contains no values.

In [13]:
test_empty(empty_read_itm_possibilities)

<class 'AttributeError'> raised by
None

<class 'AttributeError'> raised by
''

<class 'AttributeError'> raised by
[]

<class 'AttributeError'> raised by
{}

<class 'AttributeError'> raised by
['']

Empty DataFrame
Columns: []
Index: [].empty is True

Series([], dtype: object).empty is True

<class 'AttributeError'> raised by
array([], dtype=float64)

<class 'AttributeError'> raised by
array([[nan, nan, nan],
       [nan, nan, nan]])



In [14]:
test_empty(non_empty_read_itm_possibilities)

<class 'AttributeError'> raised by
0

<class 'AttributeError'> raised by
' '

<class 'AttributeError'> raised by
[' ']

<class 'AttributeError'> raised by
{None: 0}

0    0
dtype: int64.empty is False

     0   1   2
0  0.0 NaN NaN
1  NaN NaN NaN.empty is False

<class 'AttributeError'> raised by
array([[ 0., nan],
       [nan, nan]])

<class 'AttributeError'> raised by
array([[0., 0., 0.],
       [0., 0., 0.]])

<class 'AttributeError'> raised by
array([[None, None, None],
       [None, None, None]], dtype=object)



In [15]:
test_nan(empty_read_itm_possibilities)

<class 'TypeError'> raised by
None

<class 'TypeError'> raised by
''

np.isnan([]).all() is True

<class 'TypeError'> raised by
{}

<class 'TypeError'> raised by
['']

np.isnan(Empty DataFrame
Columns: []
Index: []).all() is Series([], dtype: bool)

<class 'TypeError'> raised by
Series([], dtype: object)

np.isnan(array([], dtype=float64)).all() is True

np.isnan(array([[nan, nan, nan],
       [nan, nan, nan]])).all() is True



In [16]:
test_nan(non_empty_read_itm_possibilities)

np.isnan(0).all() is False

<class 'TypeError'> raised by
' '

<class 'TypeError'> raised by
[' ']

<class 'TypeError'> raised by
{None: 0}

np.isnan(0    0
dtype: int64).all() is False

np.isnan(     0   1   2
0  0.0 NaN NaN
1  NaN NaN NaN).all() is 0    False
1     True
2     True
dtype: bool

np.isnan(array([[ 0., nan],
       [nan, nan]])).all() is False

np.isnan(array([[0., 0., 0.],
       [0., 0., 0.]])).all() is False

<class 'TypeError'> raised by
array([[None, None, None],
       [None, None, None]], dtype=object)



## Conclusion
The best performance was with the `len()` function.  However, it raises a `TypeError` for `None` and we do want to prevent `read` functions from returning `None`.

1. Create a `test_for_empty` attribute in the _Section_ class.
2. Link the `test_for_empty` attribute with an _is_empty_ utility function that treats an object as empty when:
   1. it is `None`, __or__
   2. `len(obj) == 0`.
3. Allow instances of _Section_ to replace the _is_empty_ function by assigning a different (or no) function to the `test_for_empty` attribute.