In [122]:
intention = '''Draft or create a class system to represent the pular entries. 
Ideally this will contain a way to nest entry objects under a root
'''
#%pip install docx
#%pip install python-docx #this mutates docx? 
#%pip install pydantic
#%pip install mypy
#%pip install numpy
#%pip install typing extensions
#%pip install verbalexpressions
# %pip install typing-extensions
from typing import Optional, Dict, List, Any, Union, Tuple, TypeVar
from typing_extensions import Literal
from pydantic import BaseModel, ValidationError, validator, root_validator, Field, constr
import json
import docx
from docx import Document
from datetime import datetime
import logging
from itertools import compress, tee, chain
import re
from collections import Counter
import string
import numpy as np
from verbalexpressions import VerEx

In [123]:
#get current datetime
now = datetime.now()
current_time = now.strftime("%Y-%m-%d_-_%H-%M-%S")

# #create file to save prints (use with jupyter magic enabled at the top of this cell: %%capture cap --no-stderr)
# output_name = f"{current_time}_result.txt"
# experiment = input("Enter emperiment description:")
# print(f"Experiment time: {current_time}\nExperiment note: {experiment}\n\n")

logger_filename = f"logs_and_outputs/initialization_placeholder.log"

# Creating an object
logger = logging.getLogger()

# Setting the threshold of logger to DEBUG
logger.setLevel(logging.ERROR)

#add encoding
handler = logging.FileHandler(logger_filename, 'w', 'utf-8') 
handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
logger.addHandler(handler) 

# # Test messages
logger.debug("current_time")
# logger.info("Just an information")
# logger.warning("Its a Warning")
# logger.error("Did you try to divide by zero")
# logger.critical("Internet is down")

In [124]:
def pairwise(iterable):
    # pairwise('ABCDEFG') --> AB BC CD DE EF FG
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)
    
def logger_root_validation_error_messages(e, logger_details, suppress = [], run_enumeration: Optional[int] = None) -> Union[RuntimeError, TypeError]:      
   #TODO add ability to handle assertion errors
   if run_enumeration is not None:
      run_num = f"|run#{run_enumeration}|" #type: ignore 
   else:
      run_num = ""
   try:
      for err in e.errors():
         if err['type'] in suppress['type'] or err['msg'] in suppress['msg']:
            logger.info(f"|SUPRESSED|{logger_details['function']}|{type(e)}|para#{logger_details['paragraph_enumeration']}{run_num}, with validation? error: {err}")
            return TypeError("suppressed Validation Error")
         else:
            logger.error(f"|unsuppressed|{logger_details['function']}|{type(e)}|para#{logger_details['paragraph_enumeration']}{run_num}, with validation? error: {err}")
            return TypeError("un-suppressed Validation Error")
   except:
      logger.error(f"|unsuppressed|{logger_details['function']}|{type(e)}|para#{logger_details['paragraph_enumeration']}{run_num}, with error: {e}")
      return RuntimeError("non-validation error")
   return RuntimeError("non-validation error")

def pular_str_strip_check(s:str) ->bool:
   in_len = len(s)
   new_s = s.strip()
   out_len = len(new_s)
   purported_whitespace: bool = in_len != out_len
   return purported_whitespace

def closest(ranger, target): #any target indeces occuring before the first ranger index will be ignored
   if not isinstance(target,np.ndarray):
      target = np.array(target)
   for a,b in ranger:
      begin = np.searchsorted(target,a)
      end = np.searchsorted(target,b)
      _, out, target = np.split(target, [begin,end])
      yield list(out)
   yield list(target)

def coerceFalse(val):
   if isinstance(val, type(None)):
      return False
   else: 
      return val

In [181]:
from pydantic import BaseModel, PrivateAttr
class Docx_Run(BaseModel):
   """input:   'run': your_run_here
   when given a docx document paragraphs run object, will parse it to a specified schema
   """
   run_text : constr(min_length = 1) #type:ignore #required, must be string, must be 1 long or more
   run_font_name : Union[str,Literal[False]] #required, must be string or None value
   run_font_size_pt : Union[float,Literal[False]] #Required, but must be float OR none value
   run_bold : bool #Required, but must be bool OR none value
   run_italic : bool #Required, but must be bool OR none value

   class Config:
      validate_all = True
      # extra = 'forbid'
      validate_assignment = True
      smart_union = True
      # underscore_attrs_are_private = True

   @root_validator(pre=True)
   def _docx_structure_check(cls, values: Dict[str, Any]) -> Dict[str, Any]:
      run = values.get("run",False)
      assert isinstance(run, eval('docx.text.run.Run')), 'please enter a docx run assigned to the variable "run", in the form of     run = your_run_here'
      
      new_values : Dict[str, Any] = {}

      #select the desired features from the run
      new_values['run_text'] = coerceFalse(run.text) #type: ignore
      new_values['run_font_name'] = coerceFalse(run.font.name) #type: ignore
      if run.font.size is not None: #type: ignore
         new_values['run_font_size_pt'] = coerceFalse(run.font.size.pt) #type: ignore
      else: new_values['run_font_size_pt'] = False
      new_values['run_bold'] = coerceFalse(run.bold) #type: ignore
      new_values['run_italic'] = coerceFalse(run.italic) #type: ignore

      return new_values

   def as_tuple(self) -> Tuple[constr(min_length = 1), Union[str,Literal[False]], Union[float,Literal[False]], bool, bool]:
      return (self.run_text, self.run_font_name, self.run_font_size_pt, self.run_bold, self.run_italic)
   
   def set_text(self, new_string):
      self.run_text = new_string

   def get_text(self):
      return self.run_text



class Docx_Paragraph (BaseModel):
   """input:   'paragraph': your_paragraph_here, 'paragraph_enumeration': your para number# here
   
   when given a docx document's paragraph object, will parse it to a specified schema
   """
   # docx_document_paragraph: Optional[Any] #This should be validated below. Left optional because its inclusion causes problems with default repr and serialization
   paragraph_enumeration: float
   para_text: constr(min_length = 1) #type:ignore ##required, must be string, must be 1 long or more
   para_first_line_indent: Union[float,Literal[False]] #Required, but must be int OR none. https://pydantic-docs.helpmanual.io/usage/models/#required-optional-fields 
   para_left_indent: Union[float,Literal[False]] #Required, but must be int OR none. https://pydantic-docs.helpmanual.io/usage/models/#required-optional-fields 
   #run features
   runs : List[Tuple[int,Docx_Run]]
   # runs: list

   class Config:
      validate_all = True
      extra = 'forbid'
      validate_assignment = True
      smart_union = True

   @root_validator(pre=True)
   def _docx_structure_check(cls, values: Dict[str, Any]) -> Dict[str, Any]:
      para = values.get("paragraph",False)
      assert isinstance(para, eval('docx.text.paragraph.Paragraph')), 'please enter a docx paragraph assigned to the variable "paragraph", in the form of     paragraph = your_paragraph_here'
      paragraph_enumeration = values.get("paragraph_enumeration",False)
      assert isinstance(paragraph_enumeration,int), 'please enter an float for the paragraph_enumeration'

      new_values: Dict[str, Any] = {}
      #extract para features, 
      new_values['para_text'] = para.text #type: ignore
      new_values['para_first_line_indent'] = para.paragraph_format.first_line_indent #type: ignore
      new_values['para_left_indent'] = para.paragraph_format.left_indent #type: ignore
      new_values['paragraph_enumeration'] = paragraph_enumeration

      suppress = {'type': ['value_error.any_str.min_length'], #ignore zero length run_text, per run validator
                  'msg': ['suppressed Validation Error']} #ignore suppressed errors earlier/lower in the stack      
      logger_details = {'function':'parsed_run', 'paragraph_enumeration':paragraph_enumeration}
      try:
         parsed_run_list = []
         for run_enumumeration,run in enumerate(para.runs): #type: ignore
            # print(i)
            # print(run.text)
            parsed_run = Docx_Run.parse_obj({'run':run})
            parsed_run_list.append((run_enumumeration,parsed_run))
         new_values['runs'] = parsed_run_list
      except ValidationError as e:
            new_e = logger_root_validation_error_messages(e, logger_details, suppress,run_enumeration=None)
            raise new_e
      except:
         raise         
      return new_values

# docx_filename = "Fula_Dictionary-repaired.docx"
docx_filename = "pasted_docx page 1.docx"
document = Document(docx_filename)

test_list = []
for i, para in enumerate(document.paragraphs):
   test_list.append(para)
r_raw = test_list[4].runs[2]
r = {'run':r_raw}

parse_list = []
for i,para in enumerate(test_list):
   try:
      obj = Docx_Paragraph.parse_obj({'paragraph':test_list[i],'para_enumeration':i})
      parse_list.append(obj)
      print(repr(obj))
      
   except ValidationError as e:
      suppress = {
            # 'type': ['value_error.any_str.min_length' #ignore zero length run_text, per run validator
            #          ],
            'msg': ['suppressed Validation Error'] #ignore suppressed errors earlier/lower in the stack      
      }
      for err in e.errors():
         if err['msg'] in suppress['msg']:
            # handled_errors.append((i,para))
            print('supressed: ',i)
            pass
   except BaseException as e:
      print(e)
      raise e
      # failed_paras_ind.append((i,para))

# i=4
# parse_list.append(Docx_Paragraph.parse_obj({'paragraph':test_list[i],'para_enumeration':i}))
# print(repr(parse_list[i]))
# parse_list = []
# for i,r in enumerate(test_list[i].runs):
#    parse_list.append(Docx_Run.parse_obj({'run':r}))
# parse_list[i].get_text()
# print(len(parse_list[i].dict().keys()))
# len(parse_list[i].as_tuple())

supressed:  0
Docx_Paragraph(paragraph_enumeration=0.0, para_text='A', para_first_line_indent=-182880.0, para_left_indent=182880.0, runs=[(0, Docx_Run(run_text='A', run_font_name='False', run_font_size_pt=12.0, run_bold=False, run_italic=False))])
Docx_Paragraph(paragraph_enumeration=0.0, para_text='a  prn,sbj,sf  DFZH  Z<->', para_first_line_indent=-91440.0, para_left_indent=182880.0, runs=[(0, Docx_Run(run_text='a  ', run_font_name='TmsRmn 10pt', run_font_size_pt=0.0, run_bold=True, run_italic=False)), (1, Docx_Run(run_text='prn', run_font_name='TmsRmn 10pt', run_font_size_pt=0.0, run_bold=False, run_italic=True)), (2, Docx_Run(run_text=',sbj,sf', run_font_name='TmsRmn 10pt', run_font_size_pt=0.0, run_bold=False, run_italic=True)), (3, Docx_Run(run_text='  ', run_font_name='TmsRmn 10pt', run_font_size_pt=0.0, run_bold=False, run_italic=True)), (4, Docx_Run(run_text='DFZH  Z<->', run_font_name='Helv 8pt', run_font_size_pt=8.0, run_bold=False, run_italic=False))])
Docx_Paragraph(paragr

In [204]:
# #hidden private runtext. PRobably uneccessary
# from pydantic import BaseModel, PrivateAttr
# # import property
# class Docx_Run(BaseModel):
#    """input:   'run': your_run_here
#    when given a docx document paragraphs run object, will parse it to a specified schema
#    """
#    _run_text : constr(min_length = 1) #type:ignore #required, must be string, must be 1 long or more
#    run_font_name : Union[str,Literal[False]] #required, must be string or None value
#    run_font_size_pt : Union[float,Literal[False]] #Required, but must be float OR none value
#    run_bold : bool #Required, but must be bool OR none value
#    run_italic : bool #Required, but must be bool OR none value

#    class Config:
#       validate_all = True
#       extra = 'forbid'
#       validate_assignment = True
#       smart_union = True
#       underscore_attrs_are_private = True

#    # def __init__(self, **data):
#    #    super().__init__(**data)
#    #    # this could also be done with default_factory
#    #    try:
#    #       run = data['run']
#    #       assert isinstance(run, eval('docx.text.run.Run')), 'please enter a docx run assigned to the variable "run", in the form of     run = your_run_here'
#    #       rt : constr(min_length = 1) = run.text
#    #       # print(rt)
#    #       self._run_text  = rt
#    #    except: raise
#    # #   self.run_italic = False
#    # #   return data

#    @root_validator(pre=True)
#    def _docx_structure_check(cls, values: Dict[str, Any]) -> Dict[str, Any]:
#       run = values.get("run",False)
#       assert isinstance(run, eval('docx.text.run.Run')), 'please enter a docx run assigned to the variable "run", in the form of     run = your_run_here'
      
#       new_values : Dict[str, Any] = {}

#       #select the desired features from the run
#       new_values['_run_text'] = coerceFalse(run.text) #type: ignore
#       new_values['run_font_name'] = coerceFalse(run.font.name) #type: ignore
#       if run.font.size is not None: #type: ignore
#          new_values['run_font_size_pt'] = coerceFalse(run.font.size.pt) #type: ignore
#       else: new_values['run_font_size_pt'] = False
#       new_values['run_bold'] = coerceFalse(run.bold) #type: ignore
#       new_values['run_italic'] = coerceFalse(run.italic) #type: ignore

#       return new_values

#    def as_tuple(self) -> Tuple[constr(min_length = 1), Union[str,Literal[False]], Union[float,Literal[False]], bool, bool]:
#       return (self._run_text, self.run_font_name, self.run_font_size_pt, self.run_bold, self.run_italic)
   
#    @property
#    def run_text(self):
#       return self._run_text
   
#    # @run
#    # @run_text.setter
#    # def set_text(self, new_string):
#    #    self._run_text = new_string

#    @run_text.getter
#    def get_text(self):
#       return self._run_text





# class Docx_Paragraph (BaseModel):
#    """input:   'paragraph': your_paragraph_here, 'paragraph_enumeration': your para number# here
   
#    when given a docx document's paragraph object, will parse it to a specified schema
#    """
#    # docx_document_paragraph: Optional[Any] #This should be validated below. Left optional because its inclusion causes problems with default repr and serialization
#    paragraph_enumeration: float
#    para_text: constr(min_length = 1) #type:ignore ##required, must be string, must be 1 long or more
#    para_first_line_indent: Union[float,Literal[False]] #Required, but must be int OR none. https://pydantic-docs.helpmanual.io/usage/models/#required-optional-fields 
#    para_left_indent: Union[float,Literal[False]] #Required, but must be int OR none. https://pydantic-docs.helpmanual.io/usage/models/#required-optional-fields 
#    #run features
#    runs : List[Tuple[int,Docx_Run]]
#    # runs: list

#    class Config:
#       validate_all = True
#       extra = 'forbid'
#       validate_assignment = True
#       smart_union = True

#    @root_validator(pre=True)
#    def _docx_structure_check(cls, values: Dict[str, Any]) -> Dict[str, Any]:
#       para = values.get("paragraph",False)
#       assert isinstance(para, eval('docx.text.paragraph.Paragraph')), 'please enter a docx paragraph assigned to the variable "paragraph", in the form of     paragraph = your_paragraph_here'
#       paragraph_enumeration = values.get("paragraph_enumeration",False)
#       assert isinstance(paragraph_enumeration,int), 'please enter an float for the paragraph_enumeration'

#       new_values: Dict[str, Any] = {}
#       #extract para features, 
#       new_values['para_text'] = para.text #type: ignore
#       new_values['para_first_line_indent'] = para.paragraph_format.first_line_indent #type: ignore
#       new_values['para_left_indent'] = para.paragraph_format.left_indent #type: ignore
#       new_values['paragraph_enumeration'] = paragraph_enumeration

#       suppress = {'type': ['value_error.any_str.min_length'], #ignore zero length run_text, per run validator
#                   'msg': ['suppressed Validation Error']} #ignore suppressed errors earlier/lower in the stack      
#       logger_details = {'function':'parsed_run', 'paragraph_enumeration':paragraph_enumeration}
#       try:
#          parsed_run_list = []
#          for run_enumumeration,run in enumerate(para.runs): #type: ignore
#             # print(i)
#             # print(run.text)
#             parsed_run = Docx_Run.parse_obj({'run':run})
#             parsed_run_list.append((run_enumumeration,parsed_run))
#          new_values['runs'] = parsed_run_list
#       except ValidationError as e:
#             new_e = logger_root_validation_error_messages(e, logger_details, suppress,run_enumeration=None)
#             raise new_e
#       except:
#          raise         
#       return new_values

# # docx_filename = "Fula_Dictionary-repaired.docx"
# docx_filename = "pasted_docx page 1.docx"
# document = Document(docx_filename)

# test_list = []
# for i, para in enumerate(document.paragraphs):
#    test_list.append(para)
# r_raw = test_list[4].runs[2]
# r = {'run':r_raw}

# parse_list = []
# for i,para in enumerate(test_list):
#    try:
#       obj = Docx_Paragraph.parse_obj({'paragraph':test_list[i],'para_enumeration':i})
#       parse_list.append(obj)
#       print(repr(obj))

      
#    except ValidationError as e:
#       suppress = {
#             # 'type': ['value_error.any_str.min_length' #ignore zero length run_text, per run validator
#             #          ],
#             'msg': ['suppressed Validation Error'] #ignore suppressed errors earlier/lower in the stack      
#       }
#       # print('suppress')
#       for err in e.errors():
#          if err['msg'] in suppress['msg']:
#             # handled_errors.append((i,para))
#             print('supressed: ',i)
#             pass
#          else: raise e
#    except BaseException as e:
#       print(e)
#       raise e

# # parse_list[4].runs[1][1].as_tuple()
# # i=4
# # parse_list.append(Docx_Paragraph.parse_obj({'paragraph':test_list[i],'para_enumeration':i}))
# # print(repr(parse_list[i]))
# # parse_list = []
# # for i,r in enumerate(test_list[i].runs):
# #    parse_list.append(Docx_Run.parse_obj({'run':r}))
# # parse_list[i].get_text()
# # print(len(parse_list[i].dict().keys()))
# # len(parse_list[i].as_tuple())

In [126]:
# class Docx_Paragraph_and_Runs (BaseModel):
#    """input:   paragraph = your_paragraph_here
   
#    when given a docx document's paragraph object, will parse it to a specified schema
#    """

#    class Config:
#       extra = 'allow'
#       # arbitrary_types_allowed = True

#    @root_validator(pre=True) #TODO Try have post validator for runs only?
#    def _docx_structure_check(cls, values: Dict[str, Any]) -> Dict[str, Any]:
#       new_values: Dict[str, Any] = {}
#       para = values.get("paragraph",False)
#       assert isinstance(para, eval('docx.text.paragraph.Paragraph')), 'please enter a docx paragraph assigned to the variable "paragraph", in the form of     paragraph = your_paragraph_here'
      
#       paragraph_enumeration: int = values.get('paragraph_enumeration',None)
#       assert isinstance(paragraph_enumeration, int), "assertion error, bad paragraph count/paragraph_enumeration value passed. Please pass an integer"
#       new_values['paragraph_enumeration'] = paragraph_enumeration

      
#       #setting up error and logger handling
#       #suppress these errors
#       suppress = {'type': ['value_error.any_str.min_length' #ignore zero length run_text, per run validator
#                            ],
#                   'msg': ['suppressed Validation Error'] #ignore suppressed errors earlier/lower in the stack      
#       }
#       #try to extract para features, 
#       logger_details = {'function':'Docx_Paragraph', 'paragraph_enumeration':paragraph_enumeration }
#       try: 
#          parsed_paras = Docx_Paragraph(**{'paragraph':para}) #type: ignore
#          for k,v in parsed_paras.dict().items():
#             new_values[k] = v
#       # except ValidationError as e:
#       #    logger_root_validation_error_messages(e, logger_details, suppress)
#       except BaseException as e:
#          new_e = logger_root_validation_error_messages(e, logger_details, suppress)
#          raise new_e

#       #try to extract runs features
#       logger_details = {'function':'Docx_Run_List', 'paragraph_enumeration':paragraph_enumeration }    
#       try:
#          parsed_runs = Docx_Run_List(**{'run_list':para.runs, 'paragraph_enumeration':paragraph_enumeration}) #type: ignore
#          for k,v in parsed_runs.dict().items():
#             new_values[k] = v
#       except BaseException as e:
#          new_e = logger_root_validation_error_messages(e, logger_details, suppress)
#          raise new_e
         
#       return new_values
      
#    def interogate__para_text(self) -> str:
#       t = getattr(self, 'para_text', "")
#       # 
#       if len(t) == 0:
#          logger.warning('interogator did not find para_text')
#       #    print("no para_text with:\n\t", self.dict())
#       return t

#    def paragraph_logger(self,level:int,msg:str,print_bool:bool):
#       if print_bool:
#          print(msg)
#       else:
#          logger.log(level,msg)

#    def single_run_feature_identify(self,params:Dict[str,Any]) -> Tuple[bool,Tuple[List[bool],List[Any]],Tuple[List[bool],List[Optional[str]]]]: 
#       """if regex provided, must be in param dict with name 'text_regex_at_feature', and must be passed as a r'pattern' raw string
#       return tuple of ('feature boolean', feature_Tuple[boolean mask, feature list], regex_tuple[boolean mask, regex match list])
#       """
#       enumeration : Optional[int] = getattr(self,"paragraph_enumeration",None)
#       assert isinstance(enumeration, int),f"bad value for 'paragraph_enumeration' {enumeration}"
#       run_texts : Optional[List[str]] = getattr(self,'run_text',None)
#       assert run_texts is not None, f"bad value for 'run_text' {self.__repr__()}"
#       feature = params['docxFeature']
#       assert isinstance(feature,str),f"bad value for parameter 'docxFeature'. Check params: {params}"
#       text_regex_at_feature = params.get('text_regex_at_feature',False)
#       regex_mask: List[bool] = []
#       regex_matches: List[Optional[str]] = []

#       values_from_runs: List[Optional[Union[float,bool]]] = getattr(self,feature,[None]) 
#       value_mask: List[bool] = [True if x == params['value'] else False for x in values_from_runs]
      
#       if any(value_mask):
#          # print('text and value mask: ',run_texts,value_mask)
#          # if text_regex_at_feature:
#             # pattern = text_regex_at_feature
#             # for text in run_texts:
#             #    match = re.search(pattern, text) #type: ignore
#             #    if match is not None:
#             #       regex_mask.append(True)
#             #       regex_matches.append(match.group(0))
#             #       # print(repr(self))
#             #    else:
#             #       regex_mask.append(False)
#             #       regex_matches.append(None)
#             # print('regex and match: ',regex_mask,regex_matches)
#             # # print(f'inside regex bool for para#{enumeration}\tregex_mask_is: {regex_mask}\t\tvalue_mask is: {value_mask}')
#             # if not any(compress(value_mask,regex_mask)):
#             #    return False, (value_mask, values_from_runs), (regex_mask, regex_matches) #does not have feature
#          return True, (value_mask, run_texts), (regex_mask, regex_matches)  #has Feature
#       else:
#          return False, (value_mask, run_texts), (regex_mask, regex_matches) #does not have feature

#    def modify_run_lists(self, drop_runs: Optional[List[int]] = None, add_runs: Optional[Tuple[int, List[List[Any]]]] = None, merge_runs : bool = False): #-> Optional[Dict[str, List[List[Any]]]]
#       """given a list of indexes as 'drop' will drop those indexes from runlists, and return those dropped
#       given a tuple with an integer index and list of lists (run aligned), will add those to entries to the runlists at that index
#       given bool merge, will greedy merge all runs with the same run features EXCEPT run_text. Run_texts will be concatenated
#       """
#       run_list_req_features: List[str] = Docx_Run_List.schema()['required']
#       assert run_list_req_features[0] == 'run_text', "first feature in the schema should be run_text"
#       para_enumeration = getattr(self, 'paragraph_enumeration',None)
#       assert para_enumeration is not None, 'paragraph did not have an enumeration value'

#       feature_run_lists : List[List[Any]] = []
#       for f in run_list_req_features:
#          feature_run_lists.append(getattr(self,f,[]))
#       pivoted_run_lists = list(map(list, zip(*feature_run_lists)))
#       number_of_runs : int = len(pivoted_run_lists)
#       if number_of_runs < 1:
#          raise ValueError('this paragraph does not have values in the run lists')
      
#       beginning_repr = self.__repr__()

#       if drop_runs is not None:
#          dropped_runs = {}
#          num_dropped = 0
#          for ind in drop_runs:
#             mut_ind = ind - num_dropped #mutate pivot indexes as the pivot array is mutated
#             dropped_runs[ind] = pivoted_run_lists.pop(mut_ind) #mutates pivoted_run_lists
#             num_dropped +=1
#          if number_of_runs == len(pivoted_run_lists):
#             raise RuntimeError('the runs_lists were not shortened as expected')
#          number_of_runs : int = len(pivoted_run_lists)
#          # print(dropped_runs,pivoted_run_lists)
#          feature_run_lists = list(map(list, zip(*pivoted_run_lists)))
#          logger.info(f'para#{para_enumeration} had runs# {drop_runs} dropped. New run_text is: {feature_run_lists[0]}')

#       if add_runs is not None:
#          insert_ind : int = add_runs[0]
#          add_lists = add_runs[1]
#          assert len(add_lists[0]) == number_of_runs, "the added list of lists must have runs of the same length (feature space) as run_lists features in the schema: Docx_Run_List.schema()['required']"
#          if insert_ind == -1:
#             insert_ind = number_of_runs
#          for lst in add_lists:
#             pivoted_run_lists.insert(insert_ind,lst)
#          number_of_runs : int = len(pivoted_run_lists)
#          feature_run_lists = list(map(list, zip(*pivoted_run_lists)))
      
#       merge_occured = False
#       if merge_runs is not False:
#          i = 0
#          still_merging = True
#          # beginning_repr = self.__repr__()
#          while still_merging:
#             pairs = list(pairwise(list(range(len(pivoted_run_lists))))) #index pairs
#             if len(pairs) < 1: #onely 1 run, which causes pairwise to yield empty lists since nothing to pair with
#                break
#             num_merged = 0
#             for a,b in pairs: #where a,b are indexes in the pivoted run list (each index is one run)
#                a -= num_merged #mutate pivot indexes after the pivot array has been mutated
#                b -= num_merged
#                if pivoted_run_lists[a][1:] == pivoted_run_lists[b][1:]: #if all features EXCEPT run_text are the same #TODO add ability to config which features to merge on
#                   pivoted_run_lists[b][0] = pivoted_run_lists[a][0] + pivoted_run_lists[b][0]
#                   pivoted_run_lists.pop(a)
#                   num_merged +=1
#                   merge_occured = True #flag for end of function, to determine if any changes need to be set to 'self'
#                else: pass 
#             if num_merged < 1: #if no merges where made in this iteration, merging is done. Else keep while loop since new merges may occur with new neighbors
#                still_merging = False
#          number_of_runs : int = len(pivoted_run_lists)
#          feature_run_lists = list(map(list, zip(*pivoted_run_lists)))

#       if any([drop_runs is not None, add_runs is not None, merge_occured]):
#          for i, f in enumerate(run_list_req_features):
#             self.__setattr__(f,feature_run_lists[i])

#    def cleaner(self, execute_defaults: bool = True) -> bool : #params:Optional[Dict[str,Any]],
#       """defaults to running "remove_para_leading_whitespace". This removes leading runs that are blank, and strips the first text run of any LEADING whitespace, if any is present.
#       the params dict is not implemented currently
#       returns bool value. True means cleaner would yield a valid para. False currently indicates all runs in para are whitespace.
#       """
#       #TODO aggregate these getattrs so that every function doesn't need to get it themselves. Or simplify this with a function that has an assert bool to require it or not
#       para_enumeration = getattr(self, 'paragraph_enumeration',None)
#       assert para_enumeration is not None, 'paragraph did not have an enumeration value'

#       def remove_para_leading_whitespace(start_ind : int = 0): #run 
#          # try: #expect to fail when reaches the end of the list
#          para_text : Optional[str] = getattr(self, 'para_text',None)
#          if isinstance(para_text,str):
#             if len(para_text.strip()) == 0: #if para's text is ONLY whitespace
#                return False
#          run_text_list : List[str] = getattr(self, 'run_text',[''])
#          num_runs = len(run_text_list)

#          ind = start_ind
#          droppable_runs : List[int] = [] #TODO this dropable section doesnt seem to be working correctly.
#          while ind < num_runs:
#             this_run_text = run_text_list[ind]
#             stripped_run = this_run_text.lstrip() #TODO pass config to this to allow control of what can and can't be dropped.
#             if len(stripped_run) == 0: #found ALL whitespace run. Need to iterate to see if next run is blank or has any leading whitespace
#                droppable_runs.append(ind) #TODO convert this change to a an equivalent para_indent, since this paragraph likely has incorrect indents
#                logger.info(f'paragraph#{para_enumeration} with text "{para_text}" had a run#{ind} with ONLY whitespace')
#             elif len(stripped_run) < len(this_run_text): #found run that is NOT ALL whitespace, but had SOME. Will only happen once. Can stop now since this is the true beginning of this paragraph
#                run_text_list[ind] = stripped_run
#                self.__setattr__("run_text", run_text_list) #TODO convert this change to a an equivalent para_indent, since this paragraph likely has incorrect indents
#                logger.info(f'paragraph#{para_enumeration} with text "{para_text}" had leading whitespace removed')
#                break
#             else: #Can stop now since this is the true beginning of this paragraph
#                break
#             ind +=1
            
#          if len(droppable_runs) > 0: #if a whole run_text was whitespace only
#             if len(droppable_runs) == num_runs: #if the whole paragraph was whitespace only
#                raise RuntimeError(f'for paragraph#{para_enumeration}, all runs purported droppable whitespace, but para_text purported not')
#             self.modify_run_lists(drop_runs = droppable_runs) #this removes whole runs, not just modifying the run_text.
#             logger.info(f'paragraph#{para_enumeration} with text "{para_text}" tried to drop a run whitespace')

#       if execute_defaults:
#          remove_para_leading_whitespace()
#          self.modify_run_lists(merge_runs = True)

#       return True