# Memory of TF data

Once TF features get loaded into memory, how much memory do they actually use?

And can we reduce that footprint?

In this notebook I am following this
[Hands-On Exploration of Python Memory Usage](https://code.tutsplus.com/tutorials/understand-how-much-memory-your-python-objects-use--cms-25609)

We take the
[Dead Sea Scrolls](https://github.com/etcbc/dss) as our leading example.

In [1]:
from sys import getsizeof, stderr
from itertools import chain
from collections import deque
from reprlib import repr
from array import array
from timeit import timeit

In [2]:
from tf.app import use
from tf.applib.helpers import dm

In [3]:
A = use('dss:clone', checkout='clone', hoist=globals())

Using TF-app in /Users/dirk/github/annotation/app-dss/code:
	repo clone offline under ~/github (local github)
Using data in /Users/dirk/github/etcbc/dss/tf/0.4:
	repo clone offline under ~/github (local github)
Using data in /Users/dirk/github/etcbc/dss/parallels/tf/0.4:
	repo clone offline under ~/github (local github)


In [4]:
def deepSize(o, handlers={}, verbose=False, seen=None):
    """ Returns the approximate memory footprint an object and all of its contents.

    Automatically finds the contents of the following builtin containers and
    their subclasses:  tuple, list, deque, dict, set and frozenset.
    To search other containers, add handlers to iterate over their contents:

        handlers = {SomeContainerClass: iter,
                    OtherContainerClass: OtherContainerClass.get_elements}

    """
    dict_handler = lambda d: chain.from_iterable(d.items())
    all_handlers = {tuple: iter,
                    list: iter,
                    deque: iter,
                    dict: dict_handler,
                    set: iter,
                    frozenset: iter,
                   }
    all_handlers.update(handlers)     # user handlers take precedence
    if seen is None:
      seen = set()                      # track which object id's have already been seen
    default_size = getsizeof(0)       # estimate sizeof object without __sizeof__

    def sizeof(o):
        if id(o) in seen:       # do not double count the same object
            return 0
        seen.add(id(o))
        s = getsizeof(o, default_size)

        if verbose:
            print(s, type(o), repr(o), file=stderr)

        for typ, handler in all_handlers.items():
            if isinstance(o, typ):
                s += sum(map(sizeof, handler(o)))
                break
        return s

    return sizeof(o)


##### Example call #####

# if __name__ == '__main__':
#    d = dict(a=1, b=2, c=3, d=[4,5,6,7], e='a string of chars')
#    print(deepSize(d, verbose=True))

In [119]:
def findStretches(sequence, threshold=1, amount=None):
  if amount == 1:
    return ((sequence[0], sequence[-1]),)
  prevN = 1
  gaps = []
  for n in sequence:
    gap = n - prevN
    if gap > 1:
      gaps.append((prevN, n))
    prevN = n
  nInGap = sum(e - b - 1 for (b, e) in gaps if b >= sequence[0] and e <= sequence[-1])
  nTotal = sequence[-1] - sequence[0] + 1
  print(f'gapped = {nInGap:>12,} of {nTotal:>12,}')
  gaps = tuple((b, e) for (b, e) in gaps if e - b > threshold)
  if len(gaps) == 0:
    return ((sequence[0], sequence[-1]),)
  if amount is not None and len(gaps) > amount:
    gaps = sorted(
      sorted(gaps, key=lambda x: (x[0] - x[1], x[0]))[0:amount],
      key=lambda x: x[0],
    )
  stretches = []
  if sequence[0] <= gaps[0][0]:
    stretches.append((sequence[0], gaps[0][0]))
  for (i, gap) in enumerate(gaps[0:-1]):
    stretches.append((gap[1], gaps[i + 1][0]))
  if sequence[-1] >= gaps[-1][1]:
    stretches.append((gaps[-1][1], sequence[-1]))
  return stretches

In [120]:
x = (0, 1, 2, 5, 6, 100, 102, 103, 104, 110, 150, 151)
x = tuple(1000 + n for n in x)
x

(1000, 1001, 1002, 1005, 1006, 1100, 1102, 1103, 1104, 1110, 1150, 1151)

In [121]:
findStretches(x)

gapped =          140 of          152


[(1000, 1002),
 (1005, 1006),
 (1100, 1100),
 (1102, 1104),
 (1110, 1110),
 (1150, 1151)]

In [122]:
x = (
  1,
  2,
  11,
  12,
  2001,
  2002,
  4001,
  4002,
)

In [123]:
findStretches(x)

gapped =        3,994 of        4,002


[(1, 2), (11, 12), (2001, 2002), (4001, 4002)]

In [124]:
class ADictInt(object):
  @classmethod
  def sizeParts(cls):
    def x(packed):
      yield packed.pointers
      yield packed.data
    return x
    
  def __init__(self, dictInt, threshold=10000, amount=5):
    pointers = array('I')
    data = array('I')
    data.append(0)
    pointers.append(0)
    pointer = 1
    origKeys = sorted(dictInt)
    firstKey = origKeys[0]
    lastKey = origKeys[-1]
    stretches = findStretches(origKeys, threshold=threshold, amount=amount)
    offsets = []
    valueIndex = {}
    nKeys = 0
    for (b, e) in stretches:
      offset = b - len(pointers)
      offsets.append((b, e, offset))
      for n in range(b, e + 1):
        val = dictInt.get(n, None)
        if val is None:
          pointers.append(0)
        else:
          nKeys += 1
          thisPointer = valueIndex.get(val, None)
          if thisPointer is None:
            thisPointer = pointer
            valueIndex[val] = pointer
            pointer += 1
            data.append(val)
          pointers.append(thisPointer)
    self.pointers = pointers
    self.data = data
    self.length = nKeys
    self._length = len(pointers)
    self.offsets = offsets
  
  def get(self, n):
    pointers = self.pointers
    data = self.data
    offsets = self.offsets
    for (b, e, offset) in offsets:
      if b <= n <= e:
        m = n - offset
        pointer = pointers[m]
        if pointer == 0:
          return None
        return data[pointer]
    return None

In [250]:
class ADictStr(object):
  @classmethod
  def sizeParts(cls):
    def x(packed):
      yield packed.pointers
      yield packed.data
    return x
    
  def __init__(self, dictStr, threshold=10000, amount=5):
    pointers = array('I')
    data = array('B')
    data.append(0)
    pointers.append(0)
    pointer = 1
    origKeys = sorted(dictStr)
    firstKey = origKeys[0]
    lastKey = origKeys[-1]
    stretches = findStretches(origKeys, threshold=threshold, amount=amount)
    offsets = []
    valueIndex = {}
    nKeys = 0
    for (b, e) in stretches:
      offset = b - len(pointers)
      offsets.append((b, e, offset))
      for n in range(b, e + 1):
        val = dictStr.get(n, None)
        if val is None:
          pointers.append(0)
        else:
          nKeys += 1
          thisPointer = valueIndex.get(val, None)
          if thisPointer is None:
            thisPointer = pointer
            valueIndex[val] = pointer
            bval = bytes(val, encoding ='utf8')
            pointer += len(bval)
            data.extend(bval)
          pointers.append(thisPointer)
    self.pointers = pointers
    self.data = data
    self.length = nKeys
    self._length = len(pointers)
    self.offsets = offsets
    pointers.append(len(data))
  
  def get(self, n):
    pointers = self.pointers
    data = self.data
    offsets = self.offsets
    for (b, e, offset) in offsets:
      if b <= n <= e:
        m = n - offset
        pointer = pointers[m]
        if pointer == 0:
          return None
        return str(data[pointer:pointers[m + 1]], encoding='utf8')
    return None

In [251]:
class ATupStr(object):
  @classmethod
  def sizeParts(cls):
    def x(packed):
      yield packed.pointers
      yield packed.data
    return x
    
  def __init__(self, tupStr, threshold=10000, amount=5):
    pointers = array('I')
    data = array('B')
    pointer = 0
    valueIndex = {}
    for val in tupStr:
      if val is None:
        pointers.append(0)
      else:
        thisPointer = valueIndex.get(val, None)
        if thisPointer is None:
          thisPointer = pointer
          valueIndex[val] = pointer
          pointer += len(val)
          data.extend(ord(c) for c in val)
        pointers.append(thisPointer)
    self.pointers = pointers
    self.data = data
    self.length = len(pointers)
    self._length = len(pointers)
    pointers.append(len(data))
  
  def get(self, n):
    pointers = self.pointers
    data = self.data
    pointer = pointers[n]
    return ''.join(chr(c) for c in data[pointer:pointers[n + 1]])
    return None

Analysis:

how many gaps
how many non gaps

what is the average gain per non gap: 90 for str, 45 for int

the loss per gap is 4 bytes

for string values:
packed strings take 4 bytes per char


In [252]:
def sizeParts(packed):
    yield packed.pointers
    yield packed.data
    
handlers = {
  ADictInt: sizeParts,
  ADictStr: sizeParts,
  ATupStr: sizeParts,
}

In [253]:
odi1 = {1: 1}
ods1 = {1: ''}
pdi1 = ADictInt(odi1)
pds1 = ADictStr(ods1)
dpodi1 = deepSize(odi1)
dpods1 = deepSize(ods1)
dppdi1 = deepSize(pdi1, handlers=handlers)
dppds1 = deepSize(pds1, handlers=handlers)
odi2 = {1: 1, 2: 2}
ods2 = {1: '', 2: ''}
pdi2 = ADictInt(odi2)
pds2 = ADictStr(ods2)
dpodi2 = deepSize(odi2)
dpods2 = deepSize(ods2)
dppdi2 = deepSize(pdi2, handlers=handlers)
dppds2 = deepSize(pds2, handlers=handlers)
print('int')
print(f'{dpodi2-dpodi1} {dppdi2-dppdi1}')
print('str')
print(f'{dpods2-dpods1} {dppds2-dppds1}')

gapped =            0 of            1
gapped =            0 of            1
gapped =            0 of            2
gapped =            0 of            2
int
28 0
str
28 0


In [224]:
dppdi1

216

In [225]:
dppdi2

216

In [226]:
pdi1.pointers

array('I', [0, 1])

In [227]:
pdi1.data

array('I', [0, 1])

In [228]:
pdi2.pointers

array('I', [0, 1, 2])

In [229]:
pdi2.data

array('I', [0, 1, 2])

In [230]:
deepSize(pdi1, handlers=handlers)

216

In [231]:
deepSize(pdi2)

56

In [232]:
cn = chr(0x12000)

In [233]:
cn

'𒀀'

In [243]:
x = cn + 'è' + 'd'

In [244]:
b = bytes(x, encoding='utf8')

In [248]:
c = str(b, encoding='utf8')

In [249]:
c

'𒀀èd'

In [246]:
len(b)

7

In [257]:
od = {i:cn+str(i)+'a'*1000 for i in range(1000)}
pd = ADictStr(od)

gapped =            0 of        1,000


In [258]:
deepSize(od)

4156524

In [259]:
deepSize(pd, handlers=handlers)

1027271

In [260]:
pd.get(500)

'𒀀500aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

In [127]:
maxNode = 4500
orig = dict((
  (1, 1),
  (2, 2),
  (11, 11),
  (12, 12),
  (2001, 2001),
  (2002, 2002),
  (4001, 4001),
  (4002, 4002),
))

In [128]:
packed = ADictInt(orig)

gapped =        3,994 of        4,002


In [145]:
maxNode = F.otype.maxNode
orig = TF.features['otype'].data[0:-3]
packed = ATupStr(orig)
print(f'{type(orig)} {len(orig):>12,} {packed.length:>12,}')
print(f'{deepSize(orig):>12,} {deepSize(packed, handlers=handlers):>12,}')

<class 'tuple'>      677,617      677,617
   5,421,310    2,844,186


In [136]:
orig = TF.features['punc'].data
packed = ADictStr(orig, threshold=10000, amount=5)
lOrig = len(orig)
lPacked = packed.length
dsOrig = deepSize(orig)
dsPacked = deepSize(packed, handlers=handlers)
print(f'{lOrig:>12,} {lPacked:>12,}')
print(f'{dsOrig:>12,} {dsPacked:>12,}') 
print(f'{int(round(dsOrig/lOrig))} {int(round(dsPacked/lPacked))}') 

gapped =      471,054 of      500,981
      29,927       29,927
   4,423,260    2,100,364
148 70


In [139]:
orig = TF.features['vac'].data
packed = ADictInt(orig, threshold=10000, amount=5)
lOrig = len(orig)
lPacked = packed.length
dsOrig = deepSize(orig)
dsPacked = deepSize(packed, handlers=handlers)
print(f'{lOrig:>12,} {lPacked:>12,}')
print(f'{dsOrig:>12,} {dsPacked:>12,}') 
print(f'{int(round(dsOrig/lOrig))} {int(round(dsPacked/lPacked))}') 

gapped =    1,425,892 of    1,429,416
       3,524        3,524
     246,232    5,887,428
70 1671


In [327]:
print(''.join(chr(x) for x in packed.data))

clusterfragmentlexlinescrollword


In [320]:
packed = ADictInt(orig, threshold=1000, amount=2)
print(f'{type(orig)} {len(orig):>12,} {packed.length:>12,}')
print(f'{deepSize(orig):>12,} {deepSize(packed, handlers=handlers):>12,}')
packed.offsets

TypeError: an integer is required (got type str)

In [275]:
for (b, e, off) in packed.offsets:
  print(f'orig   {b:>10,} => {timeit("orig.get(b + 10, None)", globals=globals(), number=1000000)}')
  print(f'packed {b:>10,} => {timeit("packed.get(b + 10)", globals=globals(), number=1000000)}')

orig        4,982 => 0.12814535399957094
packed      4,982 => 0.39552630800062616
orig      838,206 => 0.11129890800111752
packed    838,206 => 0.4424262680004176
orig      859,907 => 0.11307854299957398
packed    859,907 => 0.5167321080007241


In [231]:
diff1 = []
diff2 = []

for n in range(1, maxNode + 1):
  if n in orig:
    if orig[n] != packed.get(n):
      diff1.append(n)
  else:
    if packed.get(n) is not None:
      diff2.append(n)
      
print(len(diff1), len(diff2))

0 0


In [18]:
class ADictStr(object):
  def __init__(self, dictInt):
    keys = array('I')
    pointers = array('I')
    data = array('B')
    data.append(0)
    pointers.append(0)
    pointer = 1
    offset = 0
    keys = sorted(dictInt)
    firstKey = keys[0]
    lastKey = keys[-1]
    offset = firstKey - 1
    for n in range(firstKey, lastKey + 1):
      m = n - offset
      if n in dictInt:
        keys.append(m)
        pointers.append(pointer)
        val = dictInt[n]
        data.extend(list(val))
        pointer += len(val)
      else:
        pointers.append(0)
    self.keys = keys
    self.pointers = pointers
    self.data = data
    self.length = len(keys)
    self._length = len(pointers)
    self.offset = offset
    pointers.append(len(data))
    
  def get(self, n):
    pointers = self.pointers
    data = self.data
    offset = self.offset
    m = n - offset
    if m > self._length:
      return None
    pointer = pointers[m]
    if pointer == 0:
      return None
    return ''.join(data[''.pointers[m]:pointers[m + 1]])
  
  def item(self):
    keys = self.keys
    pointers = self.pointers
    data = self.data
    offset = self.offset
    for m in keys:
      n = m + offset
      yield (n, ''.join(data[pointers[m]:pointers[m + 1]]))

In [19]:
class ATupTupInt(object):
  def __init__(self, tupOfTup):
    pointers = array('I')
    data = array('I')
    pointer = 0
    for tup in tupOfTup:
      pointers.append(pointer)
      data.extend(tup)
      pointer += len(tup)
    self.keys = ()
    self.pointers = pointers
    self.data = data
    self.length = len(pointers)
    pointers.append(len(data))
    
  def get(self, n):
    pointers = self.pointers
    data = self.data
    if n >= self.length:
      return None
    return data[pointers[n]:pointers[n + 1]]
  
  def item(self):
    pointers = self.pointers
    data = self.data
    for n in range(self.length):
      yield data[pointers[n]:pointers[n + 1]]

In [29]:
def runDictInt(data):
  total = 0
  indent(reset=True)
  for (n, v) in data.items():
    total += v
  info(total)
  
def runPacked1Int(data):
  total = 0
  indent(reset=True)
  for n in data.keys:
    total += data.get(n + data.offset)
    break
  info(total)
  return TF.tm._elapsed()
  
def runPacked2Int(data):
  total = 0
  indent(reset=True)
  for (n, v) in data.item():
    total += v
  info(total)
  return TF.tm._elapsed()

In [30]:
data = TF.features['rec'].data
packed = ADictInt(data)

In [31]:
min(data.keys())

4982

In [33]:
runPacked1Int(packed)

  0.00s 659150


' 0.00s'

In [34]:
packed.get(4982)

1

In [35]:
packed.keys[0]

1

In [27]:
def testFeature(ft):
  info = TF.features[ft]
  origData = info.data
  valType = getattr(info, 'metaData', {}).get('valueType', None)
  
  print(f'Feature "{ft}" ({valType})')

  packedData = ADictStr(origData) if valType == 'str' else ADictInt(origData) if valType == 'int' else ATupTupInt(origData)
  testMember = list(origData.keys())[0] if valType else 0
  
  origVal = origData[testMember]
  packedVal = packedData.get(testMember)
  
  equal = origVal == packedVal
  
  origLen = len(origData)
  packedLen = packedData.length
  
  origSize = deepSize(origData)
  packedSize = deepSize((packedData.keys, packedData.pointers, packedData.data))
  
  itemSize = packedData.data.itemsize
  
  runOrig = runDictInt(origData)
  runPacked1 = runPacked1Int(packedData)
  runPacked2 = runPacked2Int(packedData)
        
  md = f'''
---

# {ft}

title | info | origData | packedData | status
--- | --- | --- | --- | ---
value | {testMember} | {origVal} | {packedVal} | {'OK' if equal else 'XX'}
data | n-items | {origLen:,} | {packedLen:,} | -- 
data | size | {origSize:,} | {packedSize:,} | -- 
item | size | -- | {itemSize} | --
run | acces all data | {runOrig} | {runPacked2} | {runPacked1}
'''
  dm(md)
  
  
  

In [28]:
testFeature('rec')

Feature "rec" (int)
  0.03s 661786


TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'

# WARP features

Every TF dataset has the `otype` and `oslots` features.
Let's see how much memory space they consume.

However, this is not the whole story. We need a deep, recursive `getsizeof` and we use the approach from this
[recipe](https://code.activestate.com/recipes/577504/)

In [74]:
total = 0
for (ft, ftInfo) in TF.features.items():
  data = ftInfo.data
  if data is None:
    print(f'{ft:<15} None')
    continue
  lData = len(data)
  tData = type(data)
  sData = deepSize(data)
  total += sData
  print(f'{ft:<15} {lData:>10,}x => {sData:>12,} b {total:>12,} {tData}')

after              798,546x =>   64,302,482 b   64,302,482 <class 'dict'>
alt                    577x =>       34,712 b   64,337,194 <class 'dict'>
biblical           267,671x =>   17,980,732 b   82,317,926 <class 'dict'>
book               218,186x =>   27,923,866 b  110,241,792 <class 'dict'>
chapter            218,186x =>   24,153,921 b  134,395,713 <class 'dict'>
cl                 312,839x =>   35,669,337 b  170,065,050 <class 'dict'>
cl2                  1,334x =>       76,598 b  170,141,648 <class 'dict'>
cor                  5,698x =>      454,636 b  170,596,284 <class 'dict'>
fragment            64,077x =>    7,498,336 b  178,094,620 <class 'dict'>
full               500,992x =>   75,471,577 b  253,566,197 <class 'dict'>
fulle              500,992x =>   56,845,294 b  310,411,491 <class 'dict'>
fullo              500,992x =>   56,022,086 b  366,433,577 <class 'dict'>
glex               481,281x =>   73,637,279 b  440,070,856 <class 'dict'>
glexe              481,281x =>   58,50

In [75]:
total = 0
seen = set()

for (ft, ftInfo) in TF.features.items():
  data = ftInfo.data
  if data is None:
    print(f'{ft:<15} None')
    continue
  lData = len(data)
  tData = type(data)
  sData = deepSize(data)
  total += sData
  print(f'{ft:<15} {lData:>10,}x => {sData:>12,} b {total:>12,} {tData}')


after              798,546x =>   64,302,482 b   64,302,482 <class 'dict'>
alt                    577x =>       34,712 b   64,337,194 <class 'dict'>
biblical           267,671x =>   17,980,732 b   82,317,926 <class 'dict'>
book               218,186x =>   27,923,866 b  110,241,792 <class 'dict'>
chapter            218,186x =>   24,153,921 b  134,395,713 <class 'dict'>
cl                 312,839x =>   35,669,337 b  170,065,050 <class 'dict'>
cl2                  1,334x =>       76,598 b  170,141,648 <class 'dict'>
cor                  5,698x =>      454,636 b  170,596,284 <class 'dict'>
fragment            64,077x =>    7,498,336 b  178,094,620 <class 'dict'>
full               500,992x =>   75,471,577 b  253,566,197 <class 'dict'>
fulle              500,992x =>   56,845,294 b  310,411,491 <class 'dict'>
fullo              500,992x =>   56,022,086 b  366,433,577 <class 'dict'>
glex               481,281x =>   73,637,279 b  440,070,856 <class 'dict'>
glexe              481,281x =>   58,50

In [20]:
od = TF.features['__order__'].data
od.itemsize

4

In [17]:
x = 1000000000
print(f'{x:>7,}')

1,000,000,000


In [28]:
from array import array

In [29]:
data = (
  (1, 2, 3),
  (4, 5, 6, 7),
  (8,),
  (9, 10),
  (100000000, 1000000000),
)

In [30]:
x = array('I', data[-1])

In [31]:
x

array('I', [100000000, 1000000000])

In [34]:
def arrayify(tupOfTup):
  pointers = array('I')
  data = array('I')
  pointer = 0
  for tup in tupOfTup:
    pointers.append(pointer)
    data.extend(tup)
    pointer += len(tup)
  return (pointers, data)

In [36]:
aData = arrayify(data)
aData

(array('I', [0, 3, 7, 8, 10]),
 array('I', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100000000, 1000000000]))

In [69]:
def geta(aData, n):
  (pointers, data) = aData
  nPointers = len(pointers) - 1
  if n > nPointers:
    return None
  pointer = pointers[n]
  nextPointer = len(data) if n + 1 > nPointers else pointers[n + 1]
  return data[pointer:nextPointer]

In [38]:
geta(aData, 0)

(1, 2, 3)

In [39]:
for i in range(5):
  print(geta(aData, i))

(1, 2, 3)
(4, 5, 6, 7)
(8,)
(9, 10)
(100000000, 1000000000)


In [40]:
deepSize(data)

760

In [41]:
deepSize(aData)

288

In [104]:
data = tuple(tuple(range(i, i + 5)) for i in range(1000000))

In [43]:
deepSize(data)

235971540

In [56]:
aData = arrayify(data)

In [57]:
deepSize(aData)

25122300

In [58]:
indent(reset=True)
total = 0
for x in data:
  total += x[0]
info(total)

  0.12s 499999500000


In [70]:
indent(reset=True)
total = 0
for i in range(len(aData[0])):
  total += geta(aData, i)[0]
info(total)

  0.59s 499999500000


In [102]:
maxNode = F.otype.maxNode

In [157]:
recData = TF.features['rec'].data

In [174]:
orec = ADictInt(recData)

In [146]:
deepSize((orec.keys, orec.pointers, orec.data))

13823792

In [168]:
total = 0
indent(reset=True)
for (n, rec) in recData.items():
  total += rec
info(total)

  0.08s 661786


In [177]:
corData = TF.features['cor'].data

In [178]:
ocor = ADictInt(corData)

In [179]:
deepSize((ocor.keys, ocor.pointers, ocor.data))

8516840

In [180]:
total = 0
indent(reset=True)
for (n, rec) in corData.items():
  total += rec
info(total)

  0.00s 15139


In [169]:
min(recData.keys())

4982

In [170]:
orec.length

2107857

In [171]:
orec.pointers[4981]

0

In [181]:
total = 0
indent(reset=True)
for n in ocor.keys:
  total += ocor.get(n)
info(total)

  0.00s 15139


In [182]:
total = 0
indent(reset=True)
for (n, x) in ocor.item():
  total += x
info(total)

  0.00s 15139


In [92]:
osData[0]

(1,)

In [94]:
oacData = ATuple(osData[0:-1])

In [100]:
for ft in ('__levUp__', '__levDown__', '__boundary__', '__sections__'):
  print(ft)
  aData = ATuple(TF.features[ft].data)
  print(f'{deepSize((aData.pointers, aData.data)):>12,}')

__levUp__
  52,001,232
__levDown__
  14,313,712
__boundary__


TypeError: an integer is required (got type tuple)

In [131]:
acData = ATupTupInt(data)

In [132]:
acData.get(10)

array('I', [10, 11, 12, 13, 14])

In [133]:
indent(reset=True)
total = 0
for i in range(acData.length):
  total += acData.get(i)[0]
info(total)

  0.57s 499999500000


In [134]:
indent(reset=True)
total = 0
for x in acData.elem():
  total += x[0]
info(total)

  0.37s 499999500000


In [135]:
indent(reset=True)
total = 0
for x in acData.elem2():
  total += x[0]
info(total)

  0.38s 499999500000
