### Python module/package imports for this chapter

In [1]:
import sys, math, collections, itertools, multiprocessing, gzip

In [2]:
import numpy as np

import matplotlib
import matplotlib.pyplot as pp

%matplotlib inline

In [3]:
%load_ext line_profiler
%load_ext memory_profiler

## Profiling memory
##### Python List vs Numpy Array
Python List
- implemented with dynamic arrays
- consecutively stored pointers referring to Python objects stored elsewhere
- Python float is actually an object (24 bytes + 8 byte pointer)

Numpy Array
- contiguous region of memory where all of the objects (same-type) are stored together; a float == a float (only 8 bytes)

In [4]:
vector_list = [float(i) for i in range(100000)]

In [5]:
vector_np = np.arange(0, 100000, dtype = 'd')

In [9]:
%%file memory.py

import numpy as np

@profile
def allocate():
    vector_list = [float(i) for i in range(100000)]
    vector_np = np.arange(0, 100000, dtype = 'd')
    
allocate()

Overwriting memory.py


In [11]:
!python -m memory_profiler memory.py

# Numpy array is not winning anymore after Python 3.7

Filename: memory.py

Line #    Mem usage    Increment   Line Contents
     4   53.031 MiB   53.031 MiB   @profile
     5                             def allocate():
     6   56.855 MiB    0.016 MiB       vector_list = [float(i) for i in range(100000)]
     7   57.625 MiB    0.770 MiB       vector_np = np.arange(0, 100000, dtype = 'd')




In [29]:
%%file memory2.py

import numpy as np, collections

@profile
def allocate():
    # 1
    dict_list = [{'x': 1.0 * i, 'y': 2.5 * i} for i in range(100000)]
    
    # 2
    class xy_class(object):
        def __init__(self, x, y):
            self.x, self.y = x, y

    class_list = [xy_class(1.0 * i, 2.5 * i) for i in range(100000)]
    
    # 3
    class xy_slots(object):
        __slots__ = ['x', 'y']

        def __init__(self, x, y):
            self.x, self.y = x, y
        
    slots_list = [xy_slots(1.0 * i, 2.5 * i) for i in range(100000)]
    
    # 4
    xy_namedtuple = collections.namedtuple('xy', ['x', 'y'])
    namedtuple_list = [xy_namedtuple(1.0 * i, 2.5 * i) for i in range(100000)]
    
    # 5
    tuple_list = [(1.0 * i, 2.5 * i) for i in range(100000)]
    
    # 6
    list_list = [[1.0 * i, 2.5 * i] for i in range(100000)]
    
    # 7
    record_np = np.fromiter(((1.0 * i, 2.5 * i) for i in range(100000)),
                       dtype = [('x', 'd'), ('y', 'd')])
    
allocate()

Overwriting memory2.py


In [30]:
!python -m memory_profiler memory2.py

# Numpy array is not winning anymore after Python 3.7

Filename: memory2.py

Line #    Mem usage    Increment   Line Contents
     4   53.039 MiB   53.039 MiB   @profile
     5                             def allocate():
     6                                 # 1
     7   84.652 MiB    0.016 MiB       dict_list = [{'x': 1.0 * i, 'y': 2.5 * i} for i in range(100000)]
     8                                 
     9                                 # 2
    10   84.656 MiB    0.004 MiB       class xy_class(object):
    11  111.273 MiB    0.023 MiB           def __init__(self, x, y):
    12  111.273 MiB    0.008 MiB               self.x, self.y = x, y
    13                             
    14  111.273 MiB    0.699 MiB       class_list = [xy_class(1.0 * i, 2.5 * i) for i in range(100000)]
    15                                 
    16                                 # 3
    17  111.273 MiB    0.000 MiB       class xy_slots(object):
    18  111.273 MiB    0.000 MiB           __slots__ = ['x', 'y']
    19                          