# Advanced NumPy

In [1]:
from __future__ import division

import os
import sys

import numpy as np
from numpy.random import randn
from pandas import Series

np.set_printoptions(precision=4)

## ndarray object internals

### NumPy dtype hierarchy

In [2]:
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
print(np.issubdtype(ints.dtype, np.integer))
np.issubdtype(floats.dtype, np.floating)

True


True

In [3]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

## Advanced array manipulation

### Reshaping arrays

In [5]:
arr = np.arange(8)
print(arr)
arr.reshape((4, 2))

[0 1 2 3 4 5 6 7]


array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [6]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [7]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [8]:
other_arr = np.ones((3, 5))
print(other_arr.shape)
arr.reshape(other_arr.shape)

(3, 5)


array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [9]:
arr = np.arange(15).reshape((5, 3))
print(arr)
arr.ravel()

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [10]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### C vs. Fortran order

In [11]:
arr = np.arange(12).reshape((3, 4))
print(arr)
print(arr.ravel())
arr.ravel('F')

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

### Concatenating and splitting arrays

In [12]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
print(np.concatenate([arr1, arr2], axis=0))
np.concatenate([arr1, arr2], axis=1)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [13]:
print(np.vstack((arr1, arr2)))
np.hstack((arr1, arr2))

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [15]:
# randn is already imported in the notebook's first cell, so it is not
# re-imported here.
arr = randn(5, 2)
print("arr=",arr)
# Split along axis 0 at row indices 1 and 3 -> rows [0:1], [1:3] and [3:5].
first, second, third = np.split(arr, [1, 3])
print("first=",first)
print("second=",second)
third

arr= [[ 0.6029  1.1805]
 [ 0.302   0.4134]
 [-0.2564  1.1446]
 [-0.5833 -0.0263]
 [ 0.5613  0.6392]]
first= [[ 0.6029  1.1805]]
second= [[ 0.302   0.4134]
 [-0.2564  1.1446]]


array([[-0.5833, -0.0263],
       [ 0.5613,  0.6392]])

#### Stacking helpers: r_ and c_

In [16]:
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = randn(3, 2)
print(np.r_[arr1, arr2])
np.c_[np.r_[arr1, arr2], arr]

[[ 0.      1.    ]
 [ 2.      3.    ]
 [ 4.      5.    ]
 [-0.0911  0.8422]
 [-0.1209 -1.2454]
 [-0.6448  0.509 ]]


array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [-0.0911,  0.8422,  3.    ],
       [-0.1209, -1.2454,  4.    ],
       [-0.6448,  0.509 ,  5.    ]])

In [17]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repeating elements: tile and repeat

In [18]:
arr = np.arange(3)
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [19]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [20]:
arr = randn(2, 2)
print(arr)
arr.repeat(2, axis=0)

[[ 1.0471 -0.3387]
 [ 0.3776  1.7863]]


array([[ 1.0471, -0.3387],
       [ 1.0471, -0.3387],
       [ 0.3776,  1.7863],
       [ 0.3776,  1.7863]])

In [21]:
print(arr.repeat([2, 3], axis=0))
arr.repeat([2, 3], axis=1)

[[ 1.0471 -0.3387]
 [ 1.0471 -0.3387]
 [ 0.3776  1.7863]
 [ 0.3776  1.7863]
 [ 0.3776  1.7863]]


array([[ 1.0471,  1.0471, -0.3387, -0.3387, -0.3387],
       [ 0.3776,  0.3776,  1.7863,  1.7863,  1.7863]])

In [22]:
print(arr)
np.tile(arr, 2)

[[ 1.0471 -0.3387]
 [ 0.3776  1.7863]]


array([[ 1.0471, -0.3387,  1.0471, -0.3387],
       [ 0.3776,  1.7863,  0.3776,  1.7863]])

In [25]:
print("arr=",arr)
print("tile1=",np.tile(arr, (2, 1)))
np.tile(arr, (3, 2))

arr= [[ 1.0471 -0.3387]
 [ 0.3776  1.7863]]
tile1= [[ 1.0471 -0.3387]
 [ 0.3776  1.7863]
 [ 1.0471 -0.3387]
 [ 0.3776  1.7863]]


array([[ 1.0471, -0.3387,  1.0471, -0.3387],
       [ 0.3776,  1.7863,  0.3776,  1.7863],
       [ 1.0471, -0.3387,  1.0471, -0.3387],
       [ 0.3776,  1.7863,  0.3776,  1.7863],
       [ 1.0471, -0.3387,  1.0471, -0.3387],
       [ 0.3776,  1.7863,  0.3776,  1.7863]])

### Fancy indexing equivalents: take and put

In [26]:
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

array([700, 100, 200, 600])

In [27]:
print(arr.take(inds))
print(arr.put(inds, 42))
print(arr)
arr.put(inds, [40, 41, 42, 43])
arr

[700 100 200 600]
None
[  0  42  42 300 400 500  42  42 800 900]


array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [28]:
inds = [2, 0, 2, 1]
arr = randn(2, 4)
print(arr)
arr.take(inds, axis=1)

[[ 0.5018 -2.049   0.591   0.5337]
 [ 0.1041  0.0334 -0.3553  0.5218]]


array([[ 0.591 ,  0.5018,  0.591 , -2.049 ],
       [-0.3553,  0.1041, -0.3553,  0.0334]])

## Broadcasting

In [29]:
arr = np.arange(5)
print(arr)
arr * 4

[0 1 2 3 4]


array([ 0,  4,  8, 12, 16])

In [30]:
arr = randn(4, 3)
print(arr.mean(0))
demeaned = arr - arr.mean(0)
print(demeaned)
demeaned.mean(0)

[ 0.0108 -0.3857  0.1217]
[[-0.0198 -0.6272 -0.1566]
 [-0.4057  1.2594 -1.504 ]
 [ 0.6741 -0.3867  0.773 ]
 [-0.2486 -0.2455  0.8875]]


array([  0.0000e+00,   2.7756e-17,  -2.7756e-17])

In [31]:
print(arr)
row_means = arr.mean(1)
print(row_means.reshape((4, 1)))
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

[[-0.009  -1.0129 -0.0349]
 [-0.3949  0.8738 -1.3823]
 [ 0.6849 -0.7724  0.8947]
 [-0.2378 -0.6312  1.0092]]
[[-0.3523]
 [-0.3012]
 [ 0.2691]
 [ 0.0467]]


array([  3.7007e-17,   0.0000e+00,   7.4015e-17,   0.0000e+00])

### Broadcasting over other axes

In [32]:
# Deliberate failure: arr.mean(1) has shape (4,), which cannot be broadcast
# against arr's shape (4, 3).  The error is caught and printed so the notebook
# still runs from top to bottom.
try:
    arr - arr.mean(1)
except ValueError as exc:
    print("ValueError:", exc)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [33]:
arr - arr.mean(1).reshape((4, 1))

array([[ 0.3432, -0.6606,  0.3174],
       [-0.0938,  1.1749, -1.0811],
       [ 0.4159, -1.0415,  0.6256],
       [-0.2846, -0.6779,  0.9625]])

In [34]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape

(4, 1, 4)

In [35]:
arr_1d = np.random.normal(size=3)
print(arr_1d[:, np.newaxis])
arr_1d[np.newaxis, :]

[[-1.6063]
 [-0.9939]
 [-0.8652]]


array([[-1.6063, -0.9939, -0.8652]])

In [36]:
arr = randn(3, 4, 5)
depth_means = arr.mean(2)
print(depth_means)
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

[[ 0.1305  0.7847  0.0784 -0.2949]
 [ 0.0172  0.3941 -0.943  -0.6895]
 [ 0.7085  0.187  -0.0994  0.2773]]


array([[  0.0000e+00,   2.2204e-17,  -2.2204e-17,   7.2164e-17],
       [  4.4409e-17,   0.0000e+00,   0.0000e+00,   8.8818e-17],
       [  0.0000e+00,   0.0000e+00,   0.0000e+00,  -4.4409e-17]])

In [37]:
def demean_axis(arr, axis=0):
    """Subtract from ``arr`` its mean taken along ``axis``.

    Parameters
    ----------
    arr : ndarray
        Array to demean.
    axis : int, default 0
        Axis along which the mean is computed and removed.

    Returns
    -------
    ndarray
        ``arr`` minus its mean along ``axis``; same shape as ``arr``.
    """
    means = arr.mean(axis)

    # This generalizes things like [:, :, np.newaxis] to N dimensions: a full
    # slice for every axis, with a new length-1 axis where the reduced axis
    # used to be, so that ``means`` broadcasts against ``arr``.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # The index must be a tuple: NumPy deprecated, and later removed, treating
    # a list of slices/None as a multidimensional index.
    return arr - means[tuple(indexer)]

### Setting array values by broadcasting

In [38]:
arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.]])

In [39]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
print(arr)
arr[:2] = [[-1.37], [0.509]]
arr

[[ 1.28  1.28  1.28]
 [-0.42 -0.42 -0.42]
 [ 0.44  0.44  0.44]
 [ 1.6   1.6   1.6 ]]


array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

## Advanced ufunc usage

### Ufunc instance methods

In [40]:
arr = np.arange(10)
print(np.add.reduce(arr))
arr.sum()

45


45

In [41]:
np.random.seed(12346)

In [42]:
arr = randn(5, 5)
arr[::2].sort(1) # sort a few rows
print(arr[:, :-1] < arr[:, 1:])
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

[[ True  True  True  True]
 [False  True False False]
 [ True  True  True  True]
 [ True False  True  True]
 [ True  True  True  True]]


array([ True, False,  True, False,  True], dtype=bool)

In [43]:
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [44]:
arr = np.arange(3).repeat([1, 2, 2])
print(arr)
np.multiply.outer(arr, np.arange(5))

[0 1 1 2 2]


array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [45]:
result = np.subtract.outer(randn(3, 4), randn(5))
result.shape

(3, 4, 5)

In [46]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [47]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
print(arr)
np.add.reduceat(arr, [0, 2, 4], axis=1)

[[ 0  0  0  0  0]
 [ 0  1  2  3  4]
 [ 0  2  4  6  8]
 [ 0  3  6  9 12]]


array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

### Custom ufuncs

In [48]:
def add_elements(x, y):
    """Return the sum of the two operands, ``x + y``."""
    total = x + y
    return total
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [49]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.])

In [50]:
arr = randn(10000)
%timeit add_them(arr, arr)
%timeit np.add(arr, arr)

2.63 ms ± 465 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


4.15 µs ± 34.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Structured and record arrays

In [51]:
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

array([( 1.5   ,  6), ( 3.1416, -2)],
      dtype=[('x', '<f8'), ('y', '<i4')])

In [52]:
print(sarr[0])
sarr[0]['y']

( 1.5, 6)


6

In [53]:
sarr['x']

array([ 1.5   ,  3.1416])

### Nested dtypes and multidimensional fields

In [54]:
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [55]:
arr[0]['x']

array([0, 0, 0])

In [56]:
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [57]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
print(data['x'])
print(data['y'])
data['x']['a']

[( 1.,  2.) ( 3.,  4.)]
[5 6]


array([ 1.,  3.])

### Why use structured arrays?

### Structured array manipulations: numpy.lib.recfunctions

## More about sorting

In [58]:
arr = randn(6)
arr.sort()
arr

array([-1.082 ,  0.3759,  0.8014,  1.1397,  1.2888,  1.8413])

In [59]:
arr = randn(3, 5)
print(arr)
arr[:, 0].sort()  # Sort first column values in-place
arr

[[-0.3318 -1.4711  0.8705 -0.0847 -1.1329]
 [-1.0111 -0.3436  2.1714  0.1234 -0.0189]
 [ 0.1773  0.7424  0.8548  1.038  -0.329 ]]


array([[-1.0111, -1.4711,  0.8705, -0.0847, -1.1329],
       [-0.3318, -0.3436,  2.1714,  0.1234, -0.0189],
       [ 0.1773,  0.7424,  0.8548,  1.038 , -0.329 ]])

In [61]:
arr = randn(5)
print(arr)
print(np.sort(arr))
arr

[ 0.5955 -0.2682  1.3389 -0.1872  0.9111]
[-0.2682 -0.1872  0.5955  0.9111  1.3389]


array([ 0.5955, -0.2682,  1.3389, -0.1872,  0.9111])

In [62]:
arr = randn(3, 5)
print(arr)
arr.sort(axis=1)
arr

[[-0.3215  1.0054 -0.5168  1.1925 -0.1989]
 [ 0.3969 -1.7638  0.6071 -0.2222 -0.2171]
 [-1.2136 -0.8704 -0.2306  1.0438 -1.1441]]


array([[-0.5168, -0.3215, -0.1989,  1.0054,  1.1925],
       [-1.7638, -0.2222, -0.2171,  0.3969,  0.6071],
       [-1.2136, -1.1441, -0.8704, -0.2306,  1.0438]])

In [63]:
arr[:, ::-1]

array([[ 1.1925,  1.0054, -0.1989, -0.3215, -0.5168],
       [ 0.6071,  0.3969, -0.2171, -0.2222, -1.7638],
       [ 1.0438, -0.2306, -0.8704, -1.1441, -1.2136]])

### Indirect sorts: argsort and lexsort

In [64]:
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
print(indexer)
values[indexer]

[1 2 4 3 0]


array([0, 1, 2, 3, 5])

In [65]:
arr = randn(3, 5)
arr[0] = values
print(arr)
arr[:, arr[0].argsort()]

[[ 5.      0.      1.      3.      2.    ]
 [-0.2089  0.2316  0.728  -1.3918  1.9956]
 [-0.2981  1.2037 -0.0158  0.7439  0.8688]]


array([[ 0.    ,  1.    ,  2.    ,  3.    ,  5.    ],
       [ 0.2316,  0.728 ,  1.9956, -1.3918, -0.2089],
       [ 1.2037, -0.0158,  0.8688,  0.7439, -0.2981]])

In [67]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

### Alternate sort algorithms

In [68]:
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
print(indexer)
values.take(indexer)

[2 3 4 0 1]


array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

### numpy.searchsorted: Finding elements in a sorted array

In [69]:
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

3

In [70]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5])

In [71]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
print(arr.searchsorted([0, 1]))
arr.searchsorted([0, 1], side='right')

[0 3]


array([3, 7])

In [72]:
data = np.floor(np.random.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

array([ 2673.,  6152.,  2774.,  5130.,  9553.,  4997.,  1794.,  9688.,
         426.,  1612.,   651.,  8653.,  1695.,  4764.,  1052.,  4836.,
        8020.,  3479.,  1513.,  5872.,  8992.,  7656.,  4764.,  5383.,
        2319.,  4280.,  4150.,  8601.,  3946.,  9904.,  7286.,  9969.,
        6032.,  4574.,  8480.,  4298.,  2708.,  7358.,  6439.,  7916.,
        3899.,  9182.,   871.,  7973.,  1360.,  1676.,  6289.,  9587.,
        6697.,  6341.])

In [73]:
labels = bins.searchsorted(data)
labels

array([3, 4, 3, 4, 4, 3, 3, 4, 2, 3, 2, 4, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 3,
       4, 3, 3, 3, 4, 3, 4, 4, 4, 4, 3, 4, 3, 3, 4, 4, 4, 3, 4, 2, 4, 3, 3,
       4, 4, 4, 4])

In [74]:
Series(data).groupby(labels).mean()

2     649.333333
3    3143.772727
4    7726.120000
dtype: float64

In [75]:
np.digitize(data, bins)

array([3, 4, 3, 4, 4, 3, 3, 4, 2, 3, 2, 4, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 3,
       4, 3, 3, 3, 4, 3, 4, 4, 4, 4, 3, 4, 3, 3, 4, 4, 4, 3, 4, 2, 4, 3, 3,
       4, 4, 4, 4])

## NumPy matrix class

In [76]:
X =  np.array([[ 8.82768214,  3.82222409, -1.14276475,  2.04411587],
               [ 3.82222409,  6.75272284,  0.83909108,  2.08293758],
               [-1.14276475,  0.83909108,  5.01690521,  0.79573241],
               [ 2.04411587,  2.08293758,  0.79573241,  6.24095859]])
print(X[:, 0])  # one-dimensional
y = X[:, :1]  # two-dimensional by slicing
print(X)
y

[ 8.8277  3.8222 -1.1428  2.0441]
[[ 8.8277  3.8222 -1.1428  2.0441]
 [ 3.8222  6.7527  0.8391  2.0829]
 [-1.1428  0.8391  5.0169  0.7957]
 [ 2.0441  2.0829  0.7957  6.241 ]]


array([[ 8.8277],
       [ 3.8222],
       [-1.1428],
       [ 2.0441]])

In [77]:
np.dot(y.T, np.dot(X, y))

array([[ 1195.468]])

In [78]:
Xm = np.matrix(X)
ym = Xm[:, 0]
print(Xm)
print(ym)
ym.T * Xm * ym

[[ 8.8277  3.8222 -1.1428  2.0441]
 [ 3.8222  6.7527  0.8391  2.0829]
 [-1.1428  0.8391  5.0169  0.7957]
 [ 2.0441  2.0829  0.7957  6.241 ]]
[[ 8.8277]
 [ 3.8222]
 [-1.1428]
 [ 2.0441]]


matrix([[ 1195.468]])

In [79]:
Xm.I * X

matrix([[  1.0000e+00,  -2.0817e-17,  -5.5511e-17,   5.5511e-17],
        [  1.5266e-16,   1.0000e+00,   5.5511e-17,   5.5511e-17],
        [  1.1102e-16,   2.7756e-17,   1.0000e+00,   1.3878e-17],
        [ -5.5511e-17,   0.0000e+00,   3.4694e-18,   1.0000e+00]])

## Advanced array input and output

### Memory-mapped files

In [80]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))
mmap

memmap([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [81]:
section = mmap[:5]

In [82]:
section[:] = np.random.randn(5, 10000)
mmap.flush()
print(mmap)
del mmap

[[-0.4286 -1.2487  0.1909 ..., -0.7684 -1.4242 -0.1276]
 [-1.4148  1.2986  0.8006 ..., -0.7444  0.4645 -0.5043]
 [ 0.7843 -0.8663  0.7534 ..., -0.6288 -1.0877  0.6596]
 ..., 
 [ 0.      0.      0.     ...,  0.      0.      0.    ]
 [ 0.      0.      0.     ...,  0.      0.      0.    ]
 [ 0.      0.      0.     ...,  0.      0.      0.    ]]


In [83]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

memmap([[-0.4286, -1.2487,  0.1909, ..., -0.7684, -1.4242, -0.1276],
        [-1.4148,  1.2986,  0.8006, ..., -0.7444,  0.4645, -0.5043],
        [ 0.7843, -0.8663,  0.7534, ..., -0.6288, -1.0877,  0.6596],
        ..., 
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [84]:
%xdel mmap
!rm mymmap

### HDF5 and other array storage options

## Performance tips

### The importance of contiguous memory

In [85]:
arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')
print("cflags=",arr_c.flags)
print("fflags=",arr_f.flags)
arr_f.flags.f_contiguous

cflags=   C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
fflags=   C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False


True

In [86]:
%timeit arr_c.sum(1)
%timeit arr_f.sum(1)

467 µs ± 58.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


456 µs ± 7.17 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [87]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [88]:
print(arr_c[:50].flags.contiguous)
arr_c[:, :50].flags

True


  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [89]:
%xdel arr_c
%xdel arr_f
%cd ..

/Users/imc023/Documents/PycharmProjects/pydata-book


## Other speed options: Cython, f2py, C

```cython
from numpy cimport ndarray, float64_t

# Sum the elements of a float64 ndarray with an explicitly typed index loop.
# The argument is indexed with a single subscript, so a one-dimensional
# array is expected.
def sum_elements(ndarray[float64_t] arr):
    # i is the loop index and n the element count, both typed as Py_ssize_t.
    cdef Py_ssize_t i, n = len(arr)
    # Running total, typed as float64_t and starting at zero.
    cdef float64_t result = 0

    for i in range(n):
        result += arr[i]

    return result
```