# Advanced NumPy

In [13]:
from __future__ import division
from numpy.random import randn
from pandas import Series
import numpy as np
np.set_printoptions(precision=4)
import sys

In [14]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

## ndarray object internals

### NumPy dtype hierarchy

In [2]:
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)
np.issubdtype(floats.dtype, np.floating)

True

In [3]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

## Advanced array manipulation

### Reshaping arrays

In [8]:
arr = np.arange(8)
arr
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [9]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [7]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [11]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [13]:
other_arr = np.ones((3, 5))
print(other_arr.shape)
arr.reshape(other_arr.shape)

(3, 5)


array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [17]:
arr = np.arange(15).reshape((1, 5, 3))
print(arr)
arr.ravel()

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]
  [ 9 10 11]
  [12 13 14]]]


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### C vs. Fortran order

In [26]:
arr = np.arange(12).reshape((3, 4))
print(arr)
print(arr.ravel()) # 평탄화를 함!! default가 Column이지!!
print(arr.ravel('F'))
print(arr.ravel('C'))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 0  1  2  3  4  5  6  7  8  9 10 11]
[ 0  4  8  1  5  9  2  6 10  3  7 11]
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [28]:
arr2 = np.arange(12).reshape((3,4), order='F') # order를 생략하면 C이다!!
arr2

array([[ 0,  3,  6,  9],
       [ 1,  4,  7, 10],
       [ 2,  5,  8, 11]])

### Concatenating and splitting arrays

In [None]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [31]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [32]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [34]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [35]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [2]:
from numpy.random import randn
arr = randn(5, 2)
arr

array([[ 0.27584703, -0.93121051],
       [-0.520725  , -1.36911165],
       [ 0.78794003,  1.20637365],
       [ 0.59101229, -1.16472032],
       [ 0.260708  , -0.29091743]])

In [4]:
first, second, third = np.split(arr, [1, 3]) #분리를 했네. 누적으로 분리했네,

In [5]:
first

array([[ 0.27584703, -0.93121051]])

In [6]:
second

array([[-0.520725  , -1.36911165],
       [ 0.78794003,  1.20637365]])

In [7]:
third

array([[ 0.59101229, -1.16472032],
       [ 0.260708  , -0.29091743]])

#### Stacking helpers: 

In [44]:
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr1

array([[0, 1],
       [2, 3],
       [4, 5]])

In [45]:
arr2 = randn(3, 2)
arr2

array([[ 1.6999, -0.7665],
       [ 0.5345, -1.0468],
       [-0.4797,  0.5768]])

In [46]:
np.r_[arr1, arr2]

array([[ 0.    ,  1.    ],
       [ 2.    ,  3.    ],
       [ 4.    ,  5.    ],
       [ 1.6999, -0.7665],
       [ 0.5345, -1.0468],
       [-0.4797,  0.5768]])

In [47]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [ 1.6999, -0.7665,  3.    ],
       [ 0.5345, -1.0468,  4.    ],
       [-0.4797,  0.5768,  5.    ]])

In [48]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repeating elements: tile and repeat

In [10]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [11]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [12]:
arr.repeat([2, 3, 4]) # 원소별로 반복개수 지정!!

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [15]:
arr = randn(2, 2)
arr
arr.repeat(2, axis=0)

array([[-2.1311, -0.4022],
       [ 0.6876,  1.5941]])

array([[-2.1311, -0.4022],
       [-2.1311, -0.4022],
       [ 0.6876,  1.5941],
       [ 0.6876,  1.5941]])

In [16]:
arr.repeat([2, 3], axis=0) # 열로 2,3 개씩 반복해 주네!!
arr.repeat([2, 3], axis=1)

array([[-2.1311, -0.4022],
       [-2.1311, -0.4022],
       [ 0.6876,  1.5941],
       [ 0.6876,  1.5941],
       [ 0.6876,  1.5941]])

array([[-2.1311, -2.1311, -0.4022, -0.4022, -0.4022],
       [ 0.6876,  0.6876,  1.5941,  1.5941,  1.5941]])

In [19]:
arr
np.tile(arr, 2) # 통으로 복사하네!!

array([[-2.1311, -0.4022],
       [ 0.6876,  1.5941]])

array([[-2.1311, -0.4022, -2.1311, -0.4022],
       [ 0.6876,  1.5941,  0.6876,  1.5941]])

In [18]:
arr
np.tile(arr, (2, 1)) # 2행 1열로 통으로 복사!!
np.tile(arr, (3, 2)) # 3행 2열로 통으로 복사!!
# tile이지!! 타일 생각해!!

array([[-2.1311, -0.4022],
       [ 0.6876,  1.5941]])

array([[-2.1311, -0.4022],
       [ 0.6876,  1.5941],
       [-2.1311, -0.4022],
       [ 0.6876,  1.5941]])

array([[-2.1311, -0.4022, -2.1311, -0.4022],
       [ 0.6876,  1.5941,  0.6876,  1.5941],
       [-2.1311, -0.4022, -2.1311, -0.4022],
       [ 0.6876,  1.5941,  0.6876,  1.5941],
       [-2.1311, -0.4022, -2.1311, -0.4022],
       [ 0.6876,  1.5941,  0.6876,  1.5941]])

### Fancy indexing equivalents: take and put

In [23]:
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr
arr[inds]

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

array([700, 100, 200, 600])

In [26]:
arr.take(inds) # arr[inds] 랑 같네!! 결과 같음!! take 쓸 일이 별로 없다.
arr.put(inds, 42) # inds index 위치에 42라는 scalar 값을 넣네!!
arr
arr.put(inds, [40, 41, 42, 43]) # list 형태로 줘서 넣네!!
arr
arr.put(inds, [40, 41, 42]) # cycle로 list가 계속 도네!!
arr
# take나 put이 있는 이유는, 

array([40, 41, 42, 40])

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

array([  0,  41,  42, 300, 400, 500,  40,  40, 800, 900])

In [29]:
inds = [2, 0, 2, 1]
arr = randn(2, 4)
arr
arr.take(inds, axis=1) # select 하네, inds에 맞게!!

array([[-0.2288, -1.4522,  0.0946,  1.3995],
       [-1.0493,  0.2133,  0.2429,  0.6602]])

array([[ 0.0946, -0.2288,  0.0946, -1.4522],
       [ 0.2429, -1.0493,  0.2429,  0.2133]])

## Broadcasting

In [32]:
a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 2.0, 2.0])
a*b

array([ 2.,  4.,  6.])

In [36]:
a = np.array([1.0, 2.0, 3.0])
b = 2.0
a*b
# 브로드캐스팅 시 b의 값이 복사되는 것이 아니므로 메모리 사용과 계산이 효율적으로 이루어짐!
# 이게 전자보다 효율적임!

array([ 2.,  4.,  6.])

### General Broadcasting Rules

- 원소에 대해 두 배열의 형상이 호환되는지 확인
- 끝 차원부터 시작하여 앞으로 짚어감
- 2차원 배열은 다음 조건을 맍고하면 호환된다.
  - 같거나(equal)
  - 한차원이 1
- 두 배열이 같은 차원일 필요 없음
  - 256x256x3 RGB 배열(이미지)의 색상 값을 변경
- 비교되는 차원 중 하나가 1이면 다른 차원 값이 사용됨, 즉. 1쪽의 값이 늘어나거나 "복사" 됨

TODO:: 이부분 예제 있다 붙여라!! 강사님 txt에!!


In [39]:
x = np.arange(4); x
xx = x.reshape(4,1); xx
y = np.ones(5); y
z = np.ones((3,4)); z # tuple이지!!

array([0, 1, 2, 3])

array([[0],
       [1],
       [2],
       [3]])

array([ 1.,  1.,  1.,  1.,  1.])

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])

In [41]:
x.shape
y.shape
x+y # 4랑 5는 안되!!

(4,)

(5,)

ValueError: operands could not be broadcast together with shapes (4,) (5,) 

In [46]:
xx # 4x1
y  # 1x5 지!!
xx.shape # 행, 열
y.shape  # 열
xx+y

array([[0],
       [1],
       [2],
       [3]])

array([ 1.,  1.,  1.,  1.,  1.])

(4, 1)

(5,)

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.,  4.]])

In [49]:
(xx + y).shape
xx+y

(4, 5)

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.,  4.]])

In [51]:
x
z
x.shape
z.shape
(x+z).shape
x+z

array([0, 1, 2, 3])

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])

(4,)

(3, 4)

(3, 4)

array([[ 1.,  2.,  3.,  4.],
       [ 1.,  2.,  3.,  4.],
       [ 1.,  2.,  3.,  4.]])

In [52]:
arr = np.arange(5)
arr
arr * 4

array([0, 1, 2, 3, 4])

array([ 0,  4,  8, 12, 16])

In [60]:
arr = randn(4, 3)
print("- arr")
arr
print("- arr_mean")
arr.mean(0)
demeaned = arr - arr.mean(0)
print("- demeaned")
demeaned
print("- demeaned_mean")
demeaned.mean(0)

- arr


array([[-0.4588,  0.9817,  1.0378],
       [-0.3054,  0.1685, -0.5494],
       [ 1.4433, -0.3236, -1.0287],
       [ 0.6621, -0.5349, -1.0365]])

- arr_mean


array([ 0.3353,  0.0729, -0.3942])

- demeaned


array([[-0.7941,  0.9088,  1.432 ],
       [-0.6407,  0.0955, -0.1552],
       [ 1.108 , -0.3965, -0.6345],
       [ 0.3268, -0.6078, -0.6422]])

- demeaned_mean


array([ -2.7756e-17,   0.0000e+00,   0.0000e+00])

In [68]:
print("- arr")
arr
row_means = arr.mean(1) # 1축 열을 나타내는 건데, 열들의 집합의 평균이기 때문에 오키
print("- row_means")
row_means
print("- row_means_reshape")
row_means.reshape((4, 1))
demeaned = arr - row_means.reshape((4, 1))
print("- demeaned")
demeaned
print("- demeaned_mean")
demeaned.mean(1)

- arr


array([[-0.4588,  0.9817,  1.0378],
       [-0.3054,  0.1685, -0.5494],
       [ 1.4433, -0.3236, -1.0287],
       [ 0.6621, -0.5349, -1.0365]])

- row_means


array([ 0.5203, -0.2288,  0.0303, -0.3031])

- row_means_reshape


array([[ 0.5203],
       [-0.2288],
       [ 0.0303],
       [-0.3031]])

- demeaned


array([[-0.979 ,  0.4615,  0.5175],
       [-0.0766,  0.3973, -0.3206],
       [ 1.413 , -0.3539, -1.0591],
       [ 0.9652, -0.2318, -0.7334]])

- demeaned_mean


array([ -3.7007e-17,   0.0000e+00,   0.0000e+00,   0.0000e+00])

In [69]:
arr - row_means # 에러 발생하지 형태가 맞지 않으니까!!

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

### Broadcasting over other axes

In [112]:
arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [77]:
arr.mean(1)

array([ 0.5203, -0.2288,  0.0303, -0.3031])

In [78]:
arr.mean(0)

array([ 0.3353,  0.0729, -0.3942])

In [73]:
arr - arr.mean(0)

array([[-0.7941,  0.9088,  1.432 ],
       [-0.6407,  0.0955, -0.1552],
       [ 1.108 , -0.3965, -0.6345],
       [ 0.3268, -0.6078, -0.6422]])

In [79]:
arr
arr.mean(1).reshape((4,1))
arr - arr.mean(1).reshape((4, 1)) # 바람직한 방법!! 형태를 맞춰줘야한다.

array([[-0.4588,  0.9817,  1.0378],
       [-0.3054,  0.1685, -0.5494],
       [ 1.4433, -0.3236, -1.0287],
       [ 0.6621, -0.5349, -1.0365]])

array([[ 0.5203],
       [-0.2288],
       [ 0.0303],
       [-0.3031]])

array([[-0.979 ,  0.4615,  0.5175],
       [-0.0766,  0.3973, -0.3206],
       [ 1.413 , -0.3539, -1.0591],
       [ 0.9652, -0.2318, -0.7334]])

In [83]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :] # 축을 하나 추가시켰네.. 행을 추가 시켰네?!?!
arr_3d.shape

(4, 1, 4)

In [84]:
np.newaxis

In [82]:
arr
arr_3d

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

array([[[ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.]]])

In [90]:
arr_1d = np.random.normal(size=3)
arr_1d
arr_1d[:, np.newaxis] # 데이터 전체를 행으로, 열을 추가 시켰다. 3 X 1
arr_1d[np.newaxis, :] # 1 X 3

array([-0.1065,  1.3202,  0.0802])

array([[-0.1065],
       [ 1.3202],
       [ 0.0802]])

array([[-0.1065,  1.3202,  0.0802]])

In [102]:
arr = randn(3, 4, 5)
print("- arr")
arr
print("- arr.mean(2)")
arr.mean(2) # 2번 축(열)에 대해서 평균을 구하자는 거지! 3차원 -> 2차원 되지!!
            # 표,행,열 이었는데 행의 평균을 구한거지. 3x4x5 -> 3x4 됨!
            # 열에 대해서 평균을 냈으니, 열이 사라졌지!! 
depth_means = arr.mean(2) 
print("- depth_means[:, :, np.newaxis]")
depth_means[:, :, np.newaxis] # 3표 4행 1열이 되었네.
demeaned = arr - depth_means[:, :, np.newaxis]
# 형태 맞춰주기 위해서, 
print("- demeaned")
demeaned
print("- demeaned.mean(2)")
demeaned.mean(2) # 또 열에 대해서 평균???

- arr


array([[[ 1.7168,  0.2954,  1.5862, -1.8099, -0.2882],
        [-0.4177, -0.0478, -0.3929,  0.4078, -0.0306],
        [-0.6741, -0.4027,  2.3151,  0.0059,  0.0641],
        [-0.5744, -0.6845,  1.3389,  2.2734, -0.4402]],

       [[-1.363 ,  0.0462,  0.5549,  0.2168,  0.9548],
        [-1.7541,  0.4135, -0.4684,  0.1277,  0.2876],
        [-1.7598,  0.4823, -1.2997,  0.1739, -0.3369],
        [ 2.039 ,  0.127 , -0.2505,  1.1521, -1.9047]],

       [[ 0.4706, -0.3345, -1.1601, -0.1976,  0.5147],
        [ 0.8973,  1.4646, -0.9436,  0.871 , -0.8179],
        [ 0.3602, -1.0081,  0.5416,  0.754 ,  0.6497],
        [-1.6347, -0.6637,  0.6625,  1.8176,  0.4728]]])

- arr.mean(2)


array([[ 0.3   , -0.0962,  0.2616,  0.3826],
       [ 0.0819, -0.2787, -0.548 ,  0.2326],
       [-0.1414,  0.2943,  0.2594,  0.1309]])

- depth_means[:, :, np.newaxis]


array([[[ 0.3   ],
        [-0.0962],
        [ 0.2616],
        [ 0.3826]],

       [[ 0.0819],
        [-0.2787],
        [-0.548 ],
        [ 0.2326]],

       [[-0.1414],
        [ 0.2943],
        [ 0.2594],
        [ 0.1309]]])

- demeaned


array([[[ 1.4167, -0.0047,  1.2862, -2.11  , -0.5882],
        [-0.3215,  0.0485, -0.2967,  0.5041,  0.0656],
        [-0.9358, -0.6643,  2.0534, -0.2557, -0.1976],
        [-0.957 , -1.0672,  0.9563,  1.8907, -0.8228]],

       [[-1.445 , -0.0357,  0.473 ,  0.1348,  0.8728],
        [-1.4754,  0.6922, -0.1896,  0.4064,  0.5664],
        [-1.2117,  1.0303, -0.7516,  0.722 ,  0.2111],
        [ 1.8064, -0.1055, -0.483 ,  0.9195, -2.1373]],

       [[ 0.6119, -0.1931, -1.0187, -0.0562,  0.656 ],
        [ 0.603 ,  1.1703, -1.2379,  0.5767, -1.1122],
        [ 0.1007, -1.2676,  0.2821,  0.4945,  0.3902],
        [-1.7656, -0.7946,  0.5316,  1.6867,  0.3419]]])

- demeaned.mean(2)


array([[  0.0000e+00,   5.5511e-18,   0.0000e+00,  -8.8818e-17],
       [  0.0000e+00,  -2.2204e-17,   1.1102e-17,  -8.8818e-17],
       [ -2.2204e-17,   4.4409e-17,   2.2204e-17,  -6.6613e-17]])

#### This generalized things like [:, :, np.newaxis] to N dimensions

In [97]:
def demean_axis(arr, axis=0):
    means = arr.mean(axis)

    # This generalized things like [:, :, np.newaxis] to N dimensions
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    return arr - means[indexer]

### Setting array values by broadcasting

In [107]:
arr = np.zeros((4, 3))
arr[:] = 5 # 1x1인 5가 4x3으로 변환되는 거다!!
arr

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.]])

In [110]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis] #  arr이 4x3이니 열이 3번 복제 되었지.
arr
arr[:2] = [[-1.37], [0.509]] # arr의 0,1 행에 다음 list를 추가시켜라
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

## Advanced ufunc usage

### Ufunc instance methods

In [None]:
arr = np.arange(10)
np.add.reduce(arr)
arr.sum()

In [None]:
np.random.seed(12346)

In [None]:
arr = randn(5, 5)
arr[::2].sort(1) # sort a few rows
arr[:, :-1] < arr[:, 1:]
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

In [None]:
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

In [None]:
arr = np.arange(3).repeat([1, 2, 2])
arr
np.multiply.outer(arr, np.arange(5))

In [None]:
result = np.subtract.outer(randn(3, 4), randn(5))
result.shape

In [None]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

In [None]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr
np.add.reduceat(arr, [0, 2, 4], axis=1)

### Custom ufuncs

In [None]:
def add_elements(x, y):
    return x + y
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

In [None]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

In [None]:
arr = randn(10000)
%timeit add_them(arr, arr)
%timeit np.add(arr, arr)

## Structured and record arrays

In [None]:
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

In [None]:
sarr[0]
sarr[0]['y']

In [None]:
sarr['x']

### Nested dtypes and multidimensional fields

In [None]:
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

In [None]:
arr[0]['x']

In [None]:
arr['x']

In [None]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']
data['y']
data['x']['a']

### Why use structured arrays?

### Structured array manipulations: numpy.lib.recfunctions

## More about sorting

In [None]:
arr = randn(6)
arr.sort()
arr

In [None]:
arr = randn(3, 5)
arr
arr[:, 0].sort()  # Sort first column values in-place
arr

In [None]:
arr = randn(5)
arr
np.sort(arr)
arr

In [None]:
arr = randn(3, 5)
arr
arr.sort(axis=1)
arr

In [None]:
arr[:, ::-1]

### Indirect sorts: argsort and lexsort

In [None]:
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer
values[indexer]

In [None]:
arr = randn(3, 5)
arr[0] = values
arr
arr[:, arr[0].argsort()]

In [None]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
zip(last_name[sorter], first_name[sorter])

### Alternate sort algorithms

In [None]:
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer
values.take(indexer)

### numpy.searchsorted: Finding elements in a sorted array

In [None]:
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

In [None]:
arr.searchsorted([0, 8, 11, 16])

In [None]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])
arr.searchsorted([0, 1], side='right')

In [None]:
data = np.floor(np.random.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

In [None]:
labels = bins.searchsorted(data)
labels

In [None]:
Series(data).groupby(labels).mean()

In [None]:
np.digitize(data, bins)

## NumPy matrix class

In [None]:
X =  np.array([[ 8.82768214,  3.82222409, -1.14276475,  2.04411587],
               [ 3.82222409,  6.75272284,  0.83909108,  2.08293758],
               [-1.14276475,  0.83909108,  5.01690521,  0.79573241],
               [ 2.04411587,  2.08293758,  0.79573241,  6.24095859]])
X[:, 0]  # one-dimensional
y = X[:, :1]  # two-dimensional by slicing
X
y

In [None]:
np.dot(y.T, np.dot(X, y))

In [None]:
Xm = np.matrix(X)
ym = Xm[:, 0]
Xm
ym
ym.T * Xm * ym

In [None]:
Xm.I * X

## Advanced array input and output

### Memory-mapped files

In [None]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))
mmap

In [None]:
section = mmap[:5]

In [None]:
section[:] = np.random.randn(5, 10000)
mmap.flush()
mmap
del mmap

In [None]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

In [None]:
%xdel mmap
!rm mymmap

### HDF5 and other array storage options

## Performance tips

### The importance of contiguous memory

In [None]:
arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')
arr_c.flags
arr_f.flags
arr_f.flags.f_contiguous

In [None]:
%timeit arr_c.sum(1)
%timeit arr_f.sum(1)

In [None]:
arr_f.copy('C').flags

In [None]:
arr_c[:50].flags.contiguous
arr_c[:, :50].flags

In [None]:
%xdel arr_c
%xdel arr_f
%cd ..

## Other speed options: Cython, f2py, C

```cython
from numpy cimport ndarray, float64_t

def sum_elements(ndarray[float64_t] arr):
    cdef Py_ssize_t i, n = len(arr)
    cdef float64_t result = 0

    for i in range(n):
        result += arr[i]

    return result
```