In [1]:
import numpy as np

np.__version__

'1.13.3'

In [2]:
import scipy

scipy.__version__

'1.1.0'

`numpy` – библиотека для векторизованных вычислений. Написана на Си.

`scipy` – библиотека для научных вычислений. Написана на Си, C++ и Fortran.

## За что не любят Python и любят C/C++?

In [3]:
a = list(range(1_000_000))

In [4]:
%%timeit

[e * e for e in a]

109 ms ± 16.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
a = np.arange(1_000_000)

In [6]:
%%timeit

a * a

3.98 ms ± 96.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Самое медленное место в языке – циклы.

In [7]:
rows, cols = 1_000, 1_000

a = [list(range(i, i + cols)) for i in range(0, rows)]
b = [[0 for j in range(cols)] for i in range(rows)]

In [8]:
%%timeit

for i in range(rows):
    for j in range(cols):
        b[i][j] = 2 * a[i][j]

220 ms ± 7.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
a = np.asarray(a)

In [10]:
%%timeit

2 * a

4.14 ms ± 107 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Создание массивов

In [11]:
a = np.array([1, 2, 3, 4, 5])
a

array([1, 2, 3, 4, 5])

In [12]:
a = np.array([1, 2, 3, 4, 5], dtype=np.float64)
a

array([ 1.,  2.,  3.,  4.,  5.])

In [13]:
a = np.array([[1, 2, 3, 4, 5],
              [6, 7, 8, 9, 0]], dtype=float)
a

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 6.,  7.,  8.,  9.,  0.]])

In [14]:
a.shape

(2, 5)

In [15]:
a.ndim

2

In [16]:
a.dtype

dtype('float64')

In [17]:
a.astype(int)

array([[1, 2, 3, 4, 5],
       [6, 7, 8, 9, 0]])

## Размерность массивов

<img src="files/data.png" width="450px">

In [18]:
a

array([[ 1.,  2.,  3.,  4.,  5.],
       [ 6.,  7.,  8.,  9.,  0.]])

In [19]:
a.dtype

dtype('float64')

In [20]:
a.dtype.itemsize   # sizeof(float64)

8

In [21]:
a.shape

(2, 5)

In [22]:
a.strides

(40, 8)

In [23]:
b = a.reshape(5, 2)
b

array([[ 1.,  2.],
       [ 3.,  4.],
       [ 5.,  6.],
       [ 7.,  8.],
       [ 9.,  0.]])

In [24]:
b.shape

(5, 2)

In [25]:
b.strides

(16, 8)

In [26]:
a.reshape(-1, 2)

array([[ 1.,  2.],
       [ 3.,  4.],
       [ 5.,  6.],
       [ 7.,  8.],
       [ 9.,  0.]])

In [27]:
b = a.reshape(5, -1)   # same memory

a[0, 1] = -10
b

array([[  1., -10.],
       [  3.,   4.],
       [  5.,   6.],
       [  7.,   8.],
       [  9.,   0.]])

In [28]:
b = a.flatten()   # copy
a[0, 1] = -20

b

array([  1., -10.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,   0.])

In [29]:
b = a.ravel()     # view (same memory)
a[0, 1] = -30

b

array([  1., -30.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,   0.])

In [30]:
c = a.T           # view (same memory)   # same as a.transpose()
a[0, 1] = -40

c

array([[  1.,   6.],
       [-40.,   7.],
       [  3.,   8.],
       [  4.,   9.],
       [  5.,   0.]])

In [31]:
c.strides, a.strides

((8, 40), (40, 8))

Фиктивная ось – ось с размерностью 1.

In [32]:
b = np.arange(10)
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [33]:
b[np.newaxis, :]   # same as b.reshape(1, *b.shape)

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [34]:
np.expand_dims(b, axis=0)

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [35]:
b[:, np.newaxis]   # same as b.reshape(*b.shape, 1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [36]:
b[np.newaxis, :, np.newaxis].shape

(1, 10, 1)

### Создание массивов с особыми свойствами

In [37]:
np.zeros(shape=(3, 2))

array([[ 0.,  0.],
       [ 0.,  0.],
       [ 0.,  0.]])

In [38]:
np.zeros_like(a)

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [39]:
np.ones(5)   # same as np.ones(shape=(5, ))

array([ 1.,  1.,  1.,  1.,  1.])

In [40]:
np.eye(4)

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

In [41]:
np.arange(1, 10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [42]:
np.arange(1, 10, 2)

array([1, 3, 5, 7, 9])

In [43]:
np.arange(1, 10, 0.5)

array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
        6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5])

In [44]:
np.linspace(0, 1, 5, endpoint=True)

array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

### Избегаем ненужного копирования

In [45]:
a = np.array([1, 2, 3, 4, 5], dtype=np.float32)
b = np.asarray(a)
c = np.array(a)

b, c

(array([ 1.,  2.,  3.,  4.,  5.], dtype=float32),
 array([ 1.,  2.,  3.,  4.,  5.], dtype=float32))

In [46]:
a[0] = 0
b, c

(array([ 0.,  2.,  3.,  4.,  5.], dtype=float32),
 array([ 1.,  2.,  3.,  4.,  5.], dtype=float32))

In [47]:
a = np.array([1, 2, 3, 4, 5], dtype=np.float32)
b = np.asarray(a, dtype=np.int32)
c = np.array(a)

a, b, c

(array([ 1.,  2.,  3.,  4.,  5.], dtype=float32),
 array([1, 2, 3, 4, 5], dtype=int32),
 array([ 1.,  2.,  3.,  4.,  5.], dtype=float32))

In [48]:
a[0] = 0
a, b, c

(array([ 0.,  2.,  3.,  4.,  5.], dtype=float32),
 array([1, 2, 3, 4, 5], dtype=int32),
 array([ 1.,  2.,  3.,  4.,  5.], dtype=float32))

In [49]:
d = [1, 2, 3, 4, 5]
a = np.asarray(d)

d[0] = 0
a

array([1, 2, 3, 4, 5])

## Поэлементные операции над массивами

### Операции со скалярами и унарные операции

In [50]:
a = np.arange(10).reshape(2, -1)
a

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [51]:
a ** 3   # same as np.power(a, 3)

array([[  0,   1,   8,  27,  64],
       [125, 216, 343, 512, 729]])

In [52]:
a + 2    # same as np.add(a, 2)

array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11]])

In [53]:
2 * a    # same as np.multiply(2, a)

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18]])

In [54]:
2 ** a   # same as np.power(2, a)

array([[  1,   2,   4,   8,  16],
       [ 32,  64, 128, 256, 512]])

In [55]:
np.sqrt(a)

array([[ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ],
       [ 2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ]])

In [56]:
np.exp(a)

array([[  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
          2.00855369e+01,   5.45981500e+01],
       [  1.48413159e+02,   4.03428793e+02,   1.09663316e+03,
          2.98095799e+03,   8.10308393e+03]])

In [57]:
np.log(1 + a)

array([[ 0.        ,  0.69314718,  1.09861229,  1.38629436,  1.60943791],
       [ 1.79175947,  1.94591015,  2.07944154,  2.19722458,  2.30258509]])

In [58]:
np.log2(1 + a)

array([[ 0.        ,  1.        ,  1.5849625 ,  2.        ,  2.32192809],
       [ 2.5849625 ,  2.80735492,  3.        ,  3.169925  ,  3.32192809]])

In [59]:
np.sin(a)

array([[ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ],
       [-0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849]])

In [60]:
a > 0   # same as np.greater(a, 0)

array([[False,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]], dtype=bool)

### Агрегирующие операции

In [61]:
np.random.seed(5656)
a = np.random.randint(0, 10, size=(7, ))
a[3] = 10
a

array([ 4,  6,  7, 10,  0,  3,  5])

In [62]:
a.min(), a.max(), a.argmax(), a.sum(), a.prod(), a.mean()

(0, 10, 3, 35, 0, 5.0)

In [63]:
np.min(a), np.max(a), np.argmax(a), np.sum(a), np.prod(a), np.mean(a)

(0, 10, 3, 35, 0, 5.0)

In [64]:
# Strongly discouraged: the Python builtins walk the array element by element
# instead of using NumPy's vectorized reductions (compare the timings of
# b.max() and max(b) further below).

min(a), max(a), sum(a)   # only for one-dimensional arrays

(0, 10, 35)

<img src="files/axis.png" width="350px">

`a.agg(axis=axis)` – агрегирующая операция вдоль оси `axis`:
* выполняет редукцию (агрегирующую операцию) по оси `axis`;
* удаляет ось `axis` из исходного массива.

In [65]:
np.random.seed(5555)

a = np.random.randint(0, 10, size=(3, 7))
a[1, 3] = 15

a

array([[ 2,  3,  0,  5,  2,  0,  3],
       [ 8,  8,  0, 15,  1,  5,  3],
       [ 0,  1,  6,  2,  1,  4,  5]])

In [66]:
a.max(axis=0)   # reduce along axis=0: the maximum of every column

array([ 8,  8,  6, 15,  2,  5,  5])

In [67]:
a.sum(axis=1)   # reduce along axis=1: the sum of every row

array([15, 40, 19])

In [68]:
a

array([[ 2,  3,  0,  5,  2,  0,  3],
       [ 8,  8,  0, 15,  1,  5,  3],
       [ 0,  1,  6,  2,  1,  4,  5]])

In [69]:
a.argmax()

10

In [70]:
a.ravel()[a.argmax()]

15

In [71]:
i = np.argmax(a)
i, j = i // a.shape[1], i % a.shape[1]

print("value =", a[i, j])
print("index =", (i, j))

value = 15
index = (1, 3)


In [72]:
np.unravel_index(np.argmax(a), a.shape)

(1, 3)

Что быстрее?

In [73]:
b = np.random.randint(0, 10, size=(1_000, 1_000))
b[36, 42] = 20
b = b.ravel()

In [74]:
%%timeit

b.max()

1.71 ms ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [75]:
%%timeit

max(b)

95.3 ms ± 1.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


### Унарные операции над булевыми массивами

In [76]:
a = np.asarray([True, True, False, False, True])
a

array([ True,  True, False, False,  True], dtype=bool)

In [77]:
a.any(), np.any(a), any(a)   # the last variant (builtin any) is strongly discouraged

(True, True, True)

In [78]:
a.all(), np.all(a), all(a)   # the last variant (builtin all) is strongly discouraged

(False, False, False)

In [79]:
a = np.asarray([[True, True,  False, False, True ],
                [True, False, False, True,  False]])
a

array([[ True,  True, False, False,  True],
       [ True, False, False,  True, False]], dtype=bool)

In [80]:
a.any(axis=0)

array([ True,  True, False,  True,  True], dtype=bool)

In [81]:
a.all(axis=1)

array([False, False], dtype=bool)

In [82]:
np.logical_not(a)

array([[False, False,  True,  True, False],
       [False,  True,  True, False,  True]], dtype=bool)

In [83]:
~a   # bitwise ; same as np.bitwise_not(a)

array([[False, False,  True,  True, False],
       [False,  True,  True, False,  True]], dtype=bool)

### Бинарные операции

In [84]:
a = a.astype(int)
a

array([[1, 1, 0, 0, 1],
       [1, 0, 0, 1, 0]])

In [85]:
np.random.seed(4968)

b = np.random.randint(0, 10, size=a.shape)
b

array([[3, 0, 8, 1, 3],
       [1, 0, 2, 7, 8]])

In [86]:
a * b   # same as np.multiply(a, b)

array([[3, 0, 0, 0, 3],
       [1, 0, 0, 7, 0]])

In [87]:
a + b   # same as np.add(a, b)

array([[4, 1, 8, 1, 4],
       [2, 0, 2, 8, 8]])

In [88]:
a

array([[1, 1, 0, 0, 1],
       [1, 0, 0, 1, 0]])

In [89]:
b

array([[3, 0, 8, 1, 3],
       [1, 0, 2, 7, 8]])

In [90]:
np.fmax(a, b)   # element-wise maximum

array([[3, 1, 8, 1, 3],
       [1, 0, 2, 7, 8]])

In [91]:
a > b   # same as np.greater(a, b)

array([[False,  True, False, False, False],
       [False, False, False, False, False]], dtype=bool)

Что еще умеют бинарные `ufunc`: https://jakevdp.github.io/PythonDataScienceHandbook/02.03-computation-on-arrays-ufuncs.html

In [92]:
np.random.seed(4987)

a = np.random.random(size=(2, 5))
b = a + np.random.random(size=(2, 5)) * 1e-5

In [93]:
a

array([[ 0.71700122,  0.2663399 ,  0.363807  ,  0.21265099,  0.97651262],
       [ 0.61392251,  0.62911827,  0.19253836,  0.96386783,  0.50772562]])

In [94]:
b

array([[ 0.71700756,  0.2663407 ,  0.36381509,  0.21265902,  0.97651597],
       [ 0.61392769,  0.62912558,  0.19254206,  0.96387406,  0.50773147]])

In [95]:
np.isclose(a, b)

array([[ True,  True, False, False,  True],
       [ True, False, False,  True, False]], dtype=bool)

In [96]:
np.allclose(a, b)     # same as np.isclose(a, b).all()

False

In [97]:
# np.anyclose(a, b)   # doesn't exist
np.isclose(a, b).any()

True

In [98]:
np.random.seed(4987)

a = (np.random.random(size=(5, )) - 0.5) * 1e-7
np.isclose(a, 0, atol=1e-6)

array([ True,  True,  True,  True,  True], dtype=bool)

### Бинарные операции над булевыми массивами

In [99]:
a = np.asarray([True, True,  False, False, True ])
b = np.asarray([True, False, False, True,  False])

a, b

(array([ True,  True, False, False,  True], dtype=bool),
 array([ True, False, False,  True, False], dtype=bool))

In [100]:
np.logical_and(a, b), np.logical_or(a, b), np.logical_xor(a, b)

(array([ True, False, False, False, False], dtype=bool),
 array([ True,  True, False,  True,  True], dtype=bool),
 array([False,  True, False,  True,  True], dtype=bool))

In [101]:
a & b, a | b, a ^ b   # bitwise

(array([ True, False, False, False, False], dtype=bool),
 array([ True,  True, False,  True,  True], dtype=bool),
 array([False,  True, False,  True,  True], dtype=bool))

In [102]:
np.bitwise_and(a, b), np.bitwise_or(a, b), np.bitwise_xor(a, b)

(array([ True, False, False, False, False], dtype=bool),
 array([ True,  True, False,  True,  True], dtype=bool),
 array([False,  True, False,  True,  True], dtype=bool))

### Более хитрые примеры бинарных операций 😏

In [103]:
a = np.arange(30).reshape(3, -1)
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [104]:
b = np.arange(a.shape[1])
b

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [105]:
print("a.shape =", a.shape)
print("b.shape =", b.shape)

a.shape = (3, 10)
b.shape = (10,)


In [106]:
a + b

array([[ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18],
       [10, 12, 14, 16, 18, 20, 22, 24, 26, 28],
       [20, 22, 24, 26, 28, 30, 32, 34, 36, 38]])

In [107]:
b = np.arange(a.shape[0])
b

array([0, 1, 2])

In [108]:
print("a.shape =", a.shape)
print("b.shape =", b.shape)

a.shape = (3, 10)
b.shape = (3,)


In [109]:
# Ooops!

a + b

ValueError: operands could not be broadcast together with shapes (3,10) (3,) 

In [110]:
b = np.arange(a.shape[0])[:, np.newaxis]
b

array([[0],
       [1],
       [2]])

In [111]:
print("a.shape =", a.shape)
print("b.shape =", b.shape)

a.shape = (3, 10)
b.shape = (3, 1)


In [112]:
a + b

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
       [22, 23, 24, 25, 26, 27, 28, 29, 30, 31]])

### И еще более хитрые примеры бинарных операций 🤯

In [113]:
a = np.arange(5).reshape(1, -1)
a

array([[0, 1, 2, 3, 4]])

In [114]:
b = a.reshape(-1, 1)
b

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [115]:
print("a.shape =", a.shape)
print("b.shape =", b.shape)

a.shape = (1, 5)
b.shape = (5, 1)


In [116]:
a + b

array([[0, 1, 2, 3, 4],
       [1, 2, 3, 4, 5],
       [2, 3, 4, 5, 6],
       [3, 4, 5, 6, 7],
       [4, 5, 6, 7, 8]])

In [117]:
a >= b

array([[ True,  True,  True,  True,  True],
       [False,  True,  True,  True,  True],
       [False, False,  True,  True,  True],
       [False, False, False,  True,  True],
       [False, False, False, False,  True]], dtype=bool)

Поэтому, как говорили на уроках физики,
<center><h3>Не забывай следить за размерностью!</h3></center>

**Правило приведения размерностей (broadcasting):**
1. Предположим, что `a.shape = (a_1, a_2, ..., a_n)` и `b.shape = (b_1, b_2, ..., b_n)`. Над `a` и `b` можно произвести поэлементную бинарную операцию, если $\forall \; i \in \overline{1..n}$ выполнено хотя бы одно из условий:
    * `a_i == b_i`;
    * `a_i == 1`;
    * `b_i == 1`.


2. Если число осей (`ndim`) массивов различается, то к форме массива с меньшим числом осей слева добавляются фиктивные оси (длины 1), после чего применяется пункт 1.

Документация: https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html

Пункт `a_i == b_i` означает, что размерность в точности совпадает.

In [118]:
a = np.arange(20).reshape(4, -1)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [119]:
np.random.seed(1987)

b = np.random.randint(-1, 2, size=(4, 5))
b

array([[-1,  1,  1,  1,  1],
       [ 1, -1, -1,  0,  0],
       [ 0,  1,  0,  1, -1],
       [-1,  0, -1, -1,  0]])

In [120]:
a * b

array([[  0,   1,   2,   3,   4],
       [  5,  -6,  -7,   0,   0],
       [  0,  11,   0,  13, -14],
       [-15,   0, -17, -18,   0]])

Пункты `a_i == 1` и `b_i == 1` означают, что массив по этой оси можно повторить нужное число раз и свести задачу к предыдущему пункту.

In [121]:
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [122]:
np.random.seed(7892)

b = np.random.randint(-1, 2, size=(4, 1))
b

array([[-1],
       [ 1],
       [ 1],
       [ 0]])

In [123]:
print("a.shape =", a.shape)
print("b.shape =", b.shape)

a.shape = (4, 5)
b.shape = (4, 1)


In [124]:
a * b

array([[ 0, -1, -2, -3, -4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [ 0,  0,  0,  0,  0]])

In [125]:
c = np.repeat(b, a.shape[1], axis=1)
c

array([[-1, -1, -1, -1, -1],
       [ 1,  1,  1,  1,  1],
       [ 1,  1,  1,  1,  1],
       [ 0,  0,  0,  0,  0]])

In [126]:
print("a.shape =", a.shape)
print("c.shape =", c.shape)

a.shape = (4, 5)
c.shape = (4, 5)


In [127]:
a * c

array([[ 0, -1, -2, -3, -4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [ 0,  0,  0,  0,  0]])

Если число осей у массивов не совпадает, то массиву с меньшим числом осей слева добавляются фиктивные оси.

In [128]:
a = np.arange(20).reshape(-1, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [129]:
np.random.seed(6579)

b = np.random.randint(0, 2, size=5)
b

array([0, 0, 1, 1, 0])

In [130]:
print("a.shape =", a.shape)
print("b.shape =", b.shape)

a.shape = (4, 5)
b.shape = (5,)


In [131]:
a * b

array([[ 0,  0,  2,  3,  0],
       [ 0,  0,  7,  8,  0],
       [ 0,  0, 12, 13,  0],
       [ 0,  0, 17, 18,  0]])

In [132]:
c = b[np.newaxis, :]
c

array([[0, 0, 1, 1, 0]])

In [133]:
print("a.shape =", a.shape)
print("c.shape =", c.shape)

a.shape = (4, 5)
c.shape = (1, 5)


In [134]:
a * c   # same result as a * b above: b is broadcast as if it had shape (1, 5)

array([[ 0,  0,  2,  3,  0],
       [ 0,  0,  7,  8,  0],
       [ 0,  0, 12, 13,  0],
       [ 0,  0, 17, 18,  0]])

## Матричные операции над массивами

In [135]:
av = np.arange(1, 5).reshape(-1, 2)
av

array([[1, 2],
       [3, 4]])

In [136]:
bv = np.array([[1, 2], [-2, 1]])
bv

array([[ 1,  2],
       [-2,  1]])

In [137]:
av * bv

array([[ 1,  4],
       [-6,  4]])

In [138]:
np.matmul(av, bv)

array([[-3,  4],
       [-5, 10]])

In [139]:
np.dot(av, bv)

array([[-3,  4],
       [-5, 10]])

In [140]:
# no copy, pls

am = np.asmatrix(av)
bm = np.asmatrix(bv)

am

matrix([[1, 2],
        [3, 4]])

In [141]:
am * bm

matrix([[-3,  4],
        [-5, 10]])

In [142]:
av ** 2

array([[ 1,  4],
       [ 9, 16]])

In [143]:
am ** 2

matrix([[ 7, 10],
        [15, 22]])

Все самое важное для линейной алгебры:

https://docs.scipy.org/doc/numpy/reference/routines.linalg.html

## Индексация в одномерных массивах

**Замечание:** индексация может быть использована не только для получения значений, но и для их присвоения.

In [144]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [145]:
a[0], a[5], a[len(a) - 1]

(0, 5, 14)

### Отрицательные индексы

In [146]:
a[len(a) - 1], a[-1]

(14, 14)

In [147]:
a[len(a) - 5], a[-5]

(10, 10)

### Срезы (slice)

**Общее правило:** `массив[первый индекс:последний индекс:шаг]`.

Значения по умолчанию:
* первый индекс = 0;
* последний индекс = len(массив);
* шаг = 1.
    
`последний индекс` не включается.

In [148]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

Взять первые 5 элементов.

In [149]:
variants = [ a[0:5:1], a[0:5], a[:5] ]

print(*map(repr, variants), sep='\n')

array([0, 1, 2, 3, 4])
array([0, 1, 2, 3, 4])
array([0, 1, 2, 3, 4])


In [150]:
variants = [ a[slice(0, 5, 1)], a[slice(0, 5)], a[slice(5)] ]

print(*map(repr, variants), sep='\n')

array([0, 1, 2, 3, 4])
array([0, 1, 2, 3, 4])
array([0, 1, 2, 3, 4])


Взять все элементы, стоящие на четных позициях.

In [151]:
variants = [ a[0:len(a):2], a[0::2], a[::2] ]

print(*map(repr, variants), sep='\n')

array([ 0,  2,  4,  6,  8, 10, 12, 14])
array([ 0,  2,  4,  6,  8, 10, 12, 14])
array([ 0,  2,  4,  6,  8, 10, 12, 14])


Взять все элементы, стоящие на нечетных позициях.

In [152]:
variants = [ a[1:len(a):2], a[1::2] ]

print(*map(repr, variants), sep='\n')

array([ 1,  3,  5,  7,  9, 11, 13])
array([ 1,  3,  5,  7,  9, 11, 13])


Взять все элементы с 3 по 12 (не включительно) с шагом 3.

In [153]:
variants = [ a[3:12:3], a[3:-3:3] ]

print(*map(repr, variants), sep='\n')

array([3, 6, 9])
array([3, 6, 9])


Взять все элементы с 3 по 12 (включительно) с шагом 3 в обратном порядке.

In [154]:
variants = [ a[3:13:3][::-1], a[12:2:-3], a[-3:2:-3] ]

print(*map(repr, variants), sep='\n')

array([12,  9,  6,  3])
array([12,  9,  6,  3])
array([12,  9,  6,  3])


### Булева индексация (маски)

In [155]:
np.random.seed(1234)

a = np.random.randint(-2, 7, 34)
a

array([ 1,  4,  3,  2,  6, -1,  5,  4,  6, -2,  3, -2,  4,  0, -2,  3,  0,
        4,  1,  5, -2, -2,  1,  0,  1, -1,  1, -1,  1,  5, -1,  5,  2, -2])

Найти все отрицательные элементы.

In [156]:
a < 0

array([False, False, False, False, False,  True, False, False, False,
        True, False,  True, False, False,  True, False, False, False,
       False, False,  True,  True, False, False, False,  True, False,
        True, False, False,  True, False, False,  True], dtype=bool)

In [157]:
a[a < 0]

array([-1, -2, -2, -2, -2, -2, -1, -1, -1, -2])

Найти все элементы, кратные 3.

In [158]:
variants = [ a[a % 3 == 0], a[np.logical_not(a % 3)], a[~((a % 3).astype(bool))] ]

print(*map(repr, variants), sep='\n')

array([3, 6, 6, 3, 0, 3, 0, 0])
array([3, 6, 6, 3, 0, 3, 0, 0])
array([3, 6, 6, 3, 0, 3, 0, 0])


In [159]:
np.random.seed(1234)

a = np.random.randint(0, 9, 23)
a

array([3, 6, 5, 4, 8, 1, 7, 6, 8, 0, 5, 0, 6, 2, 0, 5, 2, 6, 3, 7, 0, 0, 3])

Найти все элементы, кратные или 3, или 5.

In [160]:
mask_3 = a % 3 == 0
mask_5 = a % 5 == 0

variants = [ a[np.logical_or(mask_3, mask_5)], a[mask_3 | mask_5] ]

print(*map(repr, variants), sep='\n')

array([3, 6, 5, 6, 0, 5, 0, 6, 0, 5, 6, 3, 0, 0, 3])
array([3, 6, 5, 6, 0, 5, 0, 6, 0, 5, 6, 3, 0, 0, 3])


Найти все элементы, кратные и 2, и 3.

In [161]:
mask_2 = a % 2 == 0
mask_3 = a % 3 == 0

variants = [ a[np.logical_and(mask_2, mask_3)], a[mask_2 & mask_3], a[a % 6 == 0] ]

print(*map(repr, variants), sep='\n')

array([6, 6, 0, 0, 6, 0, 6, 0, 0])
array([6, 6, 0, 0, 6, 0, 6, 0, 0])
array([6, 6, 0, 0, 6, 0, 6, 0, 0])


## Индексация в многомерных массивах

In [162]:
a = np.arange(30).reshape(5, -1)
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

In [163]:
# Bad: chained indexing treats the NumPy array like a nested list; a[0] first
# produces an intermediate row object, which is then indexed a second time.

a[0][0], a[0][2], a[1][1], a[-1][-2]

(0, 2, 7, 28)

In [164]:
# Good: one multidimensional index addresses the element directly.

a[0,0], a[0,2], a[1,1], a[-1,-2]

(0, 2, 7, 28)

Получить строку с индексом 2.

In [165]:
variants = [ a[2], a[2,:] ]

print(*map(repr, variants), sep='\n')

array([12, 13, 14, 15, 16, 17])
array([12, 13, 14, 15, 16, 17])


Получить столбец с индексом 3.

In [166]:
a[:,3]

array([ 3,  9, 15, 21, 27])

Получить все элементы, стоящие в четных столбцах.

In [167]:
a[:,::2]

array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22],
       [24, 26, 28]])

Получить все элементы, стоящие в первой (0-й) строке и нечетных столбцах.

In [168]:
a[0,1::2]

array([1, 3, 5])

Получить все элементы `a[i,j]`, такие что:
* i – нечетные;
* j – дающие остаток 2 при делении на 3;

индексация по строкам должна быть обратной.

In [169]:
a[1::2,2::3][::-1]

array([[20, 23],
       [ 8, 11]])

In [170]:
np.random.seed(2238)

a = np.random.randint(-5, 5, size=(5, 5))
a

array([[ 3,  2, -1, -3,  4],
       [ 3, -2, -2, -2, -5],
       [ 3,  2, -1,  1,  1],
       [ 2,  0,  4, -2,  0],
       [ 0,  4,  1,  2,  4]])

Получите строки, в которых есть хотя бы один 0.

In [171]:
a[(a == 0).any(axis=1)]

array([[ 2,  0,  4, -2,  0],
       [ 0,  4,  1,  2,  4]])

Получите столбцы, в которых число положительных элементов больше числа отрицательных.

In [172]:
a[:, (a > 0).sum(axis=0) > (a < 0).sum(axis=0)]

array([[ 3,  2,  4],
       [ 3, -2, -5],
       [ 3,  2,  1],
       [ 2,  0,  0],
       [ 0,  4,  4]])

Получить положительные элементы.

In [173]:
a[a > 0]

array([3, 2, 4, 3, 3, 2, 1, 1, 2, 4, 4, 1, 2, 4])

Получить индексы положительных элементов.

In [174]:
np.where(a > 0)

(array([0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4]),
 array([0, 1, 4, 0, 0, 1, 3, 4, 0, 2, 1, 2, 3, 4]))

In [175]:
a[np.where(a > 0)]

array([3, 2, 4, 3, 3, 2, 1, 1, 2, 4, 4, 1, 2, 4])

In [176]:
np.vstack(np.where(a > 0)).T

array([[0, 0],
       [0, 1],
       [0, 4],
       [1, 0],
       [2, 0],
       [2, 1],
       [2, 3],
       [2, 4],
       [3, 0],
       [3, 2],
       [4, 1],
       [4, 2],
       [4, 3],
       [4, 4]])

### Fancy Indexing

In [177]:
a

array([[ 3,  2, -1, -3,  4],
       [ 3, -2, -2, -2, -5],
       [ 3,  2, -1,  1,  1],
       [ 2,  0,  4, -2,  0],
       [ 0,  4,  1,  2,  4]])

Получить 2, 4 и 3 строки.

In [178]:
a[[2, 4, 3]]

array([[ 3,  2, -1,  1,  1],
       [ 0,  4,  1,  2,  4],
       [ 2,  0,  4, -2,  0]])

Получить элементы, стоящие в 2, 4, 3 строках и в 0-м и последнем столбцах.

In [179]:
# Ooops!

a[[2, 4, 3], [0, -1]]

IndexError: shape mismatch: indexing arrays could not be broadcast together with shapes (3,) (2,) 

In [180]:
a

array([[ 3,  2, -1, -3,  4],
       [ 3, -2, -2, -2, -5],
       [ 3,  2, -1,  1,  1],
       [ 2,  0,  4, -2,  0],
       [ 0,  4,  1,  2,  4]])

In [181]:
# Same logic as the index tuple returned by np.where: the index arrays are
# paired element-wise, so this selects a[2, 0], a[4, -1] and a[3, 2].

a[[2, 4, 3],[0, -1, 2]]

array([3, 4, 4])

In [182]:
# Solve the original task after all: pick rows 2, 4, 3 first, then take
# columns 0 and -1 of that intermediate result.

a[[2, 4, 3]][:,[0, -1]]

array([[3, 1],
       [0, 4],
       [2, 0]])

In [183]:
a[np.ix_([2, 4, 3], [0, -1])]

array([[3, 1],
       [0, 4],
       [2, 0]])

А в чем разница?

In [184]:
a = np.arange(15).reshape(3, -1)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [185]:
# Chained indexing: a[[0, 1], :] is fancy indexing and returns a copy, so the
# assignment modifies that temporary copy and a itself stays unchanged.

a[[0, 1],:][:,[0, 3, 2]] = 100
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [186]:
# Not a view: this is a single indexed assignment (one __setitem__ call), so
# NumPy writes 100 straight into the selected rows/columns of a.

a[np.ix_([0, 1], [0, 3, 2])] = 100
a

array([[100,   1, 100, 100,   4],
       [100,   6, 100, 100,   9],
       [ 10,  11,  12,  13,  14]])

Разница между view и copy:

https://www.jessicayung.com/numpy-views-vs-copies-avoiding-costly-mistakes/

### Сокращенная индексация

In [187]:
a = np.arange(24).reshape(2, 3, 4)
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [188]:
a[..., 0]

array([[ 0,  4,  8],
       [12, 16, 20]])

In [189]:
a[0, ...]

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [190]:
a[..., 0, :]

array([[ 0,  1,  2,  3],
       [12, 13, 14, 15]])

## Что ещё полезного есть в numpy и scipy?

### Генерация случайных чисел

In [191]:
np.random.rand(10)

array([ 0.22024735,  0.88293447,  0.69184538,  0.00523532,  0.48975893,
        0.78255915,  0.4244878 ,  0.45827732,  0.0449126 ,  0.5361042 ])

In [192]:
np.random.randint(0, 10, 10)

array([8, 1, 5, 3, 8, 9, 7, 2, 1, 0])

In [193]:
np.random.permutation(10)

array([6, 8, 5, 4, 9, 3, 1, 0, 2, 7])

In [194]:
np.random.choice(10, size=10)

array([4, 0, 2, 5, 7, 9, 3, 5, 4, 4])

### Сортировка

In [195]:
np.random.seed(4445)

a = np.random.choice(10, size=(3, 10))
a

array([[6, 0, 3, 1, 5, 3, 1, 6, 6, 4],
       [5, 3, 7, 1, 3, 0, 5, 5, 3, 6],
       [4, 1, 1, 0, 4, 3, 3, 3, 2, 1]])

In [196]:
np.sort(a.ravel())   # returns sorted copy

array([0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5,
       5, 5, 6, 6, 6, 6, 7])

In [197]:
a.sort(axis=0)       # inplace sort
a

array([[4, 0, 1, 0, 3, 0, 1, 3, 2, 1],
       [5, 1, 3, 1, 4, 3, 3, 5, 3, 4],
       [6, 3, 7, 1, 5, 3, 5, 6, 6, 6]])

In [198]:
a.sort(axis=1)
a

array([[0, 0, 0, 1, 1, 1, 2, 3, 3, 4],
       [1, 1, 3, 3, 3, 3, 4, 4, 5, 5],
       [1, 3, 3, 5, 5, 6, 6, 6, 6, 7]])

In [199]:
np.random.seed(4445)

a = np.random.choice(10, size=(3, 10))
a

array([[6, 0, 3, 1, 5, 3, 1, 6, 6, 4],
       [5, 3, 7, 1, 3, 0, 5, 5, 3, 6],
       [4, 1, 1, 0, 4, 3, 3, 3, 2, 1]])

In [200]:
a.argsort(axis=0)

array([[2, 0, 2, 2, 1, 1, 0, 2, 2, 2],
       [1, 2, 0, 0, 2, 0, 2, 1, 1, 0],
       [0, 1, 1, 1, 0, 2, 1, 0, 0, 1]])

In [201]:
a.argsort(axis=1)

array([[1, 3, 6, 2, 5, 9, 4, 0, 7, 8],
       [5, 3, 1, 4, 8, 0, 6, 7, 9, 2],
       [3, 1, 2, 9, 8, 5, 6, 7, 0, 4]])

### Получение уникальных элементов

In [202]:
np.unique(a)

array([0, 1, 2, 3, 4, 5, 6, 7])

### Объединение массивов

In [203]:
from itertools import chain, zip_longest

def print_as_columns(*args, sep='\t'):
    """
    Print several multi-line texts side by side, one text per column.

    Every cell is left-justified to the length of the longest line found in
    any argument, so all columns share one common width. Columns with fewer
    lines are continued with blank cells until the longest column ends.

    Parameters
    ----------
    *args : str or object
        Texts to print; each one is split into lines on '\\n'. An argument
        that is not a string (for example an array) is converted with repr().
    sep : str, optional
        Separator placed between neighbouring columns (a tab by default).

    Returns
    -------
    None
        The result is written to stdout. When called without arguments a
        single empty line is printed.
    """
    columns = [
        (arg if isinstance(arg, str) else repr(arg)).split('\n')
        for arg in args
    ]
    # default=0 keeps the call valid when there is nothing to print
    # (max() of an empty iterable would raise ValueError otherwise).
    width = max(map(len, chain.from_iterable(columns)), default=0)

    def pad(cell):
        """Left-justify a cell to the common column width."""
        return cell.ljust(width)

    padded = ([pad(cell) for cell in column] for column in columns)
    # zip_longest turns columns into rows; exhausted columns get blank cells.
    rows = zip_longest(*padded, fillvalue=pad(''))
    print(*map(sep.join, rows), sep='\n')

In [204]:
np.random.seed(1398)

c = np.random.permutation(np.arange(0, 12))
i = c.shape[0] // 2

a, b = c[:i], c[i:]
a, b = a.reshape(3, 2), b.reshape(3, 2)

In [205]:
print_as_columns(repr(a), repr(b))
np.vstack((a, b))

array([[8, 4],   	array([[ 1,  2], 
       [0, 3],   	       [11,  5], 
       [7, 6]])  	       [ 9, 10]])


array([[ 8,  4],
       [ 0,  3],
       [ 7,  6],
       [ 1,  2],
       [11,  5],
       [ 9, 10]])

In [206]:
np.concatenate((a, b), axis=0)

array([[ 8,  4],
       [ 0,  3],
       [ 7,  6],
       [ 1,  2],
       [11,  5],
       [ 9, 10]])

In [207]:
print_as_columns(repr(a), repr(b))
np.hstack((a, b))

array([[8, 4],   	array([[ 1,  2], 
       [0, 3],   	       [11,  5], 
       [7, 6]])  	       [ 9, 10]])


array([[ 8,  4,  1,  2],
       [ 0,  3, 11,  5],
       [ 7,  6,  9, 10]])

In [208]:
np.concatenate((a, b), axis=1)

array([[ 8,  4,  1,  2],
       [ 0,  3, 11,  5],
       [ 7,  6,  9, 10]])

### Подсчет расстояний между точками

In [209]:
from scipy.spatial.distance import pdist, squareform

In [210]:
np.random.seed(9872)

X = np.random.random(size=(5, 2)) * 10
X

array([[ 0.33823417,  2.08502959],
       [ 1.20627581,  5.15557834],
       [ 4.34896139,  2.34885034],
       [ 9.38091212,  0.89160899],
       [ 5.77386694,  4.41725834]])

In [211]:
d = pdist(X)
d

array([ 3.19088795,  4.01939476,  9.1210897 ,  5.91484527,  4.21357267,
        9.219876  ,  4.62687855,  5.23870982,  2.51170609,  5.04390507])

In [212]:
squareform(d)

array([[ 0.        ,  3.19088795,  4.01939476,  9.1210897 ,  5.91484527],
       [ 3.19088795,  0.        ,  4.21357267,  9.219876  ,  4.62687855],
       [ 4.01939476,  4.21357267,  0.        ,  5.23870982,  2.51170609],
       [ 9.1210897 ,  9.219876  ,  5.23870982,  0.        ,  5.04390507],
       [ 5.91484527,  4.62687855,  2.51170609,  5.04390507,  0.        ]])

In [213]:
norm = np.linalg.norm(X, ord=2, axis=1)
norm

array([ 2.11228567,  5.29481722,  4.9427283 ,  9.42318836,  7.26978064])

In [214]:
squareform(pdist(X / norm[:, np.newaxis]))

array([[ 0.        ,  0.06900716,  0.88320724,  1.22245132,  0.73897735],
       [ 0.06900716,  0.        ,  0.82076742,  1.16710728,  0.67441346],
       [ 0.88320724,  0.82076742,  0.        ,  0.39777586,  0.15768922],
       [ 1.22245132,  1.16710728,  0.39777586,  0.        ,  0.55107649],
       [ 0.73897735,  0.67441346,  0.15768922,  0.55107649,  0.        ]])

In [215]:
pdist(X, metric='cosine')

array([ 0.00238099,  0.39002751,  0.74719361,  0.27304377,  0.33682958,
        0.6810697 ,  0.22741676,  0.07911282,  0.01243295,  0.15184265])

Последний совет на сегодня:
<center><h3>Не забываем читать документацию и пользоваться поиском</h3></center>