# Numpy

NumPy documentation:
https://numpy.org/doc/stable/

Наглядно о том, как работает NumPy: https://habr.com/ru/company/skillfactory/blog/564240/

## Создание массива

In [10]:
import numpy as np

In [11]:
np.__version__

'1.18.5'

In [12]:
python_list = [0, 1, 2, 3]

In [13]:
# Так создавать numpy-массив нельзя
wrong_numpy_array = np.array(0, 1, 2, 3)

ValueError: only 2 non-keyword arguments accepted

* из списков или кортежей Python

In [39]:
a = np.array([1,2,3,4])
a

array([1, 2, 3, 4])

In [40]:
type(a)

numpy.ndarray

In [41]:
python_list = [0, 1, 2, 3]
a = np.array(python_list)
a

array([0, 1, 2, 3])

In [42]:
python_tuple = (0, 1, 2, 3)
a = np.array(python_tuple)
a

array([0, 1, 2, 3])

* многомерные массивы

In [43]:
b = np.array([[0,1,2],[3,4,5]])
b

array([[0, 1, 2],
       [3, 4, 5]])

In [44]:
a.ndim

1

In [45]:
b.ndim

2

In [46]:
a.shape

(4,)

In [47]:
b.shape

(2, 3)

<img src=https://predictivehacks.com/wp-content/uploads/2020/08/numpy_arrays-1024x572.png width="600">

* нулевой массив

In [48]:
a = np.zeros((1,10))
a

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [49]:
a = np.zeros((4,3))
a

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

* единичный массив

In [50]:
a = np.ones((1,10))
a

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [51]:
a = np.ones((4,3))
a

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

* пустой массив (память не инициализируется, поэтому значения произвольные, а не случайные)

In [52]:
a = np.empty((5,4))
a

array([[7.05877733e-312, 6.27463370e-322, 0.00000000e+000,
        0.00000000e+000],
       [8.90104238e-307, 5.30276956e+180, 4.42132230e-062,
        4.57222660e-071],
       [4.86450108e-086, 3.36009216e-143, 6.01433264e+175,
        6.93885958e+218],
       [5.56218858e+180, 3.94356143e+180, 2.33695973e-052,
        1.14416553e-071],
       [1.25580898e-075, 4.23104419e+175, 5.18691727e-144,
        1.50008929e+248]])

* массив такой же формы

In [53]:
b = np.zeros_like(a)
b

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [54]:
b.shape

(5, 4)

* массив из последовательности

In [55]:
# arange([start,] stop[, step,], dtype=None)
a = np.arange(0,10,2)
a

array([0, 2, 4, 6, 8])

какой будет последний элемент в массиве ниже?

In [56]:
a = np.arange(0,10,0.5)
a

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,
       6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])

In [57]:
# np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
a = np.linspace(0,99,num=15) # 15 значений с равными интервалами
a

array([ 0.        ,  7.07142857, 14.14285714, 21.21428571, 28.28571429,
       35.35714286, 42.42857143, 49.5       , 56.57142857, 63.64285714,
       70.71428571, 77.78571429, 84.85714286, 91.92857143, 99.        ])

In [58]:
a = np.linspace(0,99,num=15, endpoint=False) # 15 значений с равным шагом, правая граница (99) не включается
a

array([ 0. ,  6.6, 13.2, 19.8, 26.4, 33. , 39.6, 46.2, 52.8, 59.4, 66. ,
       72.6, 79.2, 85.8, 92.4])

In [59]:
a = np.linspace(0,99,num=10, dtype=int) # dtype - тип данных
a

array([ 0, 11, 22, 33, 44, 55, 66, 77, 88, 99])

In [60]:
a = np.linspace(0,99,num=10)
a

array([ 0., 11., 22., 33., 44., 55., 66., 77., 88., 99.])

In [67]:
a = np.logspace(1,3,3,base=10)
a

array([  10.,  100., 1000.])

#### сравним скорость вычислений типов list и numpy.ndarray

In [20]:
%timeit [i**2 for i in range(1000)]

336 µs ± 12.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [21]:
%timeit np.arange(1000)**2

2.94 µs ± 135 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Типы данных

https://numpy.org/doc/stable/user/basics.types.html

* тип данных по умолчанию

In [68]:
a = np.array([0, 1, 2, 3])

In [69]:
a.dtype

dtype('int32')

In [70]:
a = np.array([1.3, 0.5, 4])
a.dtype

dtype('float64')

In [71]:
a = np.array([1+2j, 3+4j, 5+6*1j])
a.dtype

dtype('complex128')

In [72]:
a = np.array([True, False, False, True])
a.dtype

dtype('bool')

In [73]:
a = np.array(['dgf', 0.5, 4]) # строковый тип данных
a.dtype

dtype('<U3')

In [81]:
a = np.array(['Bonjo12345werwer67', 'Hello', 'Hallo',])
a.dtype

dtype('<U18')

In [82]:
a1 = 12313241545646848648645645645645412786873132156465456465464

In [83]:
a = np.array(a1)
a.dtype # object

dtype('O')

In [84]:
np.iinfo(int) # Bounds of the default integer on this system.

iinfo(min=-2147483648, max=2147483647, dtype=int32)

In [85]:
np.iinfo(np.int32) # Bounds of a 32-bit integer

iinfo(min=-2147483648, max=2147483647, dtype=int32)

In [86]:
np.iinfo(np.int64) # Bounds of a 64-bit integer

iinfo(min=-9223372036854775808, max=9223372036854775807, dtype=int64)

* явное указание типа данных

In [87]:
a = np.array([0, 1, 2, 3], dtype=np.int8)
a.dtype

dtype('int8')

In [88]:
a

array([0, 1, 2, 3], dtype=int8)

In [89]:
# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
# and removed in 1.24.
a = np.array([0, 1, 2, 3, -1], dtype=bool)
a  # every non-zero value (including negatives) becomes True; only 0 becomes False

array([False,  True,  True,  True,  True])

In [90]:
a = np.array([1.3, 0.8, 4], dtype=np.int8)
a

array([1, 0, 4], dtype=int8)

In [91]:
a = np.array(['dgf', 0.5, 4], dtype=np.int8)

ValueError: invalid literal for int() with base 10: 'dgf'

In [92]:
# Use the builtin `bool` instead of the deprecated `np.bool` alias.
# Each element is converted by truthiness, so a non-empty string is True.
a = np.array(['dgf', 0.5, 4], dtype=bool)
a

array([ True,  True,  True])

In [93]:
a = np.array(['7', 0.5, 4], dtype=np.int8)

In [94]:
a

array([7, 0, 4], dtype=int8)

In [95]:
# Use the builtin `float` (float64): the `np.float` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where this cell would raise AttributeError.
a = np.array(['7.2', 0.5, 4], dtype=float)
a

array([7.2, 0.5, 4. ])

* None и np.nan

In [96]:
None is None # Замена для `id(None) == id(None)`

True

In [97]:
None == None, None is None

(True, True)

In [98]:
np.nan == np.nan, np.nan is np.nan

(False, True)

In [99]:
np.nan is None, np.nan == None

(False, False)

In [100]:
type(np.nan)

float

In [101]:
type(None)

NoneType

In [102]:
np.isnan(np.nan)

True

In [103]:
np.isnan(None)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [104]:
a = np.array([0,1,np.nan,3])
b = np.array([0,1,2,3])

In [105]:
c = a+b
c

array([ 0.,  2., nan,  6.])

In [106]:
a = np.array([0,1,None,3])
b = np.array([0,1,2,3])

In [107]:
a+b

TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

In [108]:
a = np.array([0,1,np.nan,3])
a.dtype

dtype('float64')

## Индексирование

<img src=https://www.oreilly.com/library/view/python-for-data/9781449323592/httpatomoreillycomsourceoreillyimages2172112.png width="400">

* одномерный массив

In [109]:
a = np.arange(10,20)
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [110]:
a[0]

10

In [111]:
a[5]

15

In [112]:
a[9]

19

In [113]:
# [start:end:step]
a[1:6]

array([11, 12, 13, 14, 15])

In [114]:
a[1:6:2]

array([11, 13, 15])

In [115]:
a[:6]

array([10, 11, 12, 13, 14, 15])

In [116]:
a[6:]

array([16, 17, 18, 19])

In [117]:
a[:]

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [118]:
a[-1]

19

In [119]:
a[-2]

18

In [120]:
a[-10]

10

In [121]:
a[10]

IndexError: index 10 is out of bounds for axis 0 with size 10

In [122]:
a[::-1]

array([19, 18, 17, 16, 15, 14, 13, 12, 11, 10])

In [123]:
a[::-2]

array([19, 17, 15, 13, 11])

In [124]:
a

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [125]:
a[-1:-5:-1]

array([19, 18, 17, 16])

* многомерный массив

<img src=https://www.oreilly.com/library/view/python-for-data/9781449323592/httpatomoreillycomsourceoreillyimages2172114.png width="400">

In [126]:
a = np.array([[0,1,2], [3,4,5], [6,7,8], [9,10,11]])
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [127]:
a.shape

(4, 3)

In [128]:
a[2,1]

7

In [129]:
a[2]

array([6, 7, 8])

In [130]:
a[:,1]

array([ 1,  4,  7, 10])

In [131]:
a[1:, 2]

array([ 5,  8, 11])

In [132]:
a[1:3,1:3]

array([[4, 5],
       [7, 8]])

In [133]:
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [134]:
a[::-1]

array([[ 9, 10, 11],
       [ 6,  7,  8],
       [ 3,  4,  5],
       [ 0,  1,  2]])

## Изменение размерности

In [135]:
a = np.array([[0,1,2], [3,4,5], [6,7,8], [9,10,11]])
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [136]:
a.shape

(4, 3)

In [137]:
b = a.flatten()
b

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [138]:
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [139]:
a.reshape((6,2))

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])

In [140]:
a.reshape((3,4))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [141]:
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [142]:
a.shape

(4, 3)

In [143]:
a.resize((3,4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [144]:
a.shape

(3, 4)

In [145]:
a.reshape(-1,6)

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [146]:
a.reshape(-1,5)

ValueError: cannot reshape array of size 12 into shape (5)

In [147]:
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [148]:
a.T

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [149]:
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

## Рандом

In [150]:
# uniform in [0, 1)
a = np.random.rand(4, 4)
a

array([[0.414675  , 0.84993281, 0.53112578, 0.49659395],
       [0.7659892 , 0.37695823, 0.63260143, 0.53190593],
       [0.98627998, 0.9505134 , 0.40381579, 0.09581576],
       [0.96484455, 0.12267483, 0.67942424, 0.00313718]])

In [151]:
# uniform in [0, 5)
a = 5 * np.random.rand(4, 4)
a

array([[1.58119341, 2.36080876, 4.93985834, 3.93708491],
       [1.25046414, 2.89883876, 4.62246247, 4.4467012 ],
       [0.79879902, 0.53298808, 3.15771761, 4.56655403],
       [3.46231954, 2.19748943, 2.64827439, 0.67444183]])

In [152]:
# Gaussian ("standard normal" distribution)
b =  np.random.randn(4,4)
b  

array([[-1.83669075, -0.12001425,  0.18065511, -0.19353806],
       [ 0.12409175, -1.05832324,  1.29357441,  0.61820118],
       [-0.96747623,  0.58195608, -0.37589519,  0.35539983],
       [-2.82091153, -0.0795171 ,  0.01072411,  2.04395292]])

In [153]:
# Gaussian (normal distribution with mean 1 and standard deviation 5)
b =  5 * np.random.randn(4,4) + 1 
b  

array([[-2.20796263,  0.01606991, -3.25596864,  1.54948508],
       [ 0.40129301, -8.40904886,  1.44574465, -1.97465815],
       [ 2.8396563 ,  4.92700665,  0.20767038,  8.88317483],
       [-0.76407885, -6.17127268, -5.35368327,  9.03214486]])

In [154]:
# Integer uniform
c = np.random.randint(0, 10, 10)
c

array([6, 8, 0, 2, 9, 9, 3, 1, 7, 4])

* фиксация рандома

In [155]:
import numpy as np

In [156]:
np.random.seed(123)

In [157]:
np.random.randn(3)

array([-1.0856306 ,  0.99734545,  0.2829785 ])

In [158]:
np.random.randn(4)

array([-1.50629471, -0.57860025,  1.65143654, -2.42667924])

In [159]:
from numpy.random import MT19937
from numpy.random import RandomState, SeedSequence

In [160]:
rs = RandomState(MT19937(SeedSequence(123456789))) # более сложная фиксация данных 

In [161]:
rs.randn(3)

array([-0.43411991, -0.47617187,  0.81874471])

In [162]:
rs.randn(4)

array([ 0.91882442,  0.58222651, -0.34206709, -0.90987522])

## Выбор по условию (маски)

In [163]:
np.random.seed(3)
a = np.random.randint(0, 20, 15)
a

array([10,  3,  8,  0, 19, 10, 11,  9, 10,  6,  0, 12,  7, 14, 17])

In [164]:
a[a > 10]

array([19, 11, 12, 14, 17])

In [165]:
mask = a>10
mask

array([False, False, False, False,  True, False,  True, False, False,
       False, False,  True, False,  True,  True])

In [166]:
a[mask]

array([19, 11, 12, 14, 17])

In [178]:
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 5,  6,  7],
       [ 8,  9, 10]])

In [167]:
mask = (a>5) & (a<10)
a[mask]

array([6, 7, 8, 9])

In [180]:
mask = (a % 3 == 0)
print(mask)
extract_from_a = a[mask]
extract_from_a

[[ True False False]
 [ True False False]
 [False  True False]
 [False  True False]]


array([0, 3, 6, 9])

In [181]:
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 5,  6,  7],
       [ 8,  9, 10]])

In [182]:
a[a % 3 == 0] = -1
a

array([[-1,  1,  2],
       [-1,  4,  5],
       [ 5, -1,  7],
       [ 8, -1, 10]])

In [183]:
a[0] = 100

In [184]:
a

array([[100, 100, 100],
       [ -1,   4,   5],
       [  5,  -1,   7],
       [  8,  -1,  10]])

In [185]:
a[:3] = 100

In [186]:
a

array([[100, 100, 100],
       [100, 100, 100],
       [100, 100, 100],
       [  8,  -1,  10]])

In [187]:
a = np.array([[0,1,2],[3,4,5],[5,6,7],[8,9,10]])
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 5,  6,  7],
       [ 8,  9, 10]])

In [188]:
def flt(a):
    """Return the result of comparing ``a`` against the threshold 5.

    Args:
        a: A value that supports ``>`` against an int. The notebook passes a
            NumPy array, for which the comparison is element-wise.

    Returns:
        The value of ``a > 5``. For a NumPy array this is a boolean mask that
        can be used for indexing, e.g. ``a[flt(a)]``.
    """
    return a > 5

In [189]:
a[flt(a)]

array([ 6,  7,  8,  9, 10])

## Математические операции

* одномерные массивы

In [190]:
a = np.array([10,20,30,40])
b = np.array([1,2,3,4])

In [191]:
a

array([10, 20, 30, 40])

In [192]:
b

array([1, 2, 3, 4])

In [193]:
a + 5

array([15, 25, 35, 45])

In [194]:
a * 5

array([ 50, 100, 150, 200])

In [195]:
a - b

array([ 9, 18, 27, 36])

In [196]:
a + b

array([11, 22, 33, 44])

In [197]:
a * b

array([ 10,  40,  90, 160])

In [198]:
a / b

array([10., 10., 10., 10.])

In [199]:
a ** b

array([     10,     400,   27000, 2560000], dtype=int32)

* многомерные массивы

In [200]:
A = np.array([[10,20],[30,40]])
B = np.array([[1,2],[3,4]])

In [201]:
A

array([[10, 20],
       [30, 40]])

In [202]:
B

array([[1, 2],
       [3, 4]])

In [203]:
A - B

array([[ 9, 18],
       [27, 36]])

In [204]:
A + B

array([[11, 22],
       [33, 44]])

In [205]:
A * B

array([[ 10,  40],
       [ 90, 160]])

In [206]:
A / B

array([[10., 10.],
       [10., 10.]])

In [207]:
A ** B

array([[     10,     400],
       [  27000, 2560000]], dtype=int32)

In [208]:
A.dot(B)

array([[ 70, 100],
       [150, 220]])

In [211]:
C = np.array([[1,2],[3,4],[5,6]])
C

array([[1, 2],
       [3, 4],
       [5, 6]])

In [212]:
A.dot(C)

ValueError: shapes (2,2) and (3,2) not aligned: 2 (dim 1) != 3 (dim 0)

* использование функций

In [213]:
a

array([10, 20, 30, 40])

In [222]:
a.sum(), np.sum(a)

(100, 100)

In [223]:
a.prod(), np.prod(a) # умножение всех элементов

(240000, 240000)

In [224]:
np.sqrt(a)

array([3.16227766, 4.47213595, 5.47722558, 6.32455532])

In [225]:
np.log(a)

array([2.30258509, 2.99573227, 3.40119738, 3.68887945])

In [226]:
a.min(), a.max()

(10, 40)

In [227]:
a.mean(), np.mean(a)

(25.0, 25.0)

In [228]:
np.median(a)

25.0

In [229]:
a.std(), np.std(a)

(11.180339887498949, 11.180339887498949)

In [230]:
a

array([10, 20, 30, 40])

In [231]:
a.argmin(), a.argmax()

(0, 3)

## Другие полезные функции

In [232]:
a

array([10, 20, 30, 40])

* проверка вхождения в массив

In [233]:
2 in a

False

In [234]:
20 in a

True

* преобразование в список

In [235]:
a.tolist()

[10, 20, 30, 40]

* сортировка массива

In [236]:
a = np.array([40,10,20,5])
a

array([40, 10, 20,  5])

In [237]:
a.sort()

In [238]:
a

array([ 5, 10, 20, 40])

* заполнение массива одинаковым значением

In [239]:
a

array([ 5, 10, 20, 40])

In [240]:
a.fill(7)

In [241]:
a

array([7, 7, 7, 7])

## Копирование

In [242]:
a = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [243]:
b = a
b

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [244]:
a[1,1] = 555

In [245]:
a

array([[  0,   1,   2,   3],
       [  4, 555,   6,   7],
       [  8,   9,  10,  11]])

In [246]:
b

array([[  0,   1,   2,   3],
       [  4, 555,   6,   7],
       [  8,   9,  10,  11]])

In [247]:
a is b

True

In [248]:
id(a), id(b)

(1428841322176, 1428841322176)

In [249]:
b = a.copy()

In [250]:
a is b

False

In [251]:
id(a), id(b)

(1428841322176, 1428840987264)

In [252]:
a

array([[  0,   1,   2,   3],
       [  4, 555,   6,   7],
       [  8,   9,  10,  11]])

In [253]:
b

array([[  0,   1,   2,   3],
       [  4, 555,   6,   7],
       [  8,   9,  10,  11]])

In [254]:
a[1,1] = 5
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [255]:
b

array([[  0,   1,   2,   3],
       [  4, 555,   6,   7],
       [  8,   9,  10,  11]])