# Основы Numpy

## Интро
### Зачем нам вообще Numpy?

In [1]:
import numpy as np

In [6]:
# сделаем две матрицы 300х300
n = 300
A = np.random.rand(n, n)
B = np.random.rand(n, n)

А теперь попробуем провести матричное умножение.

In [3]:
%%time
# Naive matrix product written with plain Python loops:
# C[i, j] accumulates the sum over k of A[i, k] * B[k, j].
C = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        for k in range(n):
            # one scalar multiply-add per iteration, n**3 iterations in total
            C[i, j] += A[i, k] * B[k, j]

CPU times: total: 1.11 s
Wall time: 18.2 s


In [7]:
%%time
C = A @ B

CPU times: total: 3min 3s
Wall time: 14.3 s


Нумпай быстрее циклов питона во много раз!

### Array

Как работать с numpy? Создаем numpy.array

In [5]:
x1 = [1, 2, 3]
x2 = (1, 2, 3)
x3 = {1, 2, 3}
x4 = 'привет'
x5 = 2.5
x6 = 1
x7 = {'1': 1, '2': 2}
x = [x1, x2, x3, x4, x5, x6, x7]

In [6]:
for obj in x:
    print(np.array(obj))

[1 2 3]
[1 2 3]
{1, 2, 3}
привет
2.5
1
{'1': 1, '2': 2}


In [7]:
np.array(x1)

array([1, 2, 3])

In [8]:
np.array(x2)

array([1, 2, 3])

In [9]:
np.array(x3)  # don't do this: a set is not a sequence, so the whole set is wrapped into a 0-d object array

array({1, 2, 3}, dtype=object)

In [11]:
np.array(x3)[0]  # this is why: the result is 0-dimensional, so it cannot be indexed

IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed

In [12]:
np.array(x4)

array('привет', dtype='<U6')

In [13]:
np.array(x5)

array(2.5)

In [14]:
np.array(x6)

array(1)

In [15]:
np.array(x7)

array({'1': 1, '2': 2}, dtype=object)

Всегда можно преобразовать в numpy array? Нет!

Нужно, чтобы все элементы были одинакового размера и типа (с учетом каста).

In [16]:
x = [[1, 2, 3], [1, 2]]  # rows of different length (3 and 2) cannot form a rectangular array
np.array(x)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [17]:
x = [[1, 2, 3], [1, 2, 3]]
np.array(x)

array([[1, 2, 3],
       [1, 2, 3]])

In [18]:
x = [[1, 2, 3.5], [1, 2, 3]]
np.array(x)

array([[1. , 2. , 3.5],
       [1. , 2. , 3. ]])

In [22]:
x = [[1, '2', 3.5], [1, 2, 3]]
np.array(x)

array([['1', '2', '3.5'],
       ['1', '2', '3']], dtype='<U32')

**Подсказка:** string > float > int

### Специальные массивы

In [23]:
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [24]:
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [25]:
np.ones((5, 6))

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [26]:
np.eye(3, 4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]])

In [27]:
a = np.zeros((3, 2))
b = np.zeros_like(a)
b

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

Также специальные матрицы доступны из модулей нумпай

In [28]:
np.random  # module

<module 'numpy.random' from 'C:\\Users\\artem\\.conda\\envs\\default\\lib\\site-packages\\numpy\\random\\__init__.py'>

In [29]:
dir(np.random)

['BitGenerator',
 'Generator',
 'MT19937',
 'PCG64',
 'PCG64DXSM',
 'Philox',
 'RandomState',
 'SFC64',
 'SeedSequence',
 '__RandomState_ctor',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_bounded_integers',
 '_common',
 '_generator',
 '_mt19937',
 '_pcg64',
 '_philox',
 '_pickle',
 '_sfc64',
 'beta',
 'binomial',
 'bit_generator',
 'bytes',
 'chisquare',
 'choice',
 'default_rng',
 'dirichlet',
 'exponential',
 'f',
 'gamma',
 'geometric',
 'get_bit_generator',
 'get_state',
 'gumbel',
 'hypergeometric',
 'laplace',
 'logistic',
 'lognormal',
 'logseries',
 'mtrand',
 'multinomial',
 'multivariate_normal',
 'negative_binomial',
 'noncentral_chisquare',
 'noncentral_f',
 'normal',
 'pareto',
 'permutation',
 'poisson',
 'power',
 'rand',
 'randint',
 'randn',
 'random',
 'random_integers',
 'random_sample',
 'ranf',
 'rayleigh',
 'sample',
 'seed',
 'set_bit_generator',
 'set_state',
 'shuf

In [30]:
np.random.rand(2, 3)  # uniform distribution

array([[0.49640599, 0.44276285, 0.38728195],
       [0.19430574, 0.6959761 , 0.37251253]])

In [33]:
np.random.randint(0, 10, (3, 4))  # random integers

array([[7, 6, 3, 1],
       [4, 4, 9, 9],
       [7, 9, 4, 8]])

Как всегда получать одинаковую случайную матрицу?

In [37]:
np.random.seed(42)  # фиксируем ПОРЯДОК случайностей

In [38]:
np.random.seed(42)
np.random.rand(2, 3)

array([[0.37454012, 0.95071431, 0.73199394],
       [0.59865848, 0.15601864, 0.15599452]])

In [39]:
np.random.seed(42)
np.random.rand(2, 3)

array([[0.37454012, 0.95071431, 0.73199394],
       [0.59865848, 0.15601864, 0.15599452]])

In [40]:
np.random.seed(123)
x1 = np.random.rand(2, 3)
np.random.seed(123)
x2 = np.random.rand(2, 3)
x1 == x2

array([[ True,  True,  True],
       [ True,  True,  True]])

In [41]:
np.random.seed(123)
x1 = np.random.rand(2, 3)  # first random
x2 = np.random.rand(2, 3)  # second random
x1 == x2

array([[False, False, False],
       [False, False, False]])

In [42]:
np.random.seed(123)
x1 = np.random.rand(2, 3)
x2 = np.random.rand(2, 3)

np.random.seed(123)
x3 = np.random.rand(2, 3)
x4 = np.random.rand(2, 3)

In [43]:
x1 == x3

array([[ True,  True,  True],
       [ True,  True,  True]])

In [44]:
x2 == x4

array([[ True,  True,  True],
       [ True,  True,  True]])

In [45]:
x1 == x4

array([[False, False, False],
       [False, False, False]])

### Специальные массивы из последовательностей

In [46]:
np.linspace(0, 100, 10)  # 10 evenly spaced points on [0, 100], both ends included (i.e. 9 equal steps)

array([  0.        ,  11.11111111,  22.22222222,  33.33333333,
        44.44444444,  55.55555556,  66.66666667,  77.77777778,
        88.88888889, 100.        ])

In [47]:
np.linspace(0, 99, num=11)

array([ 0. ,  9.9, 19.8, 29.7, 39.6, 49.5, 59.4, 69.3, 79.2, 89.1, 99. ])

In [48]:
np.linspace(0, 99, num=11, dtype=int)  # явно укажем, что нужны целые числа

array([ 0,  9, 19, 29, 39, 49, 59, 69, 79, 89, 99])

In [49]:
np.linspace(0, 99, num=11, dtype=int, endpoint=False)

array([ 0,  9, 18, 27, 36, 45, 54, 63, 72, 81, 90])

In [44]:
np.logspace(1, 3, 5, base=10)

array([  10.        ,   31.6227766 ,  100.        ,  316.22776602,
       1000.        ])

In [50]:
np.logspace(1, 3, 5, base=2)

array([2.        , 2.82842712, 4.        , 5.65685425, 8.        ])

In [51]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [52]:
np.arange(1, 100, 7)  # arange(start, stop, step); the stop value itself is excluded

array([ 1,  8, 15, 22, 29, 36, 43, 50, 57, 64, 71, 78, 85, 92, 99])

### Сравнение по скорости и памяти

#### Скорость (время на выполнение операции)

In [53]:
%%timeit
x1 = [i**2 for i in range(1000)]

252 µs ± 2.95 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [54]:
%%timeit
x2 = np.arange(1000)**2

4.34 µs ± 68 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


#### Память

In [55]:
import sys

In [56]:
x1 = [i**2 for i in range(1000)]
# NOTE: sys.getsizeof counts only the list object itself (its array of references),
# not the 1000 int objects it points to, so this understates the list's real footprint.
sys.getsizeof(x1)

8856

In [57]:
x2 = np.arange(1000)**2
# For this array the reported size covers the element data as well:
# presumably 1000 items * 4 bytes (int32 on this machine) plus a small header.
sys.getsizeof(x2)

4112

### Функции и атрибуты

#### Типы и кастинг

In [58]:
x = [[1, 2, 3], [1, 2, 3]]
x = np.array(x)
x

array([[1, 2, 3],
       [1, 2, 3]])

In [59]:
x.dtype  # тип всех элементов

dtype('int32')

In [60]:
x = [[1, 2, 3.5], [1, 2, 3]]
x = np.array(x)
x

array([[1. , 2. , 3.5],
       [1. , 2. , 3. ]])

In [61]:
x.dtype  # тип всех элементов

dtype('float64')

In [62]:
x = [[1, '2', 3.5], [1, 2, 3]]
x = np.array(x)
x

array([['1', '2', '3.5'],
       ['1', '2', '3']], dtype='<U32')

In [63]:
x.dtype  # тип всех элементов

dtype('<U32')

In [64]:
x[0][0]

'1'

In [65]:
x[0][0].dtype

dtype('<U1')

#### Shape и reshape

In [66]:
x = np.arange(1, 21)  # 1d array
x

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

In [67]:
x.shape  # a single axis of length 20 (a 1-D array has no separate rows and columns)

(20,)

In [68]:
x.ndim  # только одна размерность == 1d array

1

In [69]:
x = x.reshape((2, 10))  # меняем размерность, теперь 2 строки и 10 столбцов
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])

In [70]:
x.shape

(2, 10)

In [71]:
x.ndim

2

In [72]:
x.reshape((-1,))  # а теперь вернем все назад, "вытянем" в вектор

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

In [73]:
x = x.reshape((-1,))  # не забываем присвоить значение

In [74]:
x.reshape((2, -1))  # -1 lets numpy infer the number of columns when you don't know it

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]])

In [78]:
x.reshape((2, 5, 2))  # можно и 3d array сделать

array([[[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10]],

       [[11, 12],
        [13, 14],
        [15, 16],
        [17, 18],
        [19, 20]]])

In [79]:
x.reshape((2, 5, 2)).shape, x.reshape((2, 5, 2)).ndim

((2, 5, 2), 3)

In [84]:
x.reshape((2, -1, -1))  # только один раз можно поставить "-1"

ValueError: can only specify one unknown dimension

In [81]:
# почему?
x.reshape((2, 5, 2)).shape, x.reshape(2, 10).shape, x.shape

((2, 5, 2), (2, 10), (20,))

#### Задача 2

Имеющийся двумерный список `lst` нужно сделать (1) одномерным, (2) трехмерным

In [85]:
lst = [[1, 2, 3], [2, 3, 4], [3, 4, 5]]
lst

[[1, 2, 3], [2, 3, 4], [3, 4, 5]]

In [86]:
[elem for pack in lst for elem in pack]

[1, 2, 3, 2, 3, 4, 3, 4, 5]

In [87]:
%%timeit
[elem for pack in lst for elem in pack]  # verbose and hard-wired to exactly two nesting levels (though on a list this small it is not slower than the numpy version)

518 ns ± 15.6 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [88]:
np.array(lst).reshape((-1,))

array([1, 2, 3, 2, 3, 4, 3, 4, 5])

In [89]:
%%timeit
np.array(lst).reshape((-1,))

1.77 µs ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [90]:
lst_new = np.array(lst)

In [91]:
%%timeit
lst_new.reshape((-1,))

216 ns ± 1.97 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


#### Функции и методы

In [92]:
x = np.arange(1, 21)
x = x.reshape((5, 4))
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

In [93]:
x.mean()  # среднее массива

10.5

In [94]:
x.mean(axis=0)  # среднее по размерностям (столбцы)

array([ 9., 10., 11., 12.])

In [95]:
x.mean(axis=1)  # среднее по строкам

array([ 2.5,  6.5, 10.5, 14.5, 18.5])

In [96]:
x.mean(axis=2)  # а нету 3 размерности, их всего 2 штуки

AxisError: axis 2 is out of bounds for array of dimension 2

In [97]:
np.mean(x) == x.mean()  # функции numpy дают тот же результат, что и методы

True

In [98]:
np.mean([1, 2, 3])  # но функциям можно давать не только numpy.array на вход

2.0

In [122]:
np.std(x, axis=1)

array([1.11803399, 1.11803399, 1.11803399, 1.11803399, 1.11803399])

In [100]:
np.median(x, axis=0)

array([ 9., 10., 11., 12.])

In [102]:
np.linalg.norm(x)

53.5723809439155

In [103]:
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

In [104]:
x.transpose()

array([[ 1,  5,  9, 13, 17],
       [ 2,  6, 10, 14, 18],
       [ 3,  7, 11, 15, 19],
       [ 4,  8, 12, 16, 20]])

In [105]:
x.T

array([[ 1,  5,  9, 13, 17],
       [ 2,  6, 10, 14, 18],
       [ 3,  7, 11, 15, 19],
       [ 4,  8, 12, 16, 20]])

In [106]:
x.T == x.transpose()

array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [111]:
x = np.random.rand(5, 5)  # квадратная матрица
x

array([[0.89338916, 0.94416002, 0.50183668, 0.62395295, 0.1156184 ],
       [0.31728548, 0.41482621, 0.86630916, 0.25045537, 0.48303426],
       [0.98555979, 0.51948512, 0.61289453, 0.12062867, 0.8263408 ],
       [0.60306013, 0.54506801, 0.34276383, 0.30412079, 0.41702221],
       [0.68130077, 0.87545684, 0.51042234, 0.66931378, 0.58593655]])

In [112]:
np.linalg.det(x)

-0.005937868597278331

In [113]:
b = np.array([1, 2, 7, 4, 5])

In [114]:
np.linalg.solve(x, b)  # решить, как систему линейных уравнений Ax=b

array([-2.71900294, 10.72861512, -4.7615233 , -8.72011248,  9.77392275])

### Операции

In [116]:
x = np.random.rand(5)
y = np.random.rand(5)

In [117]:
x

array([0.6249035 , 0.67468905, 0.84234244, 0.08319499, 0.76368284])

In [118]:
y

array([0.24366637, 0.19422296, 0.57245696, 0.09571252, 0.88532683])

Обычно для операций необходимо, чтобы совпадали (1) типы и (2) размеры массивов

In [119]:
x == y

array([False, False, False, False, False])

Операции выполняются по умолчанию поэлементно

In [120]:
x + y

array([0.86856988, 0.86891201, 1.4147994 , 0.1789075 , 1.64900967])

In [121]:
x * y

array([0.15226797, 0.1310401 , 0.48220479, 0.0079628 , 0.67610891])

In [122]:
x ** y

array([0.89175691, 0.92642018, 0.90645339, 0.78820608, 0.78766177])

In [123]:
x - x

array([0., 0., 0., 0., 0.])

In [124]:
(x + 1 + y * 2) / 5

array([0.42244725, 0.41262699, 0.59745127, 0.254924  , 0.7068673 ])

$\frac{1}{N}\sum_{i=1}^{N}(x_i-y_i)^2$

In [147]:
((x - y) ** 2).mean()

0.21160631200932078

Есть и непоэлементные операции

In [125]:
x.dot(y)  # dot product; for two 1-D arrays this is the inner product (a scalar)

1.4495845727486578

In [126]:
x.dot(y.T)

1.4495845727486578

In [127]:
(x * y).sum()

1.4495845727486578

In [128]:
x = np.random.rand(3, 4)
y = np.random.rand(7, 4)

In [129]:
x.dot(y)  # общая размерность должна совпадать

ValueError: shapes (3,4) and (7,4) not aligned: 4 (dim 1) != 7 (dim 0)

In [130]:
x.shape, y.shape

((3, 4), (7, 4))

In [131]:
x.shape, y.T.shape

((3, 4), (4, 7))

Порядок умножения матриц имеет значение!

In [133]:
x.dot(y.T), x.dot(y.T).shape

(array([[1.22084294, 1.08226274, 1.19562387, 0.59139419, 0.88362256,
         1.02307574, 1.03004817],
        [0.73391781, 1.03319543, 0.89127938, 0.4233025 , 0.49892133,
         0.79881803, 0.8318126 ],
        [1.09240196, 1.15064511, 1.2389993 , 0.59129879, 0.73447797,
         1.24489811, 0.85208045]]),
 (3, 7))

In [134]:
y.dot(x.T), y.dot(x.T).shape

(array([[1.22084294, 0.73391781, 1.09240196],
        [1.08226274, 1.03319543, 1.15064511],
        [1.19562387, 0.89127938, 1.2389993 ],
        [0.59139419, 0.4233025 , 0.59129879],
        [0.88362256, 0.49892133, 0.73447797],
        [1.02307574, 0.79881803, 1.24489811],
        [1.03004817, 0.8318126 , 0.85208045]]),
 (7, 3))

Еще немного магии с индексами и операциями

In [135]:
x

array([[0.62724897, 0.72341636, 0.01612921, 0.59443188],
       [0.55678519, 0.15895964, 0.15307052, 0.69552953],
       [0.31876643, 0.6919703 , 0.55438325, 0.38895057]])

In [136]:
x[0]  # первая строка

array([0.62724897, 0.72341636, 0.01612921, 0.59443188])

In [137]:
x[:, 0]  # первая колонка

array([0.62724897, 0.55678519, 0.31876643])

In [138]:
x[0, 0]  # элемент из первой строки и первого столбца

0.6272489720512687

In [139]:
x[[1, 2], 0]  # 1 и 2 строки, но только 0 столбец

array([0.55678519, 0.31876643])

In [140]:
x[-1, [0, -1]]  # last row, but only the first and last columns

array([0.31876643, 0.38895057])

In [141]:
x[:, :]

array([[0.62724897, 0.72341636, 0.01612921, 0.59443188],
       [0.55678519, 0.15895964, 0.15307052, 0.69552953],
       [0.31876643, 0.6919703 , 0.55438325, 0.38895057]])

In [142]:
x[0:2, 2:]

array([[0.01612921, 0.59443188],
       [0.15307052, 0.69552953]])

In [143]:
x

array([[0.62724897, 0.72341636, 0.01612921, 0.59443188],
       [0.55678519, 0.15895964, 0.15307052, 0.69552953],
       [0.31876643, 0.6919703 , 0.55438325, 0.38895057]])

In [144]:
x.cumsum()

array([0.62724897, 1.35066533, 1.36679454, 1.96122642, 2.51801161,
       2.67697125, 2.83004177, 3.5255713 , 3.84433772, 4.53630802,
       5.09069127, 5.47964184])

In [145]:
x.cumsum(axis=1)

array([[0.62724897, 1.35066533, 1.36679454, 1.96122642],
       [0.55678519, 0.71574484, 0.86881535, 1.56434488],
       [0.31876643, 1.01073672, 1.56511997, 1.95407055]])

In [146]:
x.cumsum(axis=0)

array([[0.62724897, 0.72341636, 0.01612921, 0.59443188],
       [1.18403416, 0.882376  , 0.16919972, 1.28996141],
       [1.50280059, 1.5743463 , 0.72358297, 1.67891198]])

In [147]:
np.diff(x, axis=0)

array([[-0.07046378, -0.56445671,  0.13694131,  0.10109765],
       [-0.23801877,  0.53301065,  0.40131273, -0.30657895]])

In [148]:
x

array([[0.62724897, 0.72341636, 0.01612921, 0.59443188],
       [0.55678519, 0.15895964, 0.15307052, 0.69552953],
       [0.31876643, 0.6919703 , 0.55438325, 0.38895057]])

In [149]:
x[0]

array([0.62724897, 0.72341636, 0.01612921, 0.59443188])

In [150]:
x - x[0]

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.07046378, -0.56445671,  0.13694131,  0.10109765],
       [-0.30848255, -0.03144606,  0.53825404, -0.20548131]])

In [151]:
x[:, 0]

array([0.62724897, 0.55678519, 0.31876643])

In [152]:
x - x[:, 0]

ValueError: operands could not be broadcast together with shapes (3,4) (3,) 

In [153]:
x[:, 0].reshape(-1, 1)

array([[0.62724897],
       [0.55678519],
       [0.31876643]])

In [154]:
x - x[:, 0].reshape(-1, 1)

array([[ 0.        ,  0.09616739, -0.61111977, -0.03281709],
       [ 0.        , -0.39782555, -0.40371468,  0.13874434],
       [ 0.        ,  0.37320387,  0.23561682,  0.07018415]])

In [156]:
x = np.random.rand(5, 5)
x

array([[0.20454286, 0.45063649, 0.54776357, 0.09332671, 0.29686078],
       [0.92758424, 0.56900373, 0.457412  , 0.75352599, 0.74186215],
       [0.04857903, 0.7086974 , 0.83924335, 0.16593788, 0.78099794],
       [0.28653662, 0.30646975, 0.66526147, 0.11139217, 0.66487245],
       [0.88785679, 0.69631127, 0.44032788, 0.43821438, 0.7650961 ]])

In [157]:
x - x[0]

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.72304138,  0.11836724, -0.09035158,  0.66019928,  0.44500138],
       [-0.15596383,  0.2580609 ,  0.29147978,  0.07261117,  0.48413716],
       [ 0.08199376, -0.14416674,  0.11749789,  0.01806546,  0.36801167],
       [ 0.68331393,  0.24567478, -0.1074357 ,  0.34488767,  0.46823532]])

In [158]:
x[0].shape

(5,)

In [160]:
x - x[:, 0].reshape(-1, 1)

array([[ 0.        ,  0.24609363,  0.34322071, -0.11121615,  0.09231792],
       [ 0.        , -0.35858051, -0.47017224, -0.17405825, -0.18572209],
       [ 0.        ,  0.66011836,  0.79066431,  0.11735885,  0.73241891],
       [ 0.        ,  0.01993314,  0.37872485, -0.17514445,  0.37833583],
       [ 0.        , -0.19154552, -0.44752892, -0.44964241, -0.1227607 ]])

In [162]:
x[0].reshape(-1, 1).shape

(5, 1)

#### Задача 1

Для матриц X, y найти решение уравнения:

$w^*=(X^TX)^{-1}X^Ty$

In [163]:
X = np.random.rand(1000, 50)  # matrix X from the formula: 1000 rows, 50 columns
w = np.random.rand(1, 50)  # NOTE(review): not used by the solution cell below - confirm whether it is needed
y = np.random.rand(1000)  # vector y from the formula: one value per row of X

In [164]:
# Normal equations: w* = (X^T X)^{-1} X^T y.
# Solve the linear system (X^T X) w = X^T y instead of forming the inverse explicitly:
# mathematically the same w*, but it avoids the 50x1000 intermediate product
# inv(X^T X) @ X^T and is numerically more stable.
np.linalg.solve(X.T @ X, X.T @ y)

array([-0.03814551,  0.03645866,  0.03521346, -0.00807001,  0.05291701,
        0.03019068, -0.04175205,  0.02454012,  0.06759232,  0.01877909,
        0.02709455,  0.01080815, -0.07598631,  0.06482881, -0.01703838,
       -0.03379394,  0.06016556,  0.01196782, -0.00294272,  0.01460045,
        0.00275138,  0.03116072,  0.02896452,  0.02596006,  0.00654942,
        0.03044851,  0.02713273,  0.02005469,  0.09285958, -0.00695642,
       -0.01072229,  0.0471703 ,  0.03871534, -0.02875413,  0.05063561,
        0.0379445 , -0.01883313,  0.03972586,  0.03764112,  0.05739698,
        0.01889243,  0.03129608, -0.00286167,  0.02338128,  0.02905614,
       -0.0138728 ,  0.07249687, -0.03786595,  0.05902694,  0.01725264])

### Broadcasting

В некоторых случаях нумпай умеет транслировать формы массивов, чтобы они были соотносимы и можно было делать операции.

In [165]:
x = np.random.rand(5)
x

array([0.20654241, 0.57738177, 0.7138124 , 0.4589949 , 0.91063852])

In [166]:
x * x

array([0.04265977, 0.33336971, 0.50952814, 0.21067632, 0.82926251])

In [167]:
x * 2  # почему все еще 5 элементов?

array([0.41308482, 1.15476354, 1.4276248 , 0.9179898 , 1.82127703])

In [168]:
(np.ones((5,)) * 2)

array([2., 2., 2., 2., 2.])

In [169]:
x * (np.ones((5,)) * 2)

array([0.41308482, 1.15476354, 1.4276248 , 0.9179898 , 1.82127703])

<img src=https://numpy.org/doc/stable/_images/broadcasting_1.png width="600">

In [172]:
a = np.array([0.0, 10.0, 20.0, 30.0])  # 4 элемента
b = np.array([1.0, 2.0, 3.0])  # 3 элемента

In [173]:
a + b  # dim mismatch

ValueError: operands could not be broadcast together with shapes (4,) (3,) 

In [174]:
a

array([ 0., 10., 20., 30.])

In [179]:
a.shape

(4,)

In [177]:
a.reshape((-1, 1))

array([[ 0.],
       [10.],
       [20.],
       [30.]])

In [180]:
a[:, np.newaxis]

array([[ 0.],
       [10.],
       [20.],
       [30.]])

In [181]:
a[:, np.newaxis] + b  # no mismatch thanks to the new axis: (4, 1) + (3,) broadcasts to (4, 3)

array([[ 1.,  2.,  3.],
       [11., 12., 13.],
       [21., 22., 23.],
       [31., 32., 33.]])

In [182]:
np.hstack([
    a.reshape((4, 1)), 
    a.reshape((4, 1)),
    a.reshape((4, 1))
])

array([[ 0.,  0.,  0.],
       [10., 10., 10.],
       [20., 20., 20.],
       [30., 30., 30.]])

In [183]:
np.vstack([
    b, 
    b,
    b,
    b
])

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

<img src=https://numpy.org/doc/stable/_images/broadcasting_4.png width="800">

#### Соединение массивов

In [184]:
x = np.array([1, 2, 3])
x

array([1, 2, 3])

In [185]:
np.vstack([x, 2 * x, 3 * x])

array([[1, 2, 3],
       [2, 4, 6],
       [3, 6, 9]])

In [186]:
np.hstack([x, 2 * x, 3 * x])

array([1, 2, 3, 2, 4, 6, 3, 6, 9])

In [187]:
x = x.reshape((3, 1))
x

array([[1],
       [2],
       [3]])

In [188]:
np.vstack([x, 2 * x, 3 * x])

array([[1],
       [2],
       [3],
       [2],
       [4],
       [6],
       [3],
       [6],
       [9]])

In [189]:
np.hstack([x, 2 * x, 3 * x])

array([[1, 2, 3],
       [2, 4, 6],
       [3, 6, 9]])

In [190]:
np.stack([x, 2 * x], axis=2)

array([[[1, 2]],

       [[2, 4]],

       [[3, 6]]])

In [191]:
x = x.reshape((-1,))
x

array([1, 2, 3])

In [192]:
np.tile(x, 1)

array([1, 2, 3])

In [193]:
np.tile(x, 2)

array([1, 2, 3, 1, 2, 3])

In [194]:
np.tile(x, (1, 2))

array([[1, 2, 3, 1, 2, 3]])

In [195]:
np.tile(x, (2, 2))

array([[1, 2, 3, 1, 2, 3],
       [1, 2, 3, 1, 2, 3]])

In [196]:
np.tile(x, (2, 2, 2))

array([[[1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3]],

       [[1, 2, 3, 1, 2, 3],
        [1, 2, 3, 1, 2, 3]]])

### Логические маски

In [197]:
x = np.arange(1, 21).reshape(5, 4)
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

In [198]:
x > 5

array([[False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [199]:
x[x > 5]

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20])

In [200]:
x[x < np.median(x)]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [201]:
x % 2 == 0

array([[False,  True, False,  True],
       [False,  True, False,  True],
       [False,  True, False,  True],
       [False,  True, False,  True],
       [False,  True, False,  True]])

In [202]:
x[x % 2 == 0]

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [203]:
np.where(x % 2 == 0, True, False)

array([[False,  True, False,  True],
       [False,  True, False,  True],
       [False,  True, False,  True],
       [False,  True, False,  True],
       [False,  True, False,  True]])

In [204]:
np.where(x % 2 == 0, 555, -1)

array([[ -1, 555,  -1, 555],
       [ -1, 555,  -1, 555],
       [ -1, 555,  -1, 555],
       [ -1, 555,  -1, 555],
       [ -1, 555,  -1, 555]])

In [206]:
x

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20]])

In [205]:
y = np.random.rand(5, 4)
y

array([[0.24403484, 0.77397081, 0.50075458, 0.86103672],
       [0.54812394, 0.38541727, 0.41945243, 0.07691337],
       [0.04436857, 0.36678231, 0.40542865, 0.54237752],
       [0.43525258, 0.88463205, 0.79674149, 0.33126847],
       [0.67069152, 0.24808904, 0.11582683, 0.88404367]])

In [207]:
np.where(x % 2 == 0, x, y)

array([[ 0.24403484,  2.        ,  0.50075458,  4.        ],
       [ 0.54812394,  6.        ,  0.41945243,  8.        ],
       [ 0.04436857, 10.        ,  0.40542865, 12.        ],
       [ 0.43525258, 14.        ,  0.79674149, 16.        ],
       [ 0.67069152, 18.        ,  0.11582683, 20.        ]])

#### Задача 3

В матрице X вместо элементов, делящихся на 7, оставить их квадрат, вместо остальных - корень

In [208]:
x = np.random.randint(1, 22, (10, 10))
x

array([[ 9, 16, 18, 17, 18,  2,  2, 15, 18,  3],
       [12, 10, 12, 17,  6,  2, 21, 13,  2, 21],
       [13, 12,  3,  9, 18,  5,  1, 15, 18,  9],
       [12, 13, 14, 15, 18,  6,  5,  8,  6,  6],
       [19,  9,  2, 15,  7, 17,  9, 19,  6,  4],
       [13, 19, 17, 15, 10,  7, 12,  1,  8, 13],
       [11, 12, 20, 12,  8, 16, 12, 11,  6,  5],
       [12, 19,  7, 19,  2,  8, 18, 18, 10, 13],
       [16, 10, 17,  5, 14, 17,  5,  9, 10,  6],
       [20, 14, 14,  1, 18, 16, 12, 21, 13, 16]])

In [209]:
np.where(x % 7 == 0, x ** 2, np.sqrt(x))

array([[  3.        ,   4.        ,   4.24264069,   4.12310563,
          4.24264069,   1.41421356,   1.41421356,   3.87298335,
          4.24264069,   1.73205081],
       [  3.46410162,   3.16227766,   3.46410162,   4.12310563,
          2.44948974,   1.41421356, 441.        ,   3.60555128,
          1.41421356, 441.        ],
       [  3.60555128,   3.46410162,   1.73205081,   3.        ,
          4.24264069,   2.23606798,   1.        ,   3.87298335,
          4.24264069,   3.        ],
       [  3.46410162,   3.60555128, 196.        ,   3.87298335,
          4.24264069,   2.44948974,   2.23606798,   2.82842712,
          2.44948974,   2.44948974],
       [  4.35889894,   3.        ,   1.41421356,   3.87298335,
         49.        ,   4.12310563,   3.        ,   4.35889894,
          2.44948974,   2.        ],
       [  3.60555128,   4.35889894,   4.12310563,   3.87298335,
          3.16227766,  49.        ,   3.46410162,   1.        ,
          2.82842712,   3.60555128],
       [  