В машинном обучении повсеместно встречаются операции с векторами и матрицами. Для ускорения работы с ними была написана библиотека numpy: она выполняет вычисления в коде на C и хранит данные в соответствующем компактном формате.

In [None]:
import numpy as np

In [None]:
# First, recall how a plain Python list is created with the standard syntax:
x = [1, 2, 3, 4, 5]
print(x)

[1, 2, 3, 4, 5]


In [None]:
# Convert the list into a numpy array and check the resulting type:
x_np = np.array(x)
print(type(x_np))

<class 'numpy.ndarray'>


In [None]:
# Display the array built from the list.
x_np

array([1, 2, 3, 4, 5])

In [None]:
# Division by a scalar is applied element-wise; the result shown is a float array.
x_np / 10

array([0.1, 0.2, 0.3, 0.4, 0.5])

In [None]:
# Adding a scalar is likewise applied to every element.
x_np + 10

array([11, 12, 13, 14, 15])

In [None]:
# Arithmetic mean of all elements.
x_np.mean()

3.0

In [None]:
# Largest element of the array.
x_np.max()

5

In [None]:
# List every attribute and method name available on the array object.
print(dir(x_np))

['T', '__abs__', '__add__', '__and__', '__array__', '__array_finalize__', '__array_function__', '__array_interface__', '__array_prepare__', '__array_priority__', '__array_struct__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__complex__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__divmod__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rlshift_

In [None]:
# x is still the plain Python list here, and a list does not support ** with
# an integer, so this cell raises TypeError (presumably shown for contrast
# with the numpy version in the next cell).
x ** 30

TypeError: ignored

In [None]:
# Element-wise power works on a numpy array.
# WARNING: the last value shown in the output is wrong. 5**30 equals
# 931322574615478515625, which does not fit into a 64-bit integer, so the
# result silently wraps around to 8985370930000934825. Use
# x_np.astype(object) for exact big integers, or x_np.astype(float) for an
# approximate value.
x_np ** 30

array([                  1,          1073741824,     205891132094649,
       1152921504606846976, 8985370930000934825])

В numpy есть много функций, аналогичных встроенным средствам Python для работы со списками: например, np.arange — аналог range.

In [None]:
# Integers from 1 up to, but not including, 10 - the array counterpart of range(1, 10).
np.arange(1, 10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Time the creation of the same ~5 million integers as a numpy array and as a
# Python list. %time is an IPython magic, so this cell only runs inside a
# notebook / IPython session, not as plain Python.
%time np.arange(1, 5000000)
%time list(range(1, 5000000))

print(1)

CPU times: user 9.94 ms, sys: 7.3 ms, total: 17.2 ms
Wall time: 24.6 ms
CPU times: user 81.7 ms, sys: 59.4 ms, total: 141 ms
Wall time: 141 ms
1


In [None]:
# numpy ships a built-in help search: list documentation entries matching a phrase.
# NOTE(review): np.lookfor appears to have been removed in NumPy 2.0 - confirm
# against the installed version before relying on this cell.
np.lookfor('create array')

Search results for 'create array'
---------------------------------
numpy.array
    Create an array.
numpy.memmap
    Create a memory-map to an array stored in a *binary* file on disk.
numpy.diagflat
    Create a two-dimensional array with the flattened input as a diagonal.
numpy.fromiter
    Create a new 1-dimensional array from an iterable object.
numpy.partition
    Return a partitioned copy of an array.
numpy.ctypeslib.as_array
    Create a numpy array from a ctypes array or POINTER.
numpy.ma.diagflat
    Create a two-dimensional array with the flattened input as a diagonal.
numpy.ma.make_mask
    Create a boolean mask from an array.
numpy.lib.Arrayterator
    Buffered iterator for big arrays.
numpy.ctypeslib.as_ctypes
    Create and return a ctypes object from a numpy array.  Actually
numpy.ma.mrecords.fromarrays
    Creates a mrecarray from a (flat) list of masked arrays.
numpy.ma.mvoid.__new__
    Create a new masked array from scratch.
numpy.ma.MaskedArray.__new__
    Create a 

**Как создавать массивы**

In [None]:
# Build an array from a list with an explicitly chosen element type (16-bit integers).
np.array([1,2,3,4], dtype=np.int16)

array([1, 2, 3, 4], dtype=int16)

In [None]:
# arange with a step: start at 1, stop before 10, step 2.
np.arange(1,10,2)

array([1, 3, 5, 7, 9])

In [None]:
# 25 evenly spaced points from 0 to 1, both endpoints included.
np.linspace(0, 1, 25)

array([0.        , 0.04166667, 0.08333333, 0.125     , 0.16666667,
       0.20833333, 0.25      , 0.29166667, 0.33333333, 0.375     ,
       0.41666667, 0.45833333, 0.5       , 0.54166667, 0.58333333,
       0.625     , 0.66666667, 0.70833333, 0.75      , 0.79166667,
       0.83333333, 0.875     , 0.91666667, 0.95833333, 1.        ])

In [None]:
# 3x3 array filled with ones (floats, as the output shows).
np.ones((3, 3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [None]:
# 3x2 array (3 rows, 2 columns) filled with zeros.
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [None]:
# Given a 1-D array, np.diag builds a square matrix with those values on the
# main diagonal and zeros elsewhere.
np.diag(np.array([1, 2, 3, 4]))

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [None]:
# Note: this rebinds x (previously the Python list) to a 4x4 diagonal matrix.
x = np.diag(np.array([1, 2, 3, 4]))
# shape is a tuple with the size along each axis: (rows, columns).
x.shape

(4, 4)

In [None]:
# 3x2 array of random samples drawn uniformly from [0, 1).
np.random.rand(3,2)

array([[0.26064384, 0.72330993],
       [0.71341975, 0.02312941],
       [0.2222487 , 0.57991142]])

In [None]:
# 3x2 array of samples from the standard normal distribution (mean 0, variance 1).
np.random.randn(3,2)

array([[ 1.25274105, -0.11941923],
       [ 1.26366691, -1.19561446],
       [-0.28044712, -0.01030114]])

**Операции над одномерными массивами**

In [None]:
# Two 1-D arrays of equal length, used in the element-wise examples that follow.
a = np.array([1, 2, 4, 8, 16])
b = np.array([1, 2, 3, 4, 5])

In [None]:
# Element-wise sum of the two arrays.
a + b

array([ 2,  4,  7, 12, 21])

In [None]:
# Element-wise difference.
a - b

array([ 0,  0,  1,  4, 11])

In [None]:
# Element-wise product (not a dot product).
a * b

array([ 1,  4, 12, 32, 80])

In [None]:
# Element-wise true division; the result is a float array.
a / b

array([1.        , 1.        , 1.33333333, 2.        , 3.2       ])

In [None]:
# Round every quotient to a whole number; the values stay floats.
(a / b).round()

array([1., 1., 1., 2., 3.])

In [None]:
# Descending integers 9..0.
b = np.arange(9, -1,-1)

# np.sort returns a sorted copy; b itself keeps its original order,
# as the second print shows.
print(np.sort(b))
print(b)

[0 1 2 3 4 5 6 7 8 9]
[9 8 7 6 5 4 3 2 1 0]


In [None]:
# Descending integers 10..0.
a = np.arange(10, -1, -1)
# Drop the elements at positions 5 and 7 (the values 5 and 3). np.delete
# returns a new array, so the result is assigned back to a.
a = np.delete(a, [5, 7])
print(a)

[10  9  8  7  6  4  2  1  0]


In [None]:
a = np.array([1, 3, 5, 7])
# np.append returns a new array with the value added at the end,
# so the result is assigned back to a.
a = np.append(a, 7)
a

array([1, 3, 5, 7, 7])

In [None]:
# Boolean masks can be used for indexing: only positions marked True are kept.
print(a[[False, False,  True,  True, False]])

[5 7]


In [None]:
# Half-step grid 1.0, 1.5, ..., 14.5.
a = np.arange(1, 15, 0.5)
# Build a boolean mask selecting values strictly between 3 and 10.
# The two conditions are combined with the element-wise logical AND (&),
# which states the intent directly instead of multiplying boolean arrays.
mask = (a > 3) & (a < 10)
a[mask]

array([3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])

In [None]:
# Array elements can be accessed by index, just like list items:
# here, element 1 of a freshly generated random vector of length 3.
np.random.rand(3,)[1]

0.3131653635466932

**Операции над матрицами**

In [None]:
# Two 4x4 matrices of uniform random values for the matrix examples.
X = np.random.rand(4,4)
Y = np.random.rand(4,4)

In [None]:
# Display the matrix.
X

array([[0.22651048, 0.58058677, 0.47970971, 0.40150481],
       [0.08755502, 0.62008266, 0.94753807, 0.45315535],
       [0.68865708, 0.65056236, 0.85823647, 0.73683761],
       [0.15265707, 0.63556416, 0.53171029, 0.24426404]])

In [None]:
# Access a single element: row 1, column 2 (both zero-based).
X[1,2]

0.9475380659615873

In [None]:
# Given a 2-D array, np.diag extracts its main diagonal as a 1-D array.
np.diag(X)

array([0.22651048, 0.62008266, 0.85823647, 0.24426404])

In [None]:
# Number of dimensions (axes) of the array.
X.ndim

2

In [None]:
# Size along each axis: (rows, columns).
X.shape

(4, 4)

In [None]:
# Transpose: rows and columns are swapped.
X.T

array([[0.22651048, 0.08755502, 0.68865708, 0.15265707],
       [0.58058677, 0.62008266, 0.65056236, 0.63556416],
       [0.47970971, 0.94753807, 0.85823647, 0.53171029],
       [0.40150481, 0.45315535, 0.73683761, 0.24426404]])

In [None]:
# Display X again.
# NOTE(review): the values captured in this cell's output differ from the
# earlier display of X, which suggests X was regenerated (cells run out of
# order) before this output was saved - re-run the notebook top to bottom.
X

array([[0.77914093, 0.76398995, 0.27056739, 0.59682662],
       [0.47706274, 0.04442296, 0.04882901, 0.84319607],
       [0.6564771 , 0.05182095, 0.33319085, 0.40950842],
       [0.71890061, 0.46881715, 0.1395023 , 0.62517735]])