In [2]:
# All elements of a NumPy array (np.array) must be of the same type: arrays are homogeneous

In [27]:
# np.array accepts either kind of sequence: a list and a tuple holding the
# same values produce equal arrays.
import numpy as np
a = np.array([1, 2, 3, 4])    # built from a list
b = np.array((1, 2, 3, 4))    # built from a tuple
a, b

(array([1, 2, 3, 4]), array([1, 2, 3, 4]))

In [28]:
# The element type can be forced with the dtype argument.
# 'f' is a single-precision float code and 'i' an integer code; here they
# resolve to float32 and int32 (see the output below).
a = np.array([1, 2, 3, 4], dtype='f')
b = np.array([1, 2, 3, 4], dtype='i')
a, b

(array([1., 2., 3., 4.], dtype=float32), array([1, 2, 3, 4], dtype=int32))

In [29]:
# ndim is the number of dimensions of an array, i.e. how many indexes are
# needed to reach one of the innermost elements (a[i][j][k] here).
a = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
a.ndim

3

In [6]:
# All "sub-arrays" must have the same number of elements
# Think of a matrix, where every row must have the same length
# a = np.array([[1,2,3],[4,5,6,7]]) is not valid

In [30]:
a = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])

# shape: a tuple giving the number of elements along each dimension
print(a.shape)
# indexing that tuple gives the length of one particular dimension
print(a.shape[2])

# size: the total number of elements in the array
print(a.size)
# the same attribute works on a sub-array such as a[1]
print(a[1].size)

# nbytes: the total number of bytes taken by the array's elements
print(a.nbytes)
# again usable on a sub-array
print(a[1].nbytes)

# Tip: type "a." and press Tab to list the attributes available on an array

(2, 2, 3)
3
12
6
48
24


In [8]:
# Numpy provides a number of different ways to create / manage arrays

In [31]:
# np.arange(n) creates a 1-D array of length n holding the integers 0, 1, ..., n-1
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [32]:
# With two arguments, the first is the starting value and the second is the
# end value, which is itself excluded
np.arange(5,10)

array([5, 6, 7, 8, 9])

In [33]:
# A third argument sets the step between consecutive values
np.arange(5,10,2)

array([5, 7, 9])

In [34]:
# np.random.permutation returns the elements of an array in a random order
np.random.permutation(np.arange(10))

array([6, 4, 2, 5, 3, 7, 1, 9, 8, 0])

In [35]:
# np.random.randint(low, high) returns a random integer between low (included)
# and high (excluded)
np.random.randint(10,20)

17

In [36]:
# np.random.rand(n) returns n random numbers between 0 and 1, uniformly distributed
np.random.rand(5)

array([0.93191444, 0.61905257, 0.65848226, 0.40193675, 0.79342937])

In [37]:
# Passing several arguments sets the dimensions of the random matrix
# (here 2 rows and 3 columns)
np.random.rand(2,3)

array([[0.893017  , 0.49252975, 0.76385469],
       [0.03172893, 0.77711092, 0.22700993]])

In [38]:
# np.random.randn(n) returns n random numbers drawn from a normal (Gaussian)
# distribution
np.random.randn(5)

array([-0.79642153, -1.02409158, -0.41428763,  0.01046944,  0.24627822])

In [11]:
# .reshape rearranges all the elements of an array into the requested dimensions
a = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
print(a)
print(a.shape)

b = a.reshape(6, 2)
print(b)

# The new shape must hold exactly as many elements as the original
# (here 2*2*3 == 6*2)

[[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]
(2, 2, 3)
[[1 2]
 [3 4]
 [5 6]
 [1 2]
 [3 4]
 [5 6]]


In [39]:
# np.zeros(shape) returns an array/matrix of the given shape filled with zeros
# (floating-point values, as the output shows)
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [40]:
# np.ones(shape) returns an array/matrix of the given shape filled with ones
# (floating-point values, as the output shows)
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [13]:
# Key difference between slicing a list and slicing a NumPy array:
# a list slice is a new, independent list, whereas an array slice is a view
# onto the same memory as the original array.
a = [1, 2, 3, 4, 5]
b = a[1:4]
b[0] = 6
print(b)
print(a)  # unchanged: b is a separate list

a = np.array([1, 2, 3, 4, 5])
b = a[1:4]
b[0] = 6
print(b)
print(a)  # changed: writing through b also writes into a

# Call .copy() on the slice to get an independent array instead:
a = np.array([1, 2, 3, 4, 5])
b = a[1:4].copy()
b[0] = 6
print(b)
print(a)  # unchanged: b holds its own data

[6, 3, 4]
[1, 2, 3, 4, 5]
[6 3 4]
[1 6 3 4 5]
[6 3 4]
[1 2 3 4 5]


In [41]:
# np.argwhere returns the indices at which a condition holds, one row per match
a = np.arange(50)
idx = np.argwhere(a == 25)
idx

array([[25]], dtype=int64)

In [15]:
# The .T attribute gives the transpose of a matrix (rows and columns swapped)
a = np.random.randint(16, size=(4, 4))
print(a)
print(a.T)

[[15  5  8 14]
 [14  4 12  2]
 [ 0  5  4  9]
 [12 13  1 10]]
[[15 14  0 12]
 [ 5  4  5 13]
 [ 8 12  4  1]
 [14  2  9 10]]


In [16]:
# numpy.linalg provides many linear algebra functions, e.g. matrix inversion
import numpy.linalg as la
a_inv = la.inv(a)  # inverse of the matrix a, reused for the product below
print(a_inv)
# a times its inverse is the identity matrix, up to floating-point round-off
print(a.dot(a_inv))

[[ 0.05233434 -0.00207078 -0.10353916  0.02033133]
 [-0.10253514  0.04122741  0.06137048  0.08007028]
 [-0.03928213  0.08189006  0.09450301 -0.04643574]
 [ 0.07442269 -0.0592997   0.03501506 -0.02384538]]
[[ 1.00000000e+00  0.00000000e+00 -1.11022302e-16  5.55111512e-17]
 [ 2.77555756e-17  1.00000000e+00  1.11022302e-16  1.52655666e-16]
 [ 0.00000000e+00  0.00000000e+00  1.00000000e+00 -2.77555756e-17]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]


In [17]:
# Many operations can act along one axis only: axis=0 works down each column,
# axis=1 works across each row.
# np.sort returns a sorted copy, so the matrix a shown in the cells above is
# left untouched and this cell gives the same result every time it is run
# (the a.sort(axis=...) method would instead sort a itself, in place).
cols_sorted = np.sort(a, axis=0)  # every column of a sorted independently
print(cols_sorted)
rows_sorted = np.sort(cols_sorted, axis=1)  # then every row of that result sorted
print(rows_sorted)

[[ 0  4  1  2]
 [12  5  4  9]
 [14  5  8 10]
 [15 13 12 14]]
[[ 0  1  2  4]
 [ 4  5  9 12]
 [ 5  8 10 14]
 [12 13 14 15]]


In [44]:
# Other powerful indexing actions

In [45]:
# Indexing with a list of positions (an "index array") picks exactly those elements
a = np.array([1, 2, 3, 4, 5])
a[[2, 4]]

array([3, 5])

In [43]:
# A list of booleans also works as an index array: the elements at the True
# positions are kept, the others are dropped
a[[True, False, True, False, True]]

array([1, 3, 5])

In [None]:
# When using an index array (or masking), you're making a copy and not just viewing the array

In [46]:
# Applying a condition to an array creates a boolean array, one entry per element
a<4

array([ True,  True,  True, False, False])

In [47]:
# Combine conditions on arrays with the operators &, | and ~
# (use these instead of the Python keywords and, or, not);
# wrap each condition in its own parentheses
(a>2) & (a<5)

array([False, False,  True,  True, False])

In [25]:
# Broadcasting makes operations on matrices easier: a single number is applied
# to every element of the matrix
a = np.random.randint(4, size=(2, 2))
print(a)
print(a + 5)

[[2 0]
 [3 3]]
[[7 5]
 [8 8]]


In [26]:
# The same works when adding a column or a row: here the single column b
# is added to every column of a
b = [[1], [2]]
a + b

array([[3, 1],
       [5, 5]])

In [21]:
# Concatenate arrays vertically with np.vstack or horizontally with np.hstack
a = np.random.randint(4, size=(2, 2))
b = np.random.randint(4, size=(2, 2))
print(a)
print(b)
print(np.vstack((a, b)))  # b stacked below a
print(np.hstack((a, b)))  # b placed to the right of a

[[1 2]
 [0 2]]
[[1 1]
 [3 1]]
[[1 2]
 [0 2]
 [1 1]
 [3 1]]
[[1 2 1 1]
 [0 2 3 1]]


In [22]:
# NumPy functions (built on universal functions, ufuncs) are usually much faster
# on arrays than Python built-in functions: they use vectorization instead of
# Python-level iteration. Compare the two timings below.
a = np.random.rand(1000000)
%timeit sum(a)
%timeit np.sum(a)

91.3 ms ± 884 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
275 µs ± 674 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
