In [7]:
# All elements in np.array must be of the same type / homogeneous

In [8]:
# np.array accepts any sequence: a list and a tuple holding the same
# values produce identical arrays
import numpy as np
a = np.array([1, 2, 3, 4])   # built from a list
b = np.array((1, 2, 3, 4))   # built from a tuple
print(a, b)

[1 2 3 4] [1 2 3 4]


In [9]:
# The dtype argument overrides the type NumPy would infer from the values.
# np.float32 and np.intc (C int) are the types behind the one-character
# codes 'f' and 'i'.
a = np.array([1, 2, 3, 4], dtype=np.float32)
b = np.array([1, 2, 3, 4], dtype=np.intc)
print(a, b)

[1. 2. 3. 4.] [1 2 3 4]


In [10]:
# ndim is the number of axes, i.e. how many indexes are needed to reach
# a single innermost element
a = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
print(a.ndim)

3


In [11]:
# All "sub-arrays" must have the same number of elements
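# Think of matrices, where every row must have the same length
# a = np.array([[1,2,3],[4,5,6,7]])
# NumPy >= 1.24 rejects this ragged input with a ValueError; older versions build a
# 1-D array of Python lists instead, so a[1,2] raises an IndexError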

In [12]:
a = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])

# shape is a tuple holding the length of every axis
print(a.shape)
# index the tuple to get the length of one particular axis (here the last one)
print(a.shape[2])

# size is the total number of elements in the array
print(a.size)
# indexing the array first gives the element count of that sub-array
print(a[1].size)

# nbytes is the memory taken by the array's elements (size * itemsize);
# the default integer width is platform dependent, so this value can vary
print(a.nbytes)
# and likewise for a sub-array
print(a[1].nbytes)

# Type "a." + tab to list every attribute and method available on an array

(2, 2, 3)
3
12
6
48
24


In [13]:
# Numpy provides a number of different ways to create / manage arrays

In [14]:
# np.arange(stop) returns a 1-D array holding 0, 1, ..., stop-1
print(np.arange(10))
# np.arange(start, stop) begins at start instead of 0 (stop is still excluded)
print(np.arange(5, 10))
# np.arange(start, stop, step) additionally sets the spacing between values
print(np.arange(5, 10, 2))

[0 1 2 3 4 5 6 7 8 9]
[5 6 7 8 9]
[5 7 9]


In [15]:
# No seed is set, so every call below gives different values on each run

# np.random.permutation returns a shuffled copy of the array it is given
print(np.random.permutation(np.arange(10)))

# np.random.randint(low, high) draws one integer from low (included) to high (excluded)
print(np.random.randint(10, 20))

# np.random.rand(n) draws n floats uniformly from [0, 1)
print(np.random.rand(5))
# pass several lengths to get a multi-dimensional array, here 2 rows x 3 columns
print(np.random.rand(2, 3))

# np.random.randn(n) draws n floats from the standard normal (Gaussian) distribution
print(np.random.randn(5))

[1 8 3 9 2 6 0 5 7 4]
13
[0.21703317 0.792183   0.93475845 0.10753315 0.15872698]
[[0.84319894 0.87930953 0.23856876]
 [0.87984534 0.04764035 0.10058482]]
[ 0.74204956 -1.58358563 -0.43405096 -0.08377805  0.10050605]


In [16]:
# .reshape returns the same elements, in the same order, laid out with the requested shape
a = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])
print(a)
print(a.shape)

# The new shape must hold exactly as many elements as the original: 2*2*3 == 6*2
b = a.reshape((6, 2))
print(b)

[[[1 2 3]
  [4 5 6]]

 [[1 2 3]
  [4 5 6]]]
(2, 2, 3)
[[1 2]
 [3 4]
 [5 6]
 [1 2]
 [3 4]
 [5 6]]


In [17]:
# np.zeros(shape) builds an array of the given shape filled with 0.0
print(np.zeros(shape=(2, 3)))

# np.ones(shape) does the same with 1.0
print(np.ones(shape=(2, 3)))

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]


In [18]:
# Slicing behaves differently for lists and for NumPy arrays:
# a list slice is a new list (a copy), whereas an array slice is a view
# that shares memory with the original array

# list: writing to the slice leaves the original untouched
a = [1, 2, 3, 4, 5]
b = a[1:4]
b[0] = 6
print(b)
print(a)  # unchanged

# ndarray: writing to the slice writes through to the original
a = np.array([1, 2, 3, 4, 5])
b = a[1:4]
b[0] = 6
print(b)
print(a)  # a[1] is now 6

# .copy() gives the slice its own memory, so the original stays intact
a = np.array([1, 2, 3, 4, 5])
b = a[1:4].copy()
b[0] = 6
print(b)
print(a)  # unchanged

[6, 3, 4]
[1, 2, 3, 4, 5]
[6 3 4]
[1 6 3 4 5]
[6 3 4]
[1 2 3 4 5]


In [19]:
# np.argwhere returns the indices where a condition holds, one row per match
a = np.arange(50)
idx = np.argwhere(a == 25)
print(idx)

[[25]]


In [37]:
# .T gives the transpose of a matrix (rows and columns swapped)
a = np.random.randint(0, 16, size=(4, 4))  # random integers from 0 to 15
print(a)
print(a.T)

[[ 1 11  1 10]
 [ 1 15 15  8]
 [ 5  5  8 13]
 [ 3  1  4  1]]
[[ 1  1  5  3]
 [11 15  5  1]
 [ 1 15  8  4]
 [10  8 13  1]]


In [43]:
# numpy.linalg collects linear algebra routines, e.g. matrix inversion
# (la.inv raises LinAlgError when the matrix is singular)
import numpy.linalg as la
print(la.inv(a))
# Multiplying a by its inverse gives the identity matrix, up to floating-point rounding
print(np.dot(a, la.inv(a)))

[[ 0.11055777 -0.08864542 -0.06374502  0.43227092]
 [ 0.10806773  0.0124502  -0.10059761  0.12749004]
 [-0.10507968  0.06623506  0.04482072 -0.06175299]
 [-0.01942231 -0.01145418  0.1125498  -0.17729084]]
[[ 1.00000000e+00 -2.77555756e-17 -2.22044605e-16  4.44089210e-16]
 [ 1.66533454e-16  1.00000000e+00  0.00000000e+00  2.22044605e-16]
 [ 1.11022302e-16 -2.77555756e-17  1.00000000e+00  0.00000000e+00]
 [ 4.85722573e-17  3.64291930e-17 -4.16333634e-17  1.00000000e+00]]


In [48]:
# You can act specifically on rows or columns
a.sort(axis=0)
print(a)
a.sort(axis=1)
print(a)

[[ 1  1  1  1]
 [ 1  4  5  8]
 [ 3  8 10 11]
 [ 5 13 15 15]]
[[ 1  1  1  1]
 [ 1  4  5  8]
 [ 3  8 10 11]
 [ 5 13 15 15]]


In [66]:
# More ways to index an array

# Index array: pass a list of positions to pick exactly those elements
a = np.array([1, 2, 3, 4, 5])
print(a[[2, 4]])

# Boolean mask: one True/False per element, True meaning "keep this element"
print(a[[True, False, True, False, True]])

# Unlike slicing, both forms return a copy rather than a view of the array

# Comparing an array with a value produces a boolean array, element by element
print(a < 4)

# Combine conditions with & (and), | (or) and ~ (not). Python's own and / or / not
# keywords do not work element-wise, and each condition needs its own parentheses.
print((a > 2) & (a < 5))

[3 5]
[1 3 5]
[ True  True  True False False]
[False False  True  True False]


In [72]:
# Broadcasting lets operands of different shapes take part in the same operation:
# here the scalar 5 is added to every element
a = np.random.randint(0, 4, size=(2, 2))
print(a)
print(a + 5)

# A 2x1 column is stretched across the columns: 1 is added to row 0, 2 to row 1
b = [[1], [2]]
print(a + b)

[[1 3]
 [3 3]]
[[6 8]
 [8 8]]
[[2 4]
 [5 5]]


In [77]:
# Concatenate arrays with np.vstack (stacked vertically, one on top of the other)
# and np.hstack (stacked horizontally, side by side)
a = np.random.randint(0, 4, size=(2, 2))
b = np.random.randint(0, 4, size=(2, 2))
print(a)
print(b)
print(np.vstack([a, b]))
print(np.hstack([a, b]))

[[1 0]
 [3 1]]
[[2 1]
 [0 2]]
[[1 0]
 [3 1]
 [2 1]
 [0 2]]
[[1 0 2 1]
 [3 1 0 2]]


In [83]:
# Numpy implementation (universal functions, ufuncs) is usually faster than python built-in functions
# ufuncs use vectorization instead of iteration
a = np.random.rand(1000000)
%timeit sum(a)
%timeit np.sum(a)

94.7 ms ± 1.45 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
292 µs ± 5.46 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
