# NumPy Basics

In [1]:
import numpy as np

## 1. ndarray Creation

In [2]:
# Build a 1-D ndarray from a plain Python list with np.array()
arr_1 = np.array([1, 2, 3])

# Show the array followed by its basic attributes
print(f'arr_1:\n {arr_1}')
print(f'type: {type(arr_1)}')
print(f'data type: {arr_1.dtype}')
print(f'number of axes/dimensions: {arr_1.ndim}')
print(f'shape: {arr_1.shape}')
print(f'size: {arr_1.size}')
print(f'item size in bytes: {arr_1.itemsize}')

arr_1:
 [1 2 3]
type: <class 'numpy.ndarray'>
data type: int64
number of axes/dimensions: 1
shape: (3,)
size: 3
item size in bytes: 8


In [3]:
# Build a 2-D ndarray from nested lists, requesting single-precision floats
arr_2 = np.array(
    [[1, 2, 3],
     [4, 5, 6]],
    dtype=np.float32,
)

# Same report as for the 1-D case: contents first, then the attributes
print(f'arr_2:\n {arr_2}')
print(f'type: {type(arr_2)}')
print(f'data type: {arr_2.dtype}')
print(f'number of axes/dimensions: {arr_2.ndim}')
print(f'shape: {arr_2.shape}')
print(f'size: {arr_2.size}')
print(f'item size in bytes: {arr_2.itemsize}')

arr_2:
 [[1. 2. 3.]
 [4. 5. 6.]]
type: <class 'numpy.ndarray'>
data type: float32
number of axes/dimensions: 2
shape: (2, 3)
size: 6
item size in bytes: 4


In [4]:
# Other array constructors, part 1
arr_3 = np.arange(0, 50, 5)             # 0, 5, ..., 45 (the stop value is excluded)
arr_4 = np.linspace(0, np.pi, num=6)    # 6 evenly spaced points from 0 to pi, both included
arr_5 = np.ones_like(arr_3)             # ones with the same shape and dtype as arr_3
arr_6 = np.empty(shape=(2, 3))          # uninitialized memory: the printed values are arbitrary

print(f'arr_3:\n {arr_3}')
print(f'arr_4:\n {arr_4}')
print(f'arr_5:\n {arr_5}')
print(f'arr_6:\n {arr_6}')

arr_3:
 [ 0  5 10 15 20 25 30 35 40 45]
arr_4:
 [0.         0.62831853 1.25663706 1.88495559 2.51327412 3.14159265]
arr_5:
 [1 1 1 1 1 1 1 1 1 1]
arr_6:
 [[-1.72723371e-077 -2.32036297e+077  1.97626258e-323]
 [ 0.00000000e+000  0.00000000e+000  0.00000000e+000]]


In [5]:
# Other array constructors, part 2
arr_7 = np.zeros(shape=(2, 3, 2))
# Every element is computed from its own (row, column) index
arr_8 = np.fromfunction(lambda row, col: row + col, (3, 3), dtype=int)
# Unseeded uniform samples, so the values differ between runs
arr_9 = np.random.random(size=(2, 3))

print(f'arr_7:\n {arr_7}')
print(f'arr_8:\n {arr_8}')
print(f'arr_9:\n {arr_9}')

arr_7:
 [[[0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]]]
arr_8:
 [[0 1 2]
 [1 2 3]
 [2 3 4]]
arr_9:
 [[0.37886736 0.4136141  0.27396963]
 [0.72742684 0.71592678 0.36676315]]


## 2. ndarray Manipulation

In [6]:
# Element-wise arithmetic and comparisons on two equally shaped 1-D arrays
A = np.array([10, 20, 30, 40, 50])
B = np.arange(5)

print(f'A:\n {A}')
print(f'B:\n {B}')

# Arithmetic operators work element by element
print(f'A + B:\n {A + B}')
print(f'A * B:\n {A * B}')

# Comparisons yield boolean arrays; multiplying two of them acts as a logical AND
print(f'A > 30:\n {A > 30}')
print(f'(A > 30) * (B % 2 == 0):\n {(A > 30) * (B % 2 == 0)}')

A:
 [10 20 30 40 50]
B:
 [0 1 2 3 4]
A + B:
 [10 21 32 43 54]
A * B:
 [  0  20  60 120 200]
A > 30:
 [False False False  True  True]
(A > 30) * (B % 2 == 0):
 [False False False False  True]


In [7]:
# Matrix product: the @ operator and ndarray.dot() give the same result for 2-D arrays
C = np.array([[2, 5],
              [9, 5],
              [1, 0]])
D = np.array([[1, 4],
              [5, 6]])
E = np.array([[1, 4],
              [5, 6],
              [0, 5]])

print(f'C:\n {C}')
print(f'D:\n {D}')
print(f'E:\n {E}')

# (3, 2) times (2, 2) works because C's column count equals D's row count
print(f'C @ D:\n {C @ D}')
print(f'C.dot(D):\n {C.dot(D)}')
# print('C.dot(E):\n', C.dot(E))     # ValueError: shapes (3,2) and (3,2) are not aligned (C has 2 columns but E has 3 rows)

C:
 [[2 5]
 [9 5]
 [1 0]]
D:
 [[1 4]
 [5 6]]
E:
 [[1 4]
 [5 6]
 [0 5]]
C @ D:
 [[27 38]
 [34 66]
 [ 1  4]]
C.dot(D):
 [[27 38]
 [34 66]
 [ 1  4]]


In [8]:
# Arithmetic between an array and a scalar is applied to every element
F = np.arange(10)

print(f'F:\n {F}')
# The float operand 2.0 makes the result a float array
print(f'F - 2:\n {F - 2.0}')
print(f'F * 4:\n {F * 4}')
print(f'F % 2:\n {F % 2}')

F:
 [0 1 2 3 4 5 6 7 8 9]
F - 2:
 [-2. -1.  0.  1.  2.  3.  4.  5.  6.  7.]
F * 4:
 [ 0  4  8 12 16 20 24 28 32 36]
F % 2:
 [0 1 0 1 0 1 0 1 0 1]


In [9]:
# Reductions over a random 2x4 array of floats scaled into [0, 100)
G = 100 * np.random.random(size=(2, 4))

print('G:\n', G)
# Without an axis argument every reduction works on the flattened array
print('sum:\n', np.sum(G))
print('cumulative sum:\n', np.cumsum(G))
print('max:\n', np.max(G))
print('min:\n', np.min(G))
print('mean:\n', np.mean(G))
# np.where(condition) gives the indices where the condition holds;
# indexing G with them extracts the matching elements as a 1-D array
print('where(>60):\n', G[np.where(G > 60)])

G:
 [[83.95068966 70.12711865 68.65262379  5.67943019]
 [ 8.04007375 10.96911765 99.8862175  40.47188776]]
sum:
 387.7771589508984
cumulative sum:
 [ 83.95068966 154.07780831 222.7304321  228.40986229 236.44993604
 247.41905369 347.30527119 387.77715895]
max:
 99.88621749742056
min:
 5.679430188912549
mean:
 48.4721448688623
where(>60):
 [83.95068966 70.12711865 68.65262379 99.8862175 ]


In [10]:
# Locating maxima and sort orders in a random 2x4 integer array (values below 30)
H = np.random.randint(0, 30, size=(2, 4))

print(f'H:\n {H}')
# index of the max in the flattened array
print(f'argmax(H):\n {np.argmax(H)}')
# for each column, the row index of its max
print(f'argmax(H, axis=0):\n {np.argmax(H, axis=0)}')
# for each row, the column index of its max
print(f'argmax(H, axis=1):\n {np.argmax(H, axis=1)}')

# argsort() returns the indices that would sort the array, not the sorted values;
# the default axis is the last one, so for a 2-D array this equals argsort(H, axis=1)
print(f'argsort(H):\n {np.argsort(H)}')
# for each column, the row indices that would sort it
print(f'argsort(H, axis=0):\n {np.argsort(H, axis=0)}')
# for each row, the column indices that would sort it
print(f'argsort(H, axis=1):\n {np.argsort(H, axis=1)}')

H:
 [[28 26 28  6]
 [24  8 20  9]]
argmax(H):
 0
argmax(H, axis=0):
 [0 0 0 1]
argmax(H, axis=1):
 [0 0]
argsort(H):
 [[3 1 0 2]
 [1 3 2 0]]
argsort(H, axis=0):
 [[1 1 1 0]
 [0 0 0 1]]
argsort(H, axis=1):
 [[3 1 0 2]
 [1 3 2 0]]


## 3. Indexing & Slicing

In [11]:
# Slicing a 1-D array uses the same start:stop:step syntax as Python lists
I = np.random.randint(100, size=7)

print(f'I:\n {I}')
print(f'I[1:4]:\n {I[1:4]}')
print(f'I[2:]:\n {I[2:]}')
print(f'I[1::2]:\n {I[1::2]}\n')

# A 2-D array takes one index or slice per axis, separated by commas
J = np.random.randint(100, size=(3, 5))

print(f'J:\n {J}')
print(f'J[1, 3]:\n {J[1, 3]}')
print(f'J[0:2, 1:3]:\n {J[0:2, 1:3]}')

I:
 [32 63 30 85 27 65  9]
I[1:4]:
 [63 30 85]
I[2:]:
 [30 85 27 65  9]
I[1::2]:
 [63 85 65]

J:
 [[39 80 93 26 99]
 [28  8 41 90 54]
 [35 70 43  2 35]]
J[1, 3]:
 90
J[0:2, 1:3]:
 [[80 93]
 [ 8 41]]


In [12]:
# Mixing an integer index with a slice, on standard-normal samples shifted by 10
K = 10 + np.random.randn(2, 4)

print(f'K:\n {K}')
print(f'K[0, 0:2]:\n {K[0, 0:2]}\n')

L = 10 + np.random.randn(2, 3, 3)

print(f'L:\n {L}')
print(f'shape: {L.shape}')
# The Ellipsis stands for all leading axes, so this picks index 2 along the last axis
print(f'L[..., 2]:\n {L[..., 2]}')

K:
 [[ 8.53043121 10.1205655  10.91944574  8.71303918]
 [ 9.24522636  8.64136669  9.93825842 10.94458528]]
K[0, 0:2]:
 [ 8.53043121 10.1205655 ]

L:
 [[[10.05303871 11.97613507  9.14937606]
  [ 9.23344768  8.74808422 11.1603547 ]
  [ 8.97419632 11.58245418 10.30769371]]

 [[10.79173755  8.90258381  8.63535346]
  [11.02775443  9.00429152  9.98375539]
  [ 9.8576386   9.49033129  8.49421977]]]
shape: (2, 3, 3)
L[..., 2]:
 [[ 9.14937606 11.1603547  10.30769371]
 [ 8.63535346  9.98375539  8.49421977]]


## 4. Shape Manipulation

In [13]:
# reshape() vs. resize()
# reshape() returns a new ndarray object and leaves M's own shape unchanged,
# while resize() returns None and changes M's shape in place
M = np.array([1, 2, 3, 4, 5, 6])

print('M.reshape(2, 3):\n', M.reshape(2, 3))
# M is still one-dimensional here: reshape() did not modify it
print('M(after reshape(2, 3)):\n', M)
# resize() mutates M itself, so the next print shows the 2x3 layout
M.resize(2, 3)
print('M(after resize(2, 3)):\n', M)
# .T swaps the axes of the (now 2x3) array, giving a 3x2 result
print('Transpose of M:\n', M.T)

M.reshape(2, 3):
 [[1 2 3]
 [4 5 6]]
M(after reshape(2, 3)):
 [1 2 3 4 5 6]
M(after resize(2, 3)):
 [[1 2 3]
 [4 5 6]]
Transpose of M:
 [[1 4]
 [2 5]
 [3 6]]


In [14]:
# ravel() vs. flatten()
# ravel() returns a view of the original ndarray whenever possible (as it does here),
# while flatten() always returns a copy of the original ndarray
N = np.arange(12).reshape((3, 4))

print('N:\n', N)

N_ravel = np.ravel(N)
print('N(after ravel():)\n', N_ravel)
N_flatten = N.flatten()
print('N(after flatten:)\n', N_flatten)

# Writing through the ravel() result also changes N, because both share the same data
N_ravel[3] = 100
print('N:\n', N)
# Writing to the flatten() result leaves N untouched, because it is an independent copy
N_flatten[2] = 300
print('N:\n', N)

N:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
N(after ravel():)
 [ 0  1  2  3  4  5  6  7  8  9 10 11]
N(after flatten:)
 [ 0  1  2  3  4  5  6  7  8  9 10 11]
N:
 [[  0   1   2 100]
 [  4   5   6   7]
 [  8   9  10  11]]
N:
 [[  0   1   2 100]
 [  4   5   6   7]
 [  8   9  10  11]]


## 5. View & Copy

In [15]:
# Plain assignment vs. view() vs. copy()
O = np.array([2, 5, 3])
print(f'O: {O}\n')

# Assignment only binds a second name to the very same object
O_1 = O
print(f'O_1: {O_1}')
print(f'id(O) == id(O_1): {id(O) == id(O_1)}')
print(f'O_1.base: {O_1.base}\n')

# view() creates a new array object whose .base is the original array
O_2 = O.view()
print(f'O_2: {O_2}')
print(f'id(O) == id(O_2): {id(O) == id(O_2)}')
print(f'O_2.base: {O_2.base}\n')

# copy() creates a new array object that owns its data (.base is None)
O_3 = O.copy()
print(f'O_3: {O_3}')
print(f'id(O) == id(O_3): {id(O) == id(O_3)}')
print(f'O_3.base: {O_3.base}')

O: [2 5 3]

O_1: [2 5 3]
id(O) == id(O_1): True
O_1.base: None

O_2: [2 5 3]
id(O) == id(O_2): False
O_2.base: [2 5 3]

O_3: [2 5 3]
id(O) == id(O_3): False
O_3.base: None


In [16]:
# view() vs. copy(), continued: what happens to O when the view / the copy is modified
# Writing through the view changes O as well
O_2[1] = 10
print(f'O: {O}')
print(f'O_2: {O_2}\n')

# Writing to the copy leaves O as it was
O_3[1] = 100
print(f'O: {O}')
print(f'O_3: {O_3}')

O: [ 2 10  3]
O_2: [ 2 10  3]

O: [ 2 10  3]
O_3: [  2 100   3]


## 6. Stacking & Splitting

In [17]:
# np.stack() joins equally shaped arrays along a NEW axis
P = np.zeros(5, dtype=int)
Q = np.full(5, fill_value=7)

print(f'P: {P}')
print(f'Q: {Q}')

# Default axis=0: the inputs become the rows of the result
print(f'np.stack([P, Q]):\n {np.stack([P, Q])}')
# axis=1: the inputs become the columns of the result
print(f'np.stack([P, Q], 1):\n {np.stack([P, Q], axis=1)}')

P: [0 0 0 0 0]
Q: [7 7 7 7 7]
np.stack([P, Q]):
 [[0 0 0 0 0]
 [7 7 7 7 7]]
np.stack([P, Q], 1):
 [[0 7]
 [0 7]
 [0 7]
 [0 7]
 [0 7]]


In [18]:
# The same two 1-D arrays P and Q combined with the stacking helpers
# vstack: the inputs become rows
print(f'np.vstack((P, Q)):\n {np.vstack((P, Q))}')
# hstack: 1-D inputs are joined end to end into one longer 1-D array
print(f'np.hstack((P, Q)):\n {np.hstack((P, Q))}')
# column_stack: the inputs become columns
print(f'np.column_stack((P, Q)):\n {np.column_stack((P, Q))}')

np.vstack((P, Q)):
 [[0 0 0 0 0]
 [7 7 7 7 7]]
np.hstack((P, Q)):
 [0 0 0 0 0 7 7 7 7 7]
np.column_stack((P, Q)):
 [[0 7]
 [0 7]
 [0 7]
 [0 7]
 [0 7]]


In [19]:
# np.vstack() and np.hstack() applied to two 2x2 matrices
R = np.array([[3, 7],
              [1, 2]])
S = np.array([[0, 9],
              [5, 4]])

print(f'R:\n {R}')
print(f'S:\n {S}')

# vstack places S below R, hstack places S to the right of R
print(f'np.vstack((R, S)):\n {np.vstack((R, S))}')
print(f'np.hstack((R, S)):\n {np.hstack((R, S))}')

R:
 [[3 7]
 [1 2]]
S:
 [[0 9]
 [5 4]]
np.vstack((R, S)):
 [[3 7]
 [1 2]
 [0 9]
 [5 4]]
np.hstack((R, S)):
 [[3 7 0 9]
 [1 2 5 4]]


In [20]:
# Stacking vs. concatenating two 3-D arrays of shape (2, 2, 3)
T = np.ones((2, 2, 3), dtype=int)
U = np.zeros((2, 2, 3), dtype=int)

print(f'T:\n {T}')
print(f'U:\n {U}')

# np.stack() always inserts a new axis (the results are 4-D),
# whereas np.concatenate() joins along an existing one (the results stay 3-D)
print(f'np.stack([T, U]):\n {np.stack([T, U])}')
print(f'np.stack([T, U], 1):\n {np.stack([T, U], axis=1)}')
print(f'np.stack([T, U], 2):\n {np.stack([T, U], axis=2)}')
print(f'np.concatenate([T, U]):\n {np.concatenate([T, U])}')
print(f'np.concatenate([T, U], 1):\n {np.concatenate([T, U], axis=1)}')
print(f'np.concatenate([T, U], 2):\n {np.concatenate([T, U], axis=2)}')
# For these 3-D inputs: vstack -> axis=0, hstack -> axis=1, dstack -> axis=2
print(f'np.vstack((T, U)):\n {np.vstack((T, U))}')
print(f'np.hstack((T, U)):\n {np.hstack((T, U))}')
print(f'np.dstack((T, U)):\n {np.dstack((T, U))}')

T:
 [[[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]]
U:
 [[[0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]]]
np.stack([T, U]):
 [[[[1 1 1]
   [1 1 1]]

  [[1 1 1]
   [1 1 1]]]


 [[[0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]]]]
np.stack([T, U], 1):
 [[[[1 1 1]
   [1 1 1]]

  [[0 0 0]
   [0 0 0]]]


 [[[1 1 1]
   [1 1 1]]

  [[0 0 0]
   [0 0 0]]]]
np.stack([T, U], 2):
 [[[[1 1 1]
   [0 0 0]]

  [[1 1 1]
   [0 0 0]]]


 [[[1 1 1]
   [0 0 0]]

  [[1 1 1]
   [0 0 0]]]]
np.concatenate([T, U]):
 [[[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]

 [[0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]]]
np.concatenate([T, U], 1):
 [[[1 1 1]
  [1 1 1]
  [0 0 0]
  [0 0 0]]

 [[1 1 1]
  [1 1 1]
  [0 0 0]
  [0 0 0]]]
np.concatenate([T, U], 2):
 [[[1 1 1 0 0 0]
  [1 1 1 0 0 0]]

 [[1 1 1 0 0 0]
  [1 1 1 0 0 0]]]
np.vstack((T, U)):
 [[[1 1 1]
  [1 1 1]]

 [[1 1 1]
  [1 1 1]]

 [[0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]]]
np.hstack((T, U)):
 [[[1 1 1]
  [1 1 1]
  [0 0 0]
  [0 0 0]]

 [[1 1 1]
  [1 1 1]
  [0 0 0]
  [0 0 0]]]
np.dstack((T, U)):
 [[[1 1 1 0 0 0]
  [1 1 1 0 0 0]]

 [[1 1 1 0 0 0]
  [1 1 1 0 0 0]]]


In [21]:
# np.block() assembles an array from a (possibly nested) list of blocks
V = np.full((2, 2), fill_value=3)
W = np.full((2, 2), fill_value=5)

print(f'V:\n {V}')
print(f'W:\n {W}')

# Each inner list is one block-row, so these two calls stack the blocks vertically
print(f'np.block([[V], [W]]):\n {np.block([[V], [W]])}')
print(f'np.block([[W], [V]]):\n {np.block([[W], [V]])}')
# A flat list is a single block-row: the blocks are placed side by side
print(f'np.block([V, W]):\n {np.block([V, W])}')
print(f'np.block([V.flatten(), W.flatten(), V.flatten()]):\n {np.block([V.flatten(), W.flatten(), V.flatten()])}')

V:
 [[3 3]
 [3 3]]
W:
 [[5 5]
 [5 5]]
np.block([[V], [W]]):
 [[3 3]
 [3 3]
 [5 5]
 [5 5]]
np.block([[W], [V]]):
 [[5 5]
 [5 5]
 [3 3]
 [3 3]]
np.block([V, W]):
 [[3 3 5 5]
 [3 3 5 5]]
np.block([V.flatten(), W.flatten(), V.flatten()]):
 [3 3 3 3 5 5 5 5 3 3 3 3]


In [22]:
# np.split() cuts an array into equally sized parts and returns them as a list
X = np.arange(16).reshape(4, 4)

print(f'X:\n {X}')

# Default axis=0: two groups of rows
print(f'np.split(X, 2):\n {np.split(X, 2)}')
# axis=1: two groups of columns
print(f'np.split(X, 2, 1):\n {np.split(X, 2, axis=1)}')

X:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
np.split(X, 2):
 [array([[0, 1, 2, 3],
       [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])]
np.split(X, 2, 1):
 [array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])]


In [23]:
# Further splitting helpers, shown on a 3x3 array
Y = np.arange(9).reshape(3, 3)

print(f'Y:\n {Y}')

# array_split() accepts a part count that does not divide the axis evenly
# (here 3 rows are split into parts of 2 rows and 1 row)
print(f'np.array_split(Y, 2):\n {np.array_split(Y, 2)}')
# vsplit() splits into groups of rows, hsplit() into groups of columns
print(f'np.vsplit(Y, 3):\n {np.vsplit(Y, 3)}')
print(f'np.hsplit(Y, 3):\n {np.hsplit(Y, 3)}')

Y:
 [[0 1 2]
 [3 4 5]
 [6 7 8]]
np.array_split(Y, 2):
 [array([[0, 1, 2],
       [3, 4, 5]]), array([[6, 7, 8]])]
np.vsplit(Y, 3):
 [array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]
np.hsplit(Y, 3):
 [array([[0],
       [3],
       [6]]), array([[1],
       [4],
       [7]]), array([[2],
       [5],
       [8]])]


## 7. Randomness

In [24]:
# Uniform samples in [0, 1): a single float, a 1-D array and two 2-D arrays.
# Note that rand() takes the dimensions as separate arguments,
# whereas random_sample() takes the shape as one tuple.
print('rand():', np.random.rand())
print('rand(3):', np.random.rand(3))
print('rand(2, 3):\n', np.random.rand(2, 3))
print('random_sample(2, 3):\n', np.random.random_sample((2, 3)))

# Re-seeding the global generator with the same value before each draw
# makes both iterations print identical randn() samples
for _ in range(2):
    np.random.seed(42)     # Reset the global generator state so the following draw is reproducible
    print('randn(3):', np.random.randn(3))

# Random integers: one value below 100, then five values from the range 50..69
print('randint(100):', np.random.randint(100))
print('randint(50, 70, 5):', np.random.randint(50, 70, 5), end='\n\n')

# Draw three values out of 0..5, each value being equally likely by default
print('choice(6, 3):', np.random.choice(6, 3))
# You can also set a custom probability for each element
# (here the value 1 has probability 0, so it can never be drawn)
print('choice(6, 3):', np.random.choice(6, 3, p=[0.1, 0, 0.1, 0.5, 0.2, 0.1]), end='\n\n')

rand(): 0.2295605980551022
rand(3): [0.22591442 0.9585092  0.28683745]
rand(2, 3):
 [[0.58873522 0.22350433 0.49827002]
 [0.00961567 0.993996   0.4029805 ]]
random_sample(2, 3):
 [[0.98140146 0.3207399  0.9645791 ]
 [0.58161436 0.19126175 0.21200651]]
randn(3): [ 0.49671415 -0.1382643   0.64768854]
randn(3): [ 0.49671415 -0.1382643   0.64768854]
randint(100): 82
randint(50, 70, 5): [60 60 53 57 52]

choice(6, 3): [5 4 1]
choice(6, 3): [4 5 0]



In [25]:
# shuffle() reorders its argument in place; permutation() returns a reordered copy
Z = np.arange(8)
Z_ = np.arange(8).reshape(2, 4)

print(f'Z:\n {Z}')
print(f'Z_:\n {Z_}\n')

# print('shuffle():', np.random.shuffle(Z))     # would print None, because shuffle() works in place
np.random.shuffle(Z)
print(f'shuffle(Z): {Z}')
print(f'permutation(Z): {np.random.permutation(Z)}\n')

# Notice: on a multi-dimensional array shuffle()/permutation() only reorder along the first axis,
# i.e. whole rows are moved (with just two rows the result may happen to equal the input)
np.random.shuffle(Z_)
print(f'shuffle(Z_):\n {Z_}')
print(f'permutation(Z_):\n {np.random.permutation(Z_)}')

Z:
 [0 1 2 3 4 5 6 7]
Z_:
 [[0 1 2 3]
 [4 5 6 7]]

shuffle(Z): [2 5 6 1 7 0 4 3]
permutation(Z): [0 5 3 7 6 4 2 1]

shuffle(Z_):
 [[0 1 2 3]
 [4 5 6 7]]
permutation(Z_):
 [[0 1 2 3]
 [4 5 6 7]]


## 8. Other Functions

In [26]:
# asarray() and astype()
a = [1, 2, 3]
# A list has to be converted, so asarray() returns a new object
print(f'np.asarray(a) is a: {np.asarray(a) is a}')

b = np.array([1, 2, 3])
# asarray() hands an existing ndarray back unchanged, whereas array() copies it by default
print(f'np.asarray(b) is b: {np.asarray(b) is b}')
print(f'np.array(b) is b: {np.array(b) is b}\n')

c = np.array([4.0, 2.4, 1.5])
print(f'c: {c}')
# Casting to int drops the fractional part
print(f'c.astype(int): {c.astype(int)}')

np.asarray(a) is a: False
np.asarray(b) is b: True
np.array(b) is b: False

c: [4.  2.4 1.5]
c.astype(int): [4 2 1]


In [27]:
# maximum() / minimum() compare two arrays element by element;
# maximum.reduce() folds that comparison along the first axis of one array
d = np.array([2, 7, 1])
e = np.array([0, 9, 3])
f = np.vstack((d, e))

print(f'maximum(d, e): {np.maximum(d, e)}')
print(f'minimum(d, e): {np.minimum(d, e)}')

# On a 1-D array the reduction gives the same single value as np.max()
print(f'max(d) {np.max(d)}')
print(f'maximum.reduce(d) {np.maximum.reduce(d)}')

# On the 2-D stack of d and e it gives the column-wise maxima
print(f'maximum.reduce(f) {np.maximum.reduce(f)}')

maximum(d, e): [2 9 3]
minimum(d, e): [0 7 1]
max(d) 7
maximum.reduce(d) 7
maximum.reduce(f) [2 9 3]


In [28]:
# identity() always builds a square identity matrix;
# eye() additionally supports a shifted diagonal (k) and a non-square shape
print(f'identity(3):\n {np.identity(3)}')
print(f'eye(3):\n {np.eye(3)}')
print(f'eye(3, k=1):\n {np.eye(3, k=1)}')
print(f'eye(3, 5):\n {np.eye(3, M=5)}')

identity(3):
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
eye(3):
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
eye(3, k=1):
 [[0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 0.]]
eye(3, 5):
 [[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]
