In [1]:
import numpy as np

In [2]:
# the python integer has some overhead (reference counting, type checking, etc.)
# it is a pointer to a position in memory containing all the python object information including the bytes of the integer value

In [3]:
# the list is a mutable multielement container in python
L = list(range(10))
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [4]:
type(L[0])

int

In [5]:
L2 = [str(c) for c in L]
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [6]:
# we can even create a heterogeneous list
L3 = [True, "2", 3.0, 4]
[type(item) for item in L3]
# flexibility comes at a cost: to allow these flexible types, each item in the list must contain its own type info, reference count, and other info: this is stored in a struct, so a list of N items requires the storage of N pointers to Python objects, and N structs to store the type info, reference count, and other info for each of these objects

[bool, str, float, int]

In [7]:
# fixed type arrays in python
# the array module can be used to create dense arrays of a uniform type
import array

L = list(range(10))
A = array.array("i", L)
A
# "i" is a type code indicating that the contents are integers; the array module stores the data compactly, but the numpy array is much more useful because it also provides efficient operations on that data

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
# integer array:
np.array([1, 4, 2, 5, 3])

array([1, 4, 2, 5, 3])

In [9]:
# if the datatypes do not match, numpy will upcast if possible
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

In [10]:
# we can explicitly set the datatype using the dtype keyword
np.array([1, 2, 3, 4], dtype=np.float32)

array([1., 2., 3., 4.], dtype=float32)

In [11]:
# np arrays can be multidimensional (nested sequences become the rows), and can later be converted to tf tensors
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [12]:
# Create a length-10 integer array filled with 0s
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [13]:
# create a 3x5 floating-point array filled with 1s
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [14]:
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [15]:
# create a linear sequence starting at 0, stepping by 2, and stopping before 20 (the stop value is exclusive)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [16]:
# create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [17]:
# create a 3x3 array of uniformly distributed random values between 0 and 1
np.random.random((3, 3))

array([[0.98960757, 0.00140477, 0.80644434],
       [0.32083934, 0.65540339, 0.00950333],
       [0.37224091, 0.99347959, 0.5653906 ]])

In [18]:
# create a 3x3 array of normally distributed random values with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))

array([[ 0.52016144,  0.85241674, -0.54548371],
       [ 0.14837769,  4.46909745, -0.67812081],
       [ 0.40796949,  0.20196147, -0.54337205]])

In [19]:
# create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3, 3))

array([[4, 5, 4],
       [6, 8, 4],
       [5, 5, 2]])

In [20]:
# create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [21]:
# create an uninitialized array of three floats (np.empty defaults to a float dtype); the values are whatever already happened to be in that memory
np.empty(3)

array([1., 1., 1.])

In [22]:
# seeded generator: the same seed reproduces the same three arrays on every run
rng = np.random.default_rng(1701)
x1 = rng.integers(0, 10, size=(6,))  # one-dimensional array, values in [0, 10)
x2 = rng.integers(0, 10, size=(3, 4))  # two-dimensional array, values in [0, 10)
x3 = rng.integers(0, 10, size=(3, 4, 5))  # three-dimensional array, values in [0, 10)

In [23]:
print("x3 ndim: ", x3.ndim)
print("x3 shape: ", x3.shape)
print("x3 size: ", x3.size)
print("dtype: ", x3.dtype)

x3 ndim:  3
x3 shape:  (3, 4, 5)
x3 size:  60
dtype:  int64


Array Indexing:

In [24]:
x1

array([9, 4, 0, 3, 8, 6])

In [25]:
x1[0]

9

In [26]:
x1[4]

8

In [27]:
# to index from the end of the array, use negative indices
x1[-1]

6

In [28]:
x1[-2]

8

In [29]:
# in a multidimensional array, we access items using a comma-separated tuple of indices
x2

array([[3, 1, 3, 7],
       [4, 0, 2, 3],
       [0, 0, 6, 9]])

In [30]:
x2[0, 0]

3

In [32]:
x2[2, 0]

0

In [33]:
x2[2, -1]

9

In [34]:
# values can be modified using any of the preceding index notations
x2[0, 0] = 12
x2

array([[12,  1,  3,  7],
       [ 4,  0,  2,  3],
       [ 0,  0,  6,  9]])

In [35]:
# numpy arrays have a fixed type: assigning a float into an integer array does NOT upcast the array, the value is silently truncated (3.14159 becomes 3)
x1[0] = 3.14159
x1

array([3, 4, 0, 3, 8, 6])

In [36]:
# array slicing using :
x1

array([3, 4, 0, 3, 8, 6])

In [37]:
x1[:3]  # first three elements

array([3, 4, 0])

In [38]:
x1[3:]  # elements from index 3 onward

array([3, 8, 6])

In [39]:
x1[1:4]  # middle subarray

array([4, 0, 3])

In [40]:
x1[::2]  # every other element

array([3, 0, 8])

In [41]:
x1[1::2]  # every other element starting at index 1

array([4, 3, 6])

In [42]:
x1[::-1]  # all elements reversed

array([6, 8, 3, 0, 4, 3])

In [43]:
x1[4::-2]  # reversed every other from index 4

array([8, 0, 3])

Multidimensional Subarrays:

In [44]:
x2

array([[12,  1,  3,  7],
       [ 4,  0,  2,  3],
       [ 0,  0,  6,  9]])

In [45]:
x2[:2, :3]  # two rows, three columns

array([[12,  1,  3],
       [ 4,  0,  2]])

In [46]:
x2[:3, ::2]  # all rows, every other column

array([[12,  3],
       [ 4,  2],
       [ 0,  6]])

In [47]:
x2[::-1, ::-1]  # reversed array

array([[ 9,  6,  0,  0],
       [ 3,  2,  0,  4],
       [ 7,  3,  1, 12]])

In [48]:
# accessing single rows and columns
x2[:, 0]  # first column of x2

array([12,  4,  0])

In [49]:
x2[0, :]  # first row of x2

array([12,  1,  3,  7])

In [50]:
x2[0]  # equivalent to x2[0, :]

array([12,  1,  3,  7])

In [51]:
# numpy array slices are returned as views rather than copies of the data
print(x2)

[[12  1  3  7]
 [ 4  0  2  3]
 [ 0  0  6  9]]


In [53]:
# we extract a 2x2 subarray from this
x2_sub = x2[:2, :2]
print(x2_sub)

[[12  1]
 [ 4  0]]


In [55]:
# if we modify this subarray, we'll see that the original array is changed
x2_sub[0, 0] = 99
print(x2_sub)

[[99  1]
 [ 4  0]]


In [57]:
# we can see that the original array is changed when we modified the subarray
print(x2)
# this can be useful when working with large datasets: we can access and process pieces of these datasets without the need to copy the underlying data buffer

[[99  1  3  7]
 [ 4  0  2  3]
 [ 0  0  6  9]]


In [58]:
# it is sometimes useful to explicitly copy the data, using the copy method
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[99  1]
 [ 4  0]]


In [59]:
# if we modify this the original array is not touched
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[[42  1]
 [ 4  0]]


In [60]:
print(x2)

[[99  1  3  7]
 [ 4  0  2  3]
 [ 0  0  6  9]]


Reshaping of arrays

In [62]:
# another useful type of operation is reshaping arrays using reshape
grid = np.arange(1, 10).reshape((3, 3))
print(grid)
# for this to work the size of the initial array must match the size of the reshaped array
# where possible, the reshape method will return a no-copy view of the initial array

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [63]:
# a common reshaping operation is converting a one-dimensional array into a two-dimensional row or column matrix
x = np.array([1, 2, 3])
x.reshape((1, 3))  # row vector via reshape

array([[1, 2, 3]])

In [64]:
x.reshape((3, 1))  # column vector via reshape

array([[1],
       [2],
       [3]])

In [65]:
# a convenient shorthand for this is to use np.newaxis in the slicing syntax
x[np.newaxis, :]  # row vector via newaxis

array([[1, 2, 3]])

In [66]:
x[:, np.newaxis]  # column vector via newaxis

array([[1],
       [2],
       [3]])

array concatenation and splitting

In [67]:
# concatenation, or joining of two arrays in NumPy, is primarily accomplished using np.concatenate, np.vstack, and np.hstack
# np.concatenate takes a tuple or list of arrays as its first argument
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [68]:
# can concatenate more than two arrays at once
z = np.array([99, 99, 99])
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [69]:
# it can be used for two dimensional arrays as well
grid = np.array([[1, 2, 3], [4, 5, 6]])
# concatenate along the first axis
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [70]:
# concatenate along the second axis (axis=1, since axes are zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [71]:
# for working with arrays of mixed dimensions it can be clearer to stack them with np.vstack (vertical) and np.hstack (horizontal)
np.vstack([x, grid])

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])

In [72]:
y = np.array([[99], [99]])
np.hstack([grid, y])
# for higher dimension arrays, np.dstack will stack arrays along the third axis

array([[ 1,  2,  3, 99],
       [ 4,  5,  6, 99]])

Splitting of arrays

In [73]:
# the opposite of concatenation is splitting: np.split cuts a sequence at the given indices
# NOTE: this cell rebinds x (now a plain list) and x1, x2, x3, which held the random
# arrays created earlier in the notebook; re-run that cell if those arrays are needed again
x = [1, 2, 3, 99, 99, 3, 2, 1]
# split points [3, 5] produce the pieces x[:3], x[3:5] and x[5:]
x1, x2, x3 = np.split(x, [3, 5])  # we give the split points as a list of indices
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [74]:
# n split points lead to n + 1 subarrays
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [75]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [76]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)
# np.dsplit will split arrays along the third axis

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
