Import Numpy

In [2]:
import numpy as np

Numpy Version

In [3]:
np.__version__

'1.21.5'

Creating Arrays from Python Lists

In [4]:
#integer array:
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [5]:
#upcast to floating point array:
np.array([1,2,3,4,5.0])

array([1., 2., 3., 4., 5.])

In [6]:
#explicitly specify data type using dtype keyword:
np.array([1, 2, 3, 4, 5], dtype = 'float32')

array([1., 2., 3., 4., 5.], dtype=float32)

Creating Arrays from Scratch

Especially for larger arrays, it is more efficient to create arrays from scratch using routines built into NumPy.

In [9]:
#create a length-10 integer array filled with zeros:
np.zeros(10, dtype = np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [8]:
#create a length-10 floating point array filled with zeros:
np.zeros(10, dtype = np.float64)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
#create a 3x4 floating point array filled with 1s:
np.ones((3,4), dtype = np.float64)

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [10]:
#create a 3x4 array filled with 2.1416:
np.full((3,4), 2.1416)

array([[2.1416, 2.1416, 2.1416, 2.1416],
       [2.1416, 2.1416, 2.1416, 2.1416],
       [2.1416, 2.1416, 2.1416, 2.1416]])

In [13]:
# Create an array filled with a linear sequence
# using np.arange(start, stop, step); stop is exclusive, so 20 itself is not included
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [14]:
#Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [15]:
# Create a 3x3 array of uniformly distributed 
# random values between 0 and 1
np.random.random((3, 3))

array([[0.03791088, 0.78660548, 0.10341914],
       [0.33326561, 0.96599776, 0.91053833],
       [0.44589747, 0.51386971, 0.58641007]])

In [10]:
# Create a 3x3 array of normally distributed random values 
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3,3))

array([[-0.36550118,  0.47678277,  0.08550604],
       [-0.19495302, -0.26316106, -0.95765234],
       [ 1.30182827,  0.72161824, -1.08255974]])

In [11]:
# Create a 3x3 array of random integers in the half-open interval [0, 10)
# (the lower bound 0 is included, the upper bound 10 is excluded)
np.random.randint(0, 10, (3, 3))

array([[4, 4, 2],
       [4, 4, 9],
       [2, 6, 1]])

In [12]:
# Create a 5x5 identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [13]:
# Create an uninitialized array of five values
# (no dtype is given, so the elements are floating point, as the output shows)
# The values will be whatever happens to already exist at that memory location
np.empty(5)

array([1., 1., 1., 1., 1.])

NumPy Array Attributes

In [15]:
# seed for reproducibility
# Ensure that the same random arrays are generated each time this code is run
np.random.seed(0)

x1 = np.random.randint(10, size=6) # One-dimensional array
x2 = np.random.randint(10, size=(3, 4)) # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5)) # Three-dimensional array

In [21]:
# Example of NumPy array attributes
print("x3 ndim: ", x3.ndim) # number of dimensions
print("x3 shape:", x3.shape) # size of each dimension
print("x3 size: ", x3.size) # total size of the array
print("dtype:", x3.dtype) # data type of array
print("itemsize:", x3.itemsize, "bytes") # size (in bytes) of each array element
print("nbytes:", x3.nbytes, "bytes") # total size (in bytes) of the array

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60
dtype: int32
itemsize: 4 bytes
nbytes: 240 bytes


In general, we expect that nbytes is equal to itemsize times size (here, 4 × 60 = 240).

Array Indexing

In a one-dimensional array, the ith value (counting from zero) can be accessed by specifying the desired index in square brackets, just as with Python lists.

In [24]:
# Only the value of the last expression in the cell is echoed below
# the whole one-dimensional array
x1
# first element
x1[0]
# last element (negative indices count from the end of the array)
x1[-1]

9

In a multi-dimensional array, items can be accessed using a comma-separated tuple of indices.

In [25]:
# Only the value of the last expression in the cell is echoed below
# the whole two-dimensional array
x2
# element at row 0, column 0
x2[0, 0]

3

Values can also be modified using any of the above index notation.

In [26]:
x2[0, 0] = 9 
x2

array([[9, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

But remember that, unlike Python lists, NumPy arrays have a fixed type. This means, for example, that if you attempt to insert a floating-point value into an integer array, the value will be silently truncated. Don't be caught unaware by this behavior!

In [27]:
x1[0] = 2.1416 # this will be truncated!
x1

array([2, 0, 3, 3, 7, 9])

Array Slicing

The NumPy slicing syntax follows that of the standard Python list.
array_name[start:stop:step]
If any of these are unspecified, they default to start=0, stop=size of dimension, and step=1.

In [28]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
x[:5] # first five elements

array([0, 1, 2, 3, 4])

In [30]:
x[3:7] # sub-array from index 3 up to, but not including, index 7

array([3, 4, 5, 6])

In [31]:
x[::2] # every other element, starting from index 0

array([0, 2, 4, 6, 8])

A potentially confusing case is when the step value is negative. In this case, the defaults for start and stop are swapped. This becomes a convenient way to reverse an array.

In [32]:
x[::-1] # all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [33]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [34]:
x[5::-2] # every other element in reverse order, starting from index 5

array([5, 3, 1])

Multi-dimensional slices work in the same way, with multiple slices separated by commas.

In [35]:
x2

array([[9, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [36]:
x2[:2, :3] # first two rows, first three columns

array([[9, 5, 2],
       [7, 6, 8]])

In [37]:
x2[:3, ::2] # all rows, every other column

array([[9, 2],
       [7, 8],
       [1, 7]])

In [38]:
x2[::-1, ::-1] # both the row order and the column order reversed

array([[7, 7, 6, 1],
       [8, 8, 6, 7],
       [4, 2, 5, 9]])

Accessing array rows and columns

One commonly needed operation is accessing a single row or column of an array. This can be done by combining indexing and slicing, using an empty slice marked by a single colon (:).

In [39]:
x2

array([[9, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [40]:
print(x2[:, 0]) # first column of x2

[9 7 1]


In [41]:
print(x2[0, :]) # first row of x2

[9 5 2 4]


Subarrays as no-copy views

NumPy array slicing differs from Python list slicing:
slicing a list returns a copy, whereas
slicing a NumPy array returns a view of the original data.

In [42]:
print(x2)

[[9 5 2 4]
 [7 6 8 8]
 [1 6 7 7]]


In [44]:
# extract a 2×2 subarray from x2 array 
x2_sub = x2[:2, :2] 
print(x2_sub)

[[9 5]
 [7 6]]


In [45]:
# modify x2_sub subarray
x2_sub[0, 0] = 88
print(x2_sub)

[[88  5]
 [ 7  6]]


In [46]:
# original array x2 is also changed 
print(x2)

[[88  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


This default behavior is useful when we work with large datasets: we can access and process pieces of these datasets without the need to copy the underlying data buffer.

Creating copies of arrays

The copy() method can be used to explicitly copy an array or subarray.
If we then modify the new array or subarray, the original array is not changed.

In [47]:
x2 # original array

array([[88,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [48]:
# copy a subarray
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[88  5]
 [ 7  6]]


In [50]:
# now, modify the subarray
x2_sub_copy[0, 0] = 44
print(x2_sub_copy)


[[44  5]
 [ 7  6]]


In [51]:
# original array x2 is not touched
print(x2)

[[88  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


Reshaping of Arrays

Arrays can be reshaped using the reshape() method.
The size of the initial array must match the size of the reshaped array.

In [52]:
# arrange the integers 1 through 9 in a 3×3 grid
grid = np.arange(1, 10).reshape((3,3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


Array Concatenation

Concatenation, or joining of two arrays in NumPy, is primarily accomplished using np.concatenate, np.vstack, and np.hstack.

In [53]:
# Using concatenate() method
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [54]:
# More than two arrays at once
z = [99, 99, 99]
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [55]:
# concatenate two-dimensional arrays
grid = np.array([[1, 2, 3],
                    [4, 5, 6]])
# concatenate along the first axis (axis=0, the default), stacking the rows
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [56]:
# concatenate along the second axis (zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

For working with arrays of mixed dimensions, it can be clearer to use the np.vstack (vertical stack) and np.hstack (horizontal stack) functions.

In [57]:
x = np.array([1, 2, 3]) 
grid = np.array([[9, 8, 7], 
                 [6, 5, 4]])
# vertically stack the arrays
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [58]:
# horizontally stack the arrays
y = np.array([[99],
                [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

Array Splitting

The opposite of concatenation is splitting, which is implemented using the np.split, np.hsplit, and np.vsplit functions.
For each of these, we can pass a list of indices giving the split points.

In [60]:
# x is a plain Python list here; the pieces printed below are NumPy arrays
# the split points [3, 5] produce the pieces x[:3], x[3:5], and x[5:]
# NOTE: this rebinds x1, x2, and x3, replacing the arrays defined earlier
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


Notice that N split points lead to N + 1 subarrays. The related functions np.hsplit and np.vsplit are similar.

In [61]:
grid = np.arange(16).reshape((4, 4)) 
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [62]:
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [63]:
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


Iterating Over NumPy Array

NumPy provides numpy.nditer, an efficient multidimensional iterator object for iterating over the elements of an array.

In [64]:
arr = np.arange(1, 10).reshape((3,3)) 
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
# iterating the array, arr, using numpy.nditer()
for i in np.nditer(arr): 
    print(i)

1
2
3
4
5
6
7
8
9
