In [15]:
# Array attributes: start by creating 1D, 2D and 3D arrays of random integers.
# The generator is seeded so the same values are produced on every run.
import numpy as np
np.random.seed(0)

# Integers are drawn from [0, 10); the three calls must stay in this order to
# reproduce the same values from the seeded stream.
x1 = np.random.randint(0, 10, size=6)           # 1D: 6 elements
x2 = np.random.randint(0, 10, size=(3, 4))      # 2D: 3 rows x 4 columns
x3 = np.random.randint(0, 10, size=(3, 4, 5))   # 3D: 3 x 4 x 5

# Every array has ndim (number of dimensions), shape (length of each dimension)
# and size (total number of elements).
print("x3 dim: ", x3.ndim)
print("x3 shape: ", x3.shape)
print("x3 size: ", x3.size)

('x3 dim: ', 3)
('x3 shape: ', (3L, 4L, 5L))
('x3 size: ', 60)


In [16]:
# dtype reports the data type of the array's elements
print("dtype: ", x3.dtype)

('dtype: ', dtype('int32'))


In [17]:
# itemsize is the size (in bytes) of one array element; nbytes is the total size (in bytes) of the array
# For this array, nbytes equals itemsize * size
print("itemsize: ", x3.itemsize, "bytes")
print("nbytes: ", x3.nbytes, "bytes")

('itemsize: ', 4, 'bytes')
('nbytes: ', 240, 'bytes')


In [18]:
# In a 1D array, the i-th value (counting from zero) is accessed by giving its index in square brackets
x1

array([5, 0, 3, 3, 7, 9])

In [19]:
# Index 0 is the first element
x1[0]

5

In [20]:
# Index 4 is the fifth element
x1[4]

7

In [21]:
# To index from the end, use negative indices (-1 is the last element)
x1[-1]

9

In [22]:
# -2 is the second-to-last element
x1[-2]

7

In [23]:
# For multi-dimensional arrays, we access items using a comma-separated tuple of indices
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [24]:
# Row 0, column 0
x2[0, 0]

3

In [25]:
# Row 2 (the last row), column 0
x2[2, 0]

1

In [26]:
# Row 2, last column (negative indices work on every axis)
x2[2, -1]

7

In [27]:
# Elements can be modified with the same index notation
x2[0, 0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [28]:
# Note: NumPy arrays have a fixed data type, so a floating-point value inserted into an integer array is truncated
# (not rounded): here 3.14159 is stored as 3
x1[0] = 3.14159
x1

array([3, 0, 3, 3, 7, 9])

In [29]:
# Just as square brackets access individual array elements, the colon (:) accesses subarrays
# This is the NumPy slicing syntax: x[start:stop:step]
# Let's try this with a 1D array
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
# Note: arange(10) INCLUDES the first number (zero) but EXCLUDES the last (10)
# Slicing follows the same convention: x[:5] gives the first 5 elements (indices 0 through 4)
x[:5]

array([0, 1, 2, 3, 4])

In [31]:
# Elements from index 5 (inclusive) to the end
x[5:]

array([5, 6, 7, 8, 9])

In [32]:
# Middle subarray: indices 4, 5 and 6 (the stop index 7 is excluded)
x[4:7]

array([4, 5, 6])

In [33]:
# Every other element (step of 2), starting at index 0
x[::2]

array([0, 2, 4, 6, 8])

In [34]:
# Every other element, starting at index 1
x[1::2]

array([1, 3, 5, 7, 9])

In [35]:
# All elements in reverse order (a negative step walks backwards)
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [36]:
# Every other element in reverse order, starting from index 5
x[5::-2]

array([5, 3, 1])

In [37]:
# Let's now slice 2D arrays: each axis gets its own slice, separated by commas
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [38]:
# The first 2 rows and the first 3 columns
x2[:2, :3]

array([[12,  5,  2],
       [ 7,  6,  8]])

In [39]:
# All 3 rows, every other column
x2[:3, ::2]

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [40]:
# We commonly need to access single rows or columns of an array
# We can do this by combining indexing and slicing, using an empty slice (a single colon, :)
# Print only the first column of x2
print(x2[:, 0])

[12  7  1]


In [41]:
# Print the first row of x2 (index 0 on the row axis, every column)
print(x2[0, :])

[12  5  2  4]


In [42]:
# One key thing to note about array slices: they return VIEWS rather than COPIES of the array data
# This is where NumPy array slicing differs from Python list slicing: slicing a list produces a copy
# Consider our 2D array from before:
print(x2)

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [44]:
# Let's extract the 2x2 subarray in the top-left corner (first 2 rows, first 2 columns)
x2_sub = x2[:2, :2]
print(x2_sub)

[[12  5]
 [ 7  6]]


In [45]:
# If we now modify this subarray, the original array x2 is changed as well
x2_sub[0,0] = 99
print(x2_sub)

[[99  5]
 [ 7  6]]


In [46]:
# The parent array shows the new value too, because x2_sub is a view onto x2's data
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [47]:
# As we can see, slicing gives us a VIEW of the ACTUAL array, not a COPY
# The benefit is that we can access/change pieces of potentially large datasets without copying the underlying data buffer
# Despite this nice feature of views, it is sometimes useful to explicitly copy the data within an array/subarray
# We can do this with the copy() method
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[99  5]
 [ 7  6]]


In [48]:
# Modifying the copy changes only the COPY, NOT the original array
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[[42  5]
 [ 7  6]]


In [49]:
# x2 is unchanged: its top-left element is still 99, not 42
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [50]:
# Reshaping arrays is another useful operation
# reshape (available as the np.reshape function or the array method) is the most flexible way to do it
# Arrange the numbers 1 through 9 on a 3x3 grid
grid = np.reshape(np.arange(1, 10), (3, 3))
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [51]:
# Note: For reshape to work, the size of the initial array MUST match the size of the reshaped array
# Another common reshaping pattern is converting a 1D array into a 2D row or column matrix
# This is done with the reshape method OR with np.newaxis inside a slicing operation
# Turn a 1D array into a 1x3 array (row vector) using reshape
x = np.array([1, 2, 3])
x.reshape((1, 3))

array([[1, 2, 3]])

In [52]:
# The same 1x3 row vector using newaxis: a new leading axis is inserted
x[np.newaxis, :]

array([[1, 2, 3]])

In [53]:
# Now a 3x1 array (column vector) using reshape
x.reshape((3, 1))

array([[1],
       [2],
       [3]])

In [54]:
# The same 3x1 column vector using newaxis: a new trailing axis is inserted
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

In [55]:
# All of the previous routines worked on a single array
# It is also possible to combine multiple arrays and to split a single array into multiple ones
# Concatenation (or joining) of arrays in NumPy is done with np.concatenate, np.vstack, and np.hstack
# np.concatenate takes a tuple/list of arrays as its first argument, as shown below
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [56]:
# We can also concatenate more than two arrays at once (z is a plain Python list here)
z = [99, 99, 99]
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [58]:
# np.concatenate can also be used for 2D arrays
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])
# Concatenate along the FIRST axis (axis=0, the default): the second array's rows are stacked below the first's
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [60]:
# Concatenate along the SECOND axis (axis=1): the arrays are joined side by side, adding columns
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [61]:
# For multi-D arrays it is clearer to use vstack and hstack (vertical and horizontal stack)
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                 [6, 5, 4]])
# Lets Vertically stack the arrays
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [62]:
# Now horizontally stack the arrays: the 2x1 column y is appended as a new right-hand column of grid
y = np.array([[99],
              [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

In [64]:
# The opposite of concatenation is splitting
# It is implemented by the functions np.split, np.hsplit, and np.vsplit
# For each, we can pass a list of indices giving the split points
# NOTE(review): this rebinds x1, x2 and x3, replacing the 1D/2D/3D arrays created in the first cell;
# re-running earlier cells after this one will give different results
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)

(array([1, 2, 3]), array([99, 99]), array([3, 2, 1]))


In [68]:
# To clarify: np.split(x, [3, 5]) cut x at indices 3 and 5, producing x[:3], x[3:5] and x[5:]
# Those three pieces were assigned to x1, x2 and x3
# Note: N split points give N + 1 subarrays
# np.hsplit and np.vsplit work similarly; build a 4x4 grid to try them on:
grid = np.reshape(np.arange(16), (4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [70]:
# vsplit cuts by rows: the rows before index 2 go to upper, the remaining rows to lower
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [71]:
# hsplit cuts by columns: the columns before index 2 go to left, the remaining columns to right
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
