<a href='https://ai.meng.duke.edu'><img align="left" style="padding-top:10px;" src="https://storage.googleapis.com/aipi_datasets/Duke-AIPI-Logo.png"></a>

# Introduction to NumPy

In [1]:
# We commonly import numpy as np
# We can then execute a numpy command as np.function()
import numpy as np

### Create a NumPy ndarray

In [5]:
# Approach 1: build an ndarray from an existing Python list
x_list = [4.25, 3, 4, 5]
x_array = np.array(x_list)
print('x_array is: ', x_array)

# Nested lists give a multidimensional array - every inner list becomes one row
z_array = np.array([
    [2, 3, 4],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
print('z_array is: \n', z_array)

# Approach 2: pre-filled arrays, e.g. a 3x3 array of integer 1s
y_array = np.ones((3, 3), dtype=int)
print('y_array is: \n', y_array)

x_array is:  [4.25 3.   4.   5.  ]
z_array is: 
 [[ 2  3  4]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
y_array is: 
 [[1 1 1]
 [1 1 1]
 [1 1 1]]


In [6]:
# Go the other way: turn an ndarray into a plain Python list with .tolist()
x_array = np.array([4.25, 3, 4, 5])
x_list = x_array.tolist()
x_list

[4.25, 3.0, 4.0, 5.0]

### Reshaping arrays

In [7]:
# .shape reports the size of each dimension as (number of rows, number of columns)
z_array = np.array([
    [2, 3, 4],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
print('z_array is: \n', z_array)
print(z_array.shape)

z_array is: 
 [[ 2  3  4]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
(4, 3)


In [8]:
# Give reshape the target dimensions; the 12 values of the 4x3 array are laid out as 6 rows x 2 columns
z_array = np.array([
    [2, 3, 4],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
z_reshaped = z_array.reshape(6, 2)
z_reshaped

array([[ 2,  3],
       [ 4,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12]])

Sometimes we need to add an extra dimension to an array, or remove a dimension of length 1, so that the array can be used as input to a function or model that requires a particular shape.

In [13]:
# np.expand_dims inserts a new dimension of length 1 at the requested position
# (axis=2 makes it the third dimension); the data itself is unchanged
z_array = np.array([
    [2, 3, 4],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
z_reshaped = z_array.reshape(6, 2)
z_3d = np.expand_dims(z_reshaped, axis=2)
z_3d.shape

(6, 2, 1)

In [14]:
# Remove a dimension of length 1 - here the 3rd dimension (axis=2) of the z_3d array
# built in the previous cell.
# The axis is named explicitly: a bare squeeze() would drop *every* length-1 dimension,
# whereas squeeze(axis=2) removes only that one and raises a ValueError if it is not length 1.
z_2d = z_3d.squeeze(axis=2)
z_2d.shape

(6, 2)

### Indexing and slicing arrays

In [12]:
# Pick out a single element by position (indices start at 0, so index 1 is the second value)
x = np.array([1, 2, 3, 4])
x[1]

2

In [15]:
# Take a slice of an array.
# Slices are written start:stop:step; when step is left out it is assumed to be 1.
x = np.array([1, 2, 3, 4])
x[0:3:2]

array([1, 3])

In [16]:
# A step of -1 walks the array backwards, which gives a reversed copy of the values
x = np.array([1, 2, 3, 4])
x[::-1]

array([4, 3, 2, 1])

In [22]:
# For a multidimensional array, give one index per dimension: [row, column]
x = np.array([
    [1, 2],
    [3, 4],
])
print(x)
print(x[1, 0])

[[1 2]
 [3 4]]
3


In [24]:
# Slice several dimensions at once as [rows, cols]: here rows 0-2 of column 2
z_array = np.array([
    [2, 3, 4],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
print(z_array)
print()
print(z_array[0:3, 2])

[[ 2  3  4]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]

[4 6 9]


### Concatenating arrays

In [31]:
# Two 2x2 inputs, given here as nested Python lists
x = [[1, 2], [3, 4]]
y = [[5, 6], [7, 8]]
print('x: {}'.format(x))
print('y: {}'.format(y))

# axis=0 stacks the inputs on top of each other (more rows)
xy_axis0 = np.concatenate((x, y), axis=0)
print('xy_axis0: \n', xy_axis0)

# axis=1 places the inputs side by side (more columns)
xy_axis1 = np.concatenate((x, y), axis=1)
print('xy_axis1: \n', xy_axis1)

x: [[1, 2], [3, 4]]
y: [[5, 6], [7, 8]]
xy_axis0: 
 [[1 2]
 [3 4]
 [5 6]
 [7 8]]
xy_axis1: 
 [[1 2 5 6]
 [3 4 7 8]]


### Aggregations

In [33]:
x = np.array([[1,2,3],[4,5,6],[7,8,9]])
print('x: \n',x)

# Compute the sum of rows, columns and the array.
# The axis argument names the dimension that is collapsed by the sum:
#   axis=1 adds across the columns, leaving one total per row
#   axis=0 adds down the rows, leaving one total per column
row_sums = np.sum(x, axis=1)
col_sums = np.sum(x, axis=0)
total_sum = np.sum(x)  # no axis given: every element is summed into a single value

print('Sums of rows: \n', row_sums)
print('Sums of columns: \n', col_sums)
print('Sum of the full array: \n', total_sum)

x: 
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
Sums of rows: 
 [ 6 15 24]
Sums of columns: 
 [12 15 18]
Sum of the full array: 
 45


### Statistics

In [36]:
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print('x: \n', x)
print()

# Per-row statistics: axis=1 reduces across the columns of each row
print('Max of rows are: ', np.max(x, axis=1))
print('Min of rows are: ', np.min(x, axis=1))
print('Means of rows are: ', np.mean(x, axis=1))
print()

# Per-column statistics: axis=0 reduces down the rows of each column
print('Max of columns are: ', np.max(x, axis=0))
print('Min of columns are: ', np.min(x, axis=0))
print('Means of columns are: ', np.mean(x, axis=0))

x: 
 [[1 2 3]
 [4 5 6]
 [7 8 9]]

Max of rows are:  [3 6 9]
Min of rows are:  [1 4 7]
Means of rows are:  [2. 5. 8.]

Max of columns are:  [7 8 9]
Min of columns are:  [1 2 3]
Means of columns are:  [4. 5. 6.]


In [30]:
# np.argmax / np.argmin give the position (index) of the largest / smallest value in a vector
x = np.array([2, 4, 1, 7, 5])
print(f'Index of the maximum value is {np.argmax(x)}')
print(f'Index of the minimum value is {np.argmin(x)}')

Index of the maximum value is 3
Index of the minimum value is 2


### Unique values and counts of values
You can use `np.unique(array_name)` to get the unique values in an array.  You can also use `np.unique(array_name,return_counts=True)` to return the unique values and the count of each as a tuple (unique_vals,counts).

In [19]:
x = np.array([3,1,6,2,3,3,1,5,6])

# np.unique returns the distinct values in sorted order
print(f'Unique values are {np.unique(x)}')

# With return_counts=True it returns a tuple (unique_vals, counts), where counts[i]
# is how many times unique_vals[i] occurs in x
vals_and_counts = np.unique(x, return_counts=True)
print(f'Unique values and counts are {vals_and_counts}')

Unique values are [1 2 3 5 6]
Unique values and counts are (array([1, 2, 3, 5, 6]), array([2, 1, 3, 1, 2]))


In [37]:
x = np.array([3, 1, 6, 2, 3, 3, 1, 5, 6])

# Unpack the (values, counts) tuple straight into two separate arrays
unique_vals, counts = np.unique(x, return_counts=True)
print(f'Unique values are {unique_vals}')
print(f'Counts of each unique value are {counts}')

Unique values are [1 2 3 5 6]
Counts of each unique value are [2 1 3 1 2]


### Operations via Broadcasting
Operations on arrays can be performed using Python's standard operators, and are vectorized for much quicker calculation than performing them on each element in a loop

In [39]:
# A scalar combined with an array is applied to every element of the array
x = np.array([1, 2, 3])
print('x = ', x)
print('x+5 = ', x + 5)
print('x*5 = ', x * 5)

x =  [1 2 3]
x+5 =  [6 7 8]
x*5 =  [ 5 10 15]


In [40]:
# NumPy's math functions also work element by element (x is the array from the previous cell)
print('ln(x) = ', np.log(x))  # natural logarithm
print('e^x = ', np.exp(x))    # exponential

ln(x) =  [0.         0.69314718 1.09861229]
e^x =  [ 2.71828183  7.3890561  20.08553692]


### Comparisons & filtering arrays with Boolean masks
Comparison operators can be used on arrays to return Boolean (True/False) values. These Boolean values can then be used as a “mask” to filter arrays to only those values where the Boolean was True

In [41]:
x = np.array([1, 2, 3, 4])
# A comparison against an array is evaluated per element and yields an array of True/False values
print('x: ', x)
print('x>2: ', x > 2)
print('x==4: ', x == 4)
print('x!=2: ', x != 2)

x:  [1 2 3 4]
x>2:  [False False  True  True]
x==4:  [False False False  True]
x!=2:  [ True False  True  True]


In [42]:
# Indexing an array with a Boolean array keeps only the positions that are True,
# so the comparison acts as a "mask" or filter (x is the array from the previous cell)
print(x[x > 2])   # every element of x greater than 2
print(x[x != 2])  # every element of x that is not equal to 2

[3 4]
[1 3 4]


In [43]:
# A mask computed from one array can filter a different array of the same length
x = np.array([1, 2, 3, 4])
y = np.array([10, 20, 30, 40])
print(x > 2)     # the mask: True at the positions where the condition holds
print(y[x > 2])  # the elements of y at those same positions

[False False  True  True]
[30 40]


In [48]:
# Several conditions can be combined element-wise with the bitwise operators & (and) and | (or).
# Each condition is parenthesised because & and | bind more tightly than the comparisons.
x = np.array([1, 2, 3, 4])
print([(x > 2) & (x != 4)])  # the mask sits inside a list here, so it prints as [array(...)]

[array([False, False,  True, False])]


In [49]:
# np.argwhere() applied to a comparison gives the indices at which the condition is True
# (one row of indices per matching element)
x = np.array([1, 2, 3, 4])
print('x: \n', x)
idxs = np.argwhere(x > 2)
print(f'Indices where x>2: {idxs}')

x: 
 [1 2 3 4]
Indices where x>2: [[2]
 [3]]


### Sorting arrays
You can sort arrays and return either the sorted values or the indices of the sorted values

In [53]:
x = np.array([
    [3, 1, 6],
    [5, 0, 3],
])
print('x: \n', x)

# axis=1 (across): the values inside each row are put in ascending order
y = np.sort(x, axis=1)
print('Sorted rows: \n', y)

# axis=0 (down): the values inside each column are put in ascending order
z = np.sort(x, axis=0)
print('Sorted columns: \n', z)

x: 
 [[3 1 6]
 [5 0 3]]
Sorted rows: 
 [[1 3 6]
 [0 3 5]]
Sorted columns: 
 [[3 0 3]
 [5 1 6]]


In [54]:
# argsort() gives the indices that would sort the array rather than the sorted values
# (x is the 2x3 array from the previous cell)

# axis=1 (across): for each row, the column indices in ascending order of value
rowsort_indices = np.argsort(x, axis=1)
print('Sorted row indices: \n', rowsort_indices)

# axis=0 (down): for each column, the row indices in ascending order of value
colsort_indices = np.argsort(x, axis=0)
print('Sorted column indices: \n', colsort_indices)

Sorted row indices: 
 [[1 0 2]
 [1 2 0]]
Sorted column indices: 
 [[0 1 1]
 [1 0 0]]
