# Chapter 2: Extending Python Using NumPy

## Creating NumPy Arrays

In [2]:
import numpy as np

In [3]:
# creates a range from 0 to 9
a1 = np.arange(10)
print(a1)
print(a1.shape)

[0 1 2 3 4 5 6 7 8 9]
(10,)


In [4]:
# creates a range from 0 to 9, step 2
a2 = np.arange(0,10,2)
print(a2)

[0 2 4 6 8]


In [5]:
# create an array with all 0s
a3 = np.zeros(5)
print(a3)
print(a3.shape)

[0. 0. 0. 0. 0.]
(5,)


In [6]:
# array of rank 2 with all 0s; 2 rows and 3 columns
a4 = np.zeros((2,3))
print(a4.shape)
print(a4)

(2, 3)
[[0. 0. 0.]
 [0. 0. 0.]]


In [7]:
# array of rank 2 with all 8s; 2 rows and 3 columns
a5 = np.full((2,3), 8)
print(a5)

[[8 8 8]
 [8 8 8]]


In [8]:
# 4x4 identity matrix
a6 = np.eye(4)
print(a6)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [9]:
# rank 2 array (2 rows 4 columns) with random values in the half-open interval [0.0, 1.0)
a7 = np.random.random((2,4))
print(a7)

[[0.67654414 0.40590292 0.04561554 0.01514099]
 [0.19087284 0.19424972 0.92134739 0.39966427]]


In [10]:
# array from a Python list
list1 = [1,2,3,4,5]
r1 = np.array(list1)
print(r1)

[1 2 3 4 5]


## Array Indexing

In [11]:
print(r1[0])
print(r1[1])

1
2


In [12]:
# create a two-dimensional array from two Python lists (one list per row)
list2 = [6,7,8,9,0]
r2 = np.array([list1, list2])
print(r2)

[[1 2 3 4 5]
 [6 7 8 9 0]]


In [13]:
print(r2.shape)
print(r2[0,0])
print(r2[0,1])
print(r2[1,0])

(2, 5)
1
2
6


#### Boolean Indexing

In [14]:
print(r1>2)

[False False  True  True  True]


In [15]:
print(r1[r1>2])

[3 4 5]


In [16]:
nums = np.arange(20)
print(nums)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [17]:
odd_num = nums[nums % 2 == 1]
print(odd_num)

[ 1  3  5  7  9 11 13 15 17 19]


#### Slicing Arrays

In [18]:
a = np.array([[1,2,3,4,5],[4,5,6,7,8],[9,8,7,6,5]])
print(a)

[[1 2 3 4 5]
 [4 5 6 7 8]
 [9 8 7 6 5]]


In [19]:
# row 1 to 3 (not inclusive) and first 3 columns
b1 = a[1:3, :3]
print(b1)

[[4 5 6]
 [9 8 7]]


In [20]:
# negative indices count from the end: last 2 rows and last 2 columns
b2 = a[-2: ,-2:]
print(b2)

[[7 8]
 [6 5]]


#### NumPy Slice is a Reference

In [21]:
# b3 is pointing to a subset of a
b3 = a[1:, 2:]
print(b3)

[[6 7 8]
 [7 6 5]]


In [22]:
# b3[0,2] is pointing to a[1,4]
b3[0,2] = 88

In [23]:
print(a)

[[ 1  2  3  4  5]
 [ 4  5  6  7 88]
 [ 9  8  7  6  5]]


In [24]:
# the result of the slicing is dependent on how you slice it
b4 = a[2:, :]
# The result is a rank 2 array
print(b4)
print(b4.shape)

[[9 8 7 6 5]]
(1, 5)


In [25]:
b5 = a[2, :]
# The result is a rank 1 array
print(b5)

[9 8 7 6 5]


In [26]:
print(b5.shape)

(5,)


## Reshaping Arrays

In [27]:
b5 = b5.reshape(1,-1)
print(b5)

[[9 8 7 6 5]]


In [28]:
# reshape() returns the reshaped array and leaves b4 itself untouched; the
# result is discarded here, so the print below still shows the rank 2 array.
# To actually get a rank 1 array, assign the result: b4 = b4.reshape(-1,)
b4.reshape(-1,)
print(b4)

[[9 8 7 6 5]]


## Array Math

In [29]:
x1 = np.array([[1,2,3],[4,5,6]])
y1 = np.array([[7,8,9],[2,3,4]])

In [30]:
# Add two arrays
print(x1 + y1)

[[ 8 10 12]
 [ 6  8 10]]


In [31]:
# vector calculations
x = np.array([2,3])
y = np.array([4,2])
z = x + y

In [32]:
print(z)

[6 5]


In [33]:
# np.add() function to add two arrays
np.add(x1,y1)

array([[ 8, 10, 12],
       [ 6,  8, 10]])

In [34]:
# subtraction
print(x1 - y1)
np.subtract(x1, y1)

[[-6 -6 -6]
 [ 2  2  2]]


array([[-6, -6, -6],
       [ 2,  2,  2]])

In [35]:
# multiplication
print(x1 * y1)
np.multiply(x1, y1)

[[ 7 16 27]
 [ 8 15 24]]


array([[ 7, 16, 27],
       [ 8, 15, 24]])

In [36]:
# division
print(x1 / y1)
np.divide(x1, y1)

[[0.14285714 0.25       0.33333333]
 [2.         1.66666667 1.5       ]]


array([[0.14285714, 0.25      , 0.33333333],
       [2.        , 1.66666667, 1.5       ]])

##### Example
Suppose you have three arrays: one containing the names of a group of people, another the corresponding heights of these individuals, and the last one the corresponding weights of the individuals in the group

In [37]:
names = np.array(['Ann', 'Joe', 'Mark'])
heights = np.array([1.5, 1.78, 1.6])
weights = np.array([65, 46, 59])

You want to calculate the Body Mass Index (BMI) of this group
of people. The formula to calculate BMI is as follows:
* Divide the weight in kilograms (kg) by the height in meters (m)
* Divide the answer by the height again

In [38]:
# calculate the BMI
bmi = weights/heights **2
print(bmi)

[28.88888889 14.51836889 23.046875  ]


Using the BMI, you can classify a person as healthy, overweight, or underweight using the following categories:
* Underweight if BMI < 18.5
* Overweight if BMI > 25
* Normal weight if 18.5 <= BMI <= 25

In [39]:
print("Overweight: " , names[bmi>25])

Overweight:  ['Ann']


In [40]:
print("Underweight: " , names[bmi<18.5])

Underweight:  ['Joe']


In [41]:
print("Healthy: " , names[(bmi>=18.5) & (bmi<=25)]) 

Healthy:  ['Mark']


#### Dot Product

In [42]:
x = np.array([2,3])
y = np.array([4,2])
np.dot(x,y)

14

In [43]:
# matrix multiplication
x2 = np.array([[1,2,3],[4,5,6]])
y2 = np.array([[7,8],[9,10],[11,12]])

In [44]:
print(np.dot(x2,y2))

[[ 58  64]
 [139 154]]


#### Matrix

In [45]:
# A matrix is strictly two-dimensional, whereas an ndarray can be multidimensional
# NOTE(review): current NumPy docs appear to discourage np.matrix in new code — confirm before reuse

x2 = np.matrix([[1,2],[4,5]])
y2 = np.matrix([[7,8],[2,3]])

In [46]:
x2

matrix([[1, 2],
        [4, 5]])

In [47]:
y2

matrix([[7, 8],
        [2, 3]])

Convert a NumPy array to a matrix using the asmatrix() function

In [48]:
x1 = np.array([[1,2],[4,5]])
y1 = np.array([[7,8],[2,3]])
x1 = np.asmatrix(x1)
y1 = np.asmatrix(y1)

print(x1)
print(y1)

[[1 2]
 [4 5]]
[[7 8]
 [2 3]]


The difference between an ndarray and a matrix shows up when you perform multiplication on them:

In [49]:
# When multiplying two ndarray objects, the result is the element-by-element multiplication
x1 = np.array([[1,2],[4,5]])
y1 = np.array([[7,8],[2,3]])
print(x1 * y1)

[[ 7 16]
 [ 8 15]]


In [50]:
#  When multiplying two matrix objects, the result is the dot product
x2 = np.matrix([[1,2],[4,5]])
y2 = np.matrix([[7,8],[2,3]])
print(x2 * y2)

[[11 14]
 [38 47]]


#### Cumulative Sum

Very often, when dealing with numerical data, there is a need to find the cumulative sum of numbers in a NumPy array.

In [51]:
a = np.array([(1,2,3),(4,5,6), (7,8,9)])
print(a)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [52]:
# prints the cumulative sum of all the elements in the array
print(a.cumsum())

[ 1  3  6 10 15 21 28 36 45]


The cumsum() function also takes an optional argument, axis. An axis of 0 gives the cumulative sum down each column, while an axis of 1 gives the cumulative sum across each row:

In [53]:
# axis=1: running total across each row (one column added at a time)
print(a.cumsum(axis=1))

[[ 1  3  6]
 [ 4  9 15]
 [ 7 15 24]]


In [54]:
# axis=0: running total down each column (one row added at a time)
print(a.cumsum(axis=0))

[[ 1  2  3]
 [ 5  7  9]
 [12 15 18]]


#### NumPy Sorting

NumPy provides a number of efficient sorting functions that make it very easy
to sort an array. The first function for sorting is sort().

In [55]:
ages = np.array([34,12,37,5,13])
# does not modify the original array
sorted_ages = np.sort(ages)
print(sorted_ages)
print(ages)

[ 5 12 13 34 37]
[34 12 37  5 13]


If you want to sort the original array, call the _sort()_ function on the array itself as follows:

In [56]:
# modifies the array
ages.sort()
print(ages)

[ 5 12 13 34 37]


There is another function used for sorting—_argsort()_. The _argsort()_ function returns the indices that would sort an array.

In [57]:
ages = np.array([34,12,37,5,13])
print(ages.argsort())

[3 1 4 0 2]


To print the sorted ages array, use the result of _argsort()_ as the index to the ages array

In [58]:
print(ages[ages.argsort()])

[ 5 12 13 34 37]


*What is the real use of argsort()?* Imagine that you have three arrays representing a list of people, along with their ages and heights:

In [59]:
persons = np.array(['Johnny','Mary','Peter','Will','Joe'])
ages = np.array([34,12,37,5,13])
heights = np.array([1.76,1.2,1.68,0.5,1.25])

Suppose that you want to sort this group of people by age. If you simply sort the _ages_ array by itself, the other two arrays would not be sorted correctly based on age. This is where _argsort()_ comes in really handy:

In [60]:
# performs a sort based on ages and returns an array of indices indicating the sort order
sort_indices = np.argsort(ages)
print(sort_indices)

[3 1 4 0 2]


In [61]:
print(persons[sort_indices])
print(ages[sort_indices])
print(heights[sort_indices])

['Will' 'Mary' 'Joe' 'Johnny' 'Peter']
[ 5 12 13 34 37]
[0.5  1.2  1.25 1.76 1.68]


They would now be sorted based on age. As you can see, Will is the youngest, followed by Mary, and so on. The corresponding height for each person would also be in the correct order.

If you wish to sort based on name, then simply use _argsort()_ on the persons array and feed the resulting indices into the three arrays:

In [62]:
# sort based on names
sort_indices = np.argsort(persons)
print(persons[sort_indices])
print(ages[sort_indices])
print(heights[sort_indices])

['Joe' 'Johnny' 'Mary' 'Peter' 'Will']
[13 34 12 37  5]
[1.25 1.76 1.2  1.68 0.5 ]


To reverse the order of the names and display them in descending order, use the Python `[::-1]` slice notation:

In [63]:
# reverse the sort indices with [::-1] so the names come out in descending order
reverse_sort_indices = np.argsort(persons)[::-1]
print(persons[reverse_sort_indices])
print(ages[reverse_sort_indices])
print(heights[reverse_sort_indices])

['Will' 'Peter' 'Mary' 'Johnny' 'Joe']
[ 5 37 12 34 13]
[0.5  1.68 1.2  1.76 1.25]


## Array Assignment
When assigning NumPy arrays, you have to take note of how arrays are assigned. 

#### Copying by Reference

In [64]:
list1 = [[1,2,3], [5,6,7]]
a1 = np.array(list1)
print(a1)

[[1 2 3]
 [5 6 7]]


When you assign a1 to another variable, a2, it looks as though a copy of the array is created:

In [65]:
# plain assignment does not copy: a2 becomes another name for the same array object as a1
a2 = a1
print(a1)
print(a2)

[[1 2 3]
 [5 6 7]]
[[1 2 3]
 [5 6 7]]


However, a2 is actually pointing to the original a1. So, any changes made to either array will affect the other

In [66]:
# make some changes to a2
a2[0][0] = 11
print(a1)

[[11  2  3]
 [ 5  6  7]]


In [67]:
print(a2)

[[11  2  3]
 [ 5  6  7]]


If a1 now changes shape, a2 will also be affected as follows:

In [68]:
# reshape a1
a1.shape = 1,-1
print(a1)

[[11  2  3  5  6  7]]


In [69]:
# a2 also changes shape
print(a2)

[[11  2  3  5  6  7]]


#### Copying by View (Shallow Copy)
NumPy has a view() function that allows you to create a copy of an array by reference, while at the same time ensuring that changing the shape of the original array does not affect the shape of the copy. This is known as a shallow copy. 

In [72]:
# creates a copy of a1 by reference; but changes in dimension in a1 will not affect a2
list1 = [[1,2,3,4], [5,6,7,8]]
a1 = np.array(list1)
a2 = a1.view()
print(a1)

[[1 2 3 4]
 [5 6 7 8]]


In [73]:
print(a2)

[[1 2 3 4]
 [5 6 7 8]]


As usual, modify a value in a1 and you will see the changes in a2:

In [74]:
# make some changes in a1
a1[0][0] = 11
print(a1)

[[11  2  3  4]
 [ 5  6  7  8]]


In [75]:
# the change is also seen in a2
print(a2)

[[11  2  3  4]
 [ 5  6  7  8]]


Up until now, the shallow copy is identical to the copying performed in the previous section. But with shallow copying, when you change the shape of a1, a2 is unaffected:

In [76]:
# change the shape of a1
a1.shape = 1,-1
print(a1)

[[11  2  3  4  5  6  7  8]]


In [77]:
# a2 does not change shape
print(a2)

[[11  2  3  4]
 [ 5  6  7  8]]


#### Copying by Value (Deep Copy)
If you want to copy an array by value, use the copy() function

In [78]:
list1 = [[1,2,3,4], [5,6,7,8]]
a1 = np.array(list1)
a2 = a1.copy()   # create a copy of a1 by value (deep copy)

The copy() function creates a deep copy of the array—it creates a complete copy of the array and its data. Once the copy is assigned to another variable, any changes made to the data or the shape of the original array will not affect the copy.

In [80]:
# make some changes in a1
a1[0][0] = 11
print(a1)

[[11  2  3  4]
 [ 5  6  7  8]]


In [81]:
# the change is not seen in a2
print(a2)

[[1 2 3 4]
 [5 6 7 8]]


In [82]:
# change the shape of a1
a1.shape = 1,-1
print(a1)

[[11  2  3  4  5  6  7  8]]


In [83]:
# a2 does not change shape
print(a2)

[[1 2 3 4]
 [5 6 7 8]]
