In [1]:
""" Numpy is the fundamental package for numeric computing with Python. 
It provides powerful ways to create, store, and / or manipulate data, which makes it able to seamlessly and speedily integrate with a wide variety of databases. 
This is also the foundation that Pandas is built on, which is a high-performance data-centric package that we will learn later in the course.

In this lecture, we will talk about creating arrays with certain data types, manipulating arrays, selecting elements from arrays, and loading datasets into arrays. 
Such functions are useful for manipulating data and understanding the functionalities of other common Python data packages.
 """
import numpy as np
import math


In [2]:
# NumPy arrays are displayed like a list (or a list of lists) and can be built from one as well:
# to create an array, we hand a Python list to np.array()
values = [1, 2, 3]
a = np.array(values)
print(a)

[1 2 3]


In [3]:
# We can print the number of dimensions of an array using the ndim attribute
print(a.ndim)


1


In [4]:
# Passing a list of lists to np.array() creates a multi-dimensional array - for instance, a matrix
# with one row per inner list
rows = [[1, 2, 3], [4, 5, 6]]
b = np.array(rows)
b

array([[1, 2, 3],
       [4, 5, 6]])

In [5]:
# Arithmetic operators on two arrays of the same shape are applied element by element
a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])

# a - b: subtract each element of b from the matching element of a
c = a - b
# a * b: multiply the matching elements together
d = a * b

print(c)
print(d)

[ 9 18 27 36]
[ 10  40  90 160]


In [6]:
# With arithmetic manipulation, we can convert current data to the way we want it to be. Here's a real-world
# problem I face - I moved down to the United States about 6 years ago from Canada. In Canada we use Celsius
# for temperatures, and my wife still hasn't converted to the US system which uses Fahrenheit. With numpy I
# could easily convert a number of Fahrenheit values, say the weather forecast, to Celsius

# Let's create an array of typical Ann Arbor winter Fahrenheit values
fahrenheit = np.array([0, -10, -5, -15, 0])

# Conversion formula: celsius = (F - 32) x 5/9, applied to every element at once
offset_from_freezing = fahrenheit - 32
celsius = offset_from_freezing * 5 / 9
celsius

array([-17.77777778, -23.33333333, -20.55555556, -26.11111111,
       -17.77777778])

In [9]:
# Besides elementwise manipulation, it is important to know that numpy supports matrix manipulation.
# Let's compare the two kinds of product on a pair of 2x2 matrices.
A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])

# The "*" sign gives the elementwise product
elementwise_product = A * B
print(elementwise_product)

# The "@" sign (or, alternatively, the dot function) gives the matrix product
matrix_product = A @ B
print(matrix_product)

[[2 0]
 [0 4]]
[[5 4]
 [3 4]]


In [10]:
# You don't have to worry about complex matrix operations for this course, but it's important to know that
# numpy is the underpinning of scientific computing libraries in Python, and that it is capable of doing both
# element-wise operations (the asterisk) as well as matrix-level operations (the @ sign). There's more on this
# in a subsequent course.

In [11]:
# A few more linear algebra concepts are worth layering in here. You might recall that the product of two
# matrices is only defined when their inner dimensions match, i.e. the number of columns of the first
# matrix equals the number of rows of the second. The dimensions refer to the number of elements both
# vertically (rows) and horizontally (columns) in the rendered matrices you've seen here. We can use numpy
# to quickly see the shape of a matrix:
A.shape

(2, 2)

In [12]:
# When arrays of different types are combined, the type of the resulting array corresponds to the
# more general of the two types. This is called upcasting.

# Let's create an array of integers ...
array1 = np.array([[1, 2, 3], [4, 5, 6]])
# ... and an array of floats
array2 = np.array([[7.1, 8.2, 9.1], [10.4, 11.2, 12.3]])

# Show the element type numpy chose for each array
# NOTE(review): the integer width printed (e.g. int32 vs int64) may depend on the platform and
# numpy version - confirm before relying on the recorded output
print(array1.dtype)
print(array2.dtype)

int32
float64


In [13]:
# Integers (int) are whole numbers only, while floating point numbers (float) can have a whole number
# portion and a decimal portion. The 64 in float64 refers to the number of bits used to represent each
# number, which determines the size (or precision) of the numbers that can be represented.

# Let's add the integer array and the float array together
array3 = array1 + array2

# Show the summed values, then the element type of the result
print(array3)
print(array3.dtype)

[[ 8.1 10.2 12.1]
 [14.4 16.2 18.3]]
float64


In [14]:
# Notice how the items in the resulting array have been upcast into floating point numbers.
# Numpy arrays also come with many useful aggregation functions, such as sum(), max(), min(), and mean()
total = array3.sum()
largest = array3.max()
smallest = array3.min()
average = array3.mean()

print(total)
print(largest)
print(smallest)
print(average)

79.3
18.3
8.1
13.216666666666667


In [15]:
# For two dimensional arrays, we can do the same thing for each row or column.
# Let's create an array with 15 elements, ranging from 1 to 15 (the stop value 16 is excluded),
# and then give it a dimension of 3x5
numbers = np.arange(1, 16, 1)
b = numbers.reshape(3, 5)
print(b)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]]


In [16]:
# Now, we often think about two dimensional arrays being made up of rows and columns, but you can also think
# of these arrays as just a giant ordered list of numbers, and the *shape* of the array, the number of rows
# and columns, is just an abstraction that we have for a particular purpose. Actually, this is exactly how
# basic images are stored in computer environments.

# Let's take a look at an example and see how numpy comes into play.

In [18]:
# Indexing, Slicing and Iterating
# Indexing, slicing and iterating are extremely important for data manipulation and analysis because these
# techniques allow us to select data based on conditions, and copy or update data.

In [19]:
# First we are going to look at integer indexing. A one-dimensional array works in a similar way to a
# list: to get an element, we simply use its offset index.
odd_numbers = [1, 3, 5, 7]
a = np.array(odd_numbers)
a[2]

5

In [20]:
# For a multidimensional array, we need to use integer array indexing. Let's create a new
# multidimensional array with three rows and two columns
rows = [[1, 2], [3, 4], [5, 6]]
a = np.array(rows)
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [21]:
# if we want to select one certain element, we can do so by entering the index, which is made up of two
# integers: the first being the row, and the second the column
a[1,1] # remember in python we start at 0!

4

In [22]:
# To get multiple elements - for example 1, 4, and 6 - into a one-dimensional array, we can look
# each one up by its (row, column) index and pass the results directly to the array function
picked = [a[0, 0], a[1, 1], a[2, 1]]
np.array(picked)

array([1, 4, 6])

In [23]:
# We can also do that by using another form of array indexing, which essentially "zips" the first
# list (row indices) and the second list (column indices) up into (row, column) pairs
row_indices = [0, 1, 2]
column_indices = [0, 1, 1]
print(a[row_indices, column_indices])

[1 4 6]


In [24]:
# NOTE(review): this cell repeats the array-indexing example from the cell before it (same statement,
# same output) - it looks like a leftover duplicate; confirm and remove or replace it.
# we can also do that by using another form of array indexing, which essentially "zips" the first list and the
# second list up
print(a[[0, 1, 2], [0, 1, 1]])

[1 4 6]


In [25]:
# Slicing is a way to create a sub-array based on the original array. For one-dimensional arrays,
# slicing works in a similar way to a list. To slice, we use the : sign. For instance, if we put :3
# in the indexing brackets, we get the elements from index 0 up to, but not including, index 3
a = np.array([0, 1, 2, 3, 4, 5])
first_three = a[:3]
print(first_three)

[0 1 2]


In [26]:
# By putting 2:4 in the brackets, we get the elements from index 2 up to, but not including, index 4
print(a[2:4])

[2 3]


In [27]:
# For multi-dimensional arrays, slicing works similarly. Let's set up a 3x4 example array
rows = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
]
a = np.array(rows)
a


array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [28]:
# First, if we put one argument in the brackets, for example a[:2], then we get all the elements from the
# first (index 0) and second (index 1) rows
a[:2]

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [29]:
# If we add another argument, for example a[:2, 1:3], we get the first two rows but then only the
# second and third column values (column indices 1 and 2)
a[:2, 1:3]

array([[2, 3],
       [6, 7]])

In [30]:
# So, in multidimensional arrays, the first argument is for selecting rows, and the second argument is for 
# selecting columns

In [31]:
# It is important to realize that a slice of an array is a view into the same data, not a copy. The sub
# array shares its underlying data with the original array, so modifying the sub array will consequently
# modify the original array

# Here I'll change the element at position [0, 0] of the sub array, which is 2, to 50, then we can see that
# the corresponding value in the original array, at position [0, 1], is changed to 50 as well

sub_array = a[:2, 1:3]
print("sub array index [0,0] value before change:", sub_array[0,0])
sub_array[0,0] = 50
print("sub array index [0,0] value after change:", sub_array[0,0])
print("original array index [0,1] value after change:", a[0,1])

sub array index [0,0] value before change: 2
sub array index [0,0] value after change: 50
original array index [0,1] value after change: 50
