
<a href='https://huntsman.usu.edu/directory/jahangiry-pedram'> <img src="logo.jpg" /></a>

___
## Pedram Jahangiry 

# NumPy 

Topics to be covered:

1. NumPy arrays
2. NumPy indexing and extraction
3. NumPy operations



In [1]:
import numpy as np

##  1. NumPy Arrays

In [2]:
my_list = [0,1,2,3]
my_list

[0, 1, 2, 3]

In [3]:
np.array(my_list)

array([0, 1, 2, 3])

In [4]:
my_matrix = [[1,2,3],
            [4,5,6]]
my_matrix

[[1, 2, 3], [4, 5, 6]]

In [5]:
np.array(my_matrix)

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
# Built in methods
# np.arange() : return evenly spaced values within a given interval (the stop value is excluded).
np.arange(0,4)

array([0, 1, 2, 3])

In [7]:
np.arange(0,10,3)

array([0, 3, 6, 9])

In [8]:
# Zeros and ones : Generate arrays of zeros or ones
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [9]:
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [10]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [11]:
np.ones((5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [12]:
# linspace : Return evenly spaced numbers over a specified interval. 
np.linspace(0,20,5)

array([ 0.,  5., 10., 15., 20.])

In [13]:
# eye: Creates identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [14]:
# rand : create random samples from a uniform distribution over [0, 1)
np.random.rand(5)

array([0.24743719, 0.23361749, 0.99695809, 0.63900589, 0.9334282 ])

In [15]:
np.random.rand(5,5)

array([[0.75151958, 0.80063038, 0.14451056, 0.33734486, 0.27168524],
       [0.91440163, 0.07305328, 0.68766001, 0.46134214, 0.39925077],
       [0.12292387, 0.39694622, 0.81511339, 0.74829813, 0.00304258],
       [0.49405795, 0.37169662, 0.60717281, 0.76731192, 0.06313865],
       [0.18904521, 0.64445119, 0.37663504, 0.79442819, 0.6753975 ]])

In [16]:
# randn : create random samples from standard normal distribution
np.random.randn(5)

array([ 0.39390151, -1.00159347, -0.42107951,  1.16437189, -0.41880771])

In [17]:
np.random.randn(5,5)

array([[ 0.11406524, -0.25653226, -0.36776475, -0.74455242, -0.08243789],
       [ 0.10119703, -0.23826288, -1.39111353,  0.47035082,  0.56374782],
       [ 1.41655916,  0.66789595,  1.25703205, -0.59814531, -1.23362211],
       [ 0.17848835,  0.65575975, -0.59251051,  0.53511962,  1.30532523],
       [-1.97123125,  0.11010475, -1.10363541,  1.32782061,  0.16670226]])

In [25]:
# randint(a,b) : create a random sample of integers from a (including a) to b (excluding b)
np.random.randint(1,5)

4

In [26]:
np.random.randint(1,5,20)

array([1, 3, 3, 4, 4, 2, 2, 3, 2, 1, 2, 3, 1, 4, 1, 1, 1, 4, 1, 3])

In [27]:
# seed is used to fix the random state.
np.random.seed(100)
np.random.randn(5)

array([-1.74976547,  0.3426804 ,  1.1530358 , -0.25243604,  0.98132079])

In [28]:
np.random.seed(100)
np.random.randn(4)

array([-1.74976547,  0.3426804 ,  1.1530358 , -0.25243604])

In [29]:
# array methods

my_array = np.arange(1,10)
my_array


array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
# reshape
new_array = my_array.reshape(3,3)
new_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [31]:
new_array.shape

(3, 3)

In [33]:
new_array.dtype

dtype('int32')

In [34]:
type(new_array)

numpy.ndarray

In [35]:
my_array = np.append(my_array, [100,-100])
my_array

array([   1,    2,    3,    4,    5,    6,    7,    8,    9,  100, -100])

In [36]:
my_array.max()

100

In [37]:
my_array.argmax()

9

In [38]:
my_array.min()

-100

In [39]:
my_array.argmin()

10

## 2. NumPy indexing and extraction

In [40]:
my_array

array([   1,    2,    3,    4,    5,    6,    7,    8,    9,  100, -100])

In [41]:
# extraction is very similar to list extraction
my_array[1]

2

In [42]:
my_array[6:9]

array([7, 8, 9])

In [43]:
# With NumPy arrays, you can broadcast a single value across a larger set of values. This is not possible using lists. 

my_array[0:5]=100
my_array

array([ 100,  100,  100,  100,  100,    6,    7,    8,    9,  100, -100])

In [44]:
my_list = list(range(1,10))
my_list

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [45]:
my_list[0:5]

[1, 2, 3, 4, 5]

In [46]:
my_list[0:2]=100

TypeError: can only assign an iterable

In [47]:
my_list[0:2]=[100,100,100]
my_list

[100, 100, 100, 3, 4, 5, 6, 7, 8, 9]

In [49]:
# matrix: note that indexing in Python is zero-based (the first row/column has index 0), unlike R or MATLAB, where indexing starts at 1.

my_matrix= np.arange(0,6).reshape(2,3)
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [50]:
# Format is matrix[row][col] or matrix[row,col]

# extracting the first row
my_matrix[0]

array([0, 1, 2])

In [51]:
# extracting the first column
my_matrix[:,0]

array([0, 3])

In [52]:
my_matrix[0][1] == my_matrix[0,1]

True

In [53]:
my_matrix[:2,1:]

array([[1, 2],
       [4, 5]])

For a visual guide, search Google Images for "numpy array indexing".


### Extracting with conditional selection


In [54]:
my_array

array([ 100,  100,  100,  100,  100,    6,    7,    8,    9,  100, -100])

In [55]:
my_array > 50

array([ True,  True,  True,  True,  True, False, False, False, False,
        True, False])

In [56]:
my_array[my_array>50]

array([100, 100, 100, 100, 100, 100])

This is another advantage of using arrays instead of lists. You cannot simply apply comparison operators to a list; you have to use the filter() function (or a list comprehension) instead.

In [57]:
list(filter(lambda x: x>50, my_array))

[100, 100, 100, 100, 100, 100]

In [58]:
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [59]:
my_matrix > 1

array([[False, False,  True],
       [ True,  True,  True]])

In [60]:
my_matrix[my_matrix > 1]
# the shape is not preserved though: the result is a flattened 1-D array

array([2, 3, 4, 5])

## 3. NumPy operations

In [61]:
arr = np.arange(0,5)
arr

array([0, 1, 2, 3, 4])

In [62]:
arr + arr

array([0, 2, 4, 6, 8])

In [63]:
arr ** arr

array([  1,   1,   4,  27, 256], dtype=int32)

In [64]:
arr/arr

  arr/arr


array([nan,  1.,  1.,  1.,  1.])

In [65]:
1/arr

  1/arr


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ])

In [66]:
# Square Roots
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ])

In [67]:
# Exponential
np.exp(arr)

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692, 54.59815003])

In [68]:
# Natural Logarithm
np.log(arr)

  np.log(arr)


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436])

In [69]:
# summary statistics on arrays
arr

array([0, 1, 2, 3, 4])

In [70]:
arr.sum()

10

In [71]:
arr.mean()

2.0

In [72]:
arr.var()

2.0

In [73]:
arr.std()

1.4142135623730951

## Axis Logic
When working with 2-dimensional arrays (matrices) we have to consider rows and columns. This becomes very important when we get to the section on pandas. In array terms, axis 0 is the vertical axis (running down the rows), and axis 1 is the horizontal axis (running across the columns). These values (0, 1) correspond to the order in which <tt>arr.shape</tt> values are returned: (number of rows, number of columns).

Let's see how this affects our summary statistic calculations from above.

In [74]:
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [75]:
# axis 0 (zero) performs a vertical operation (down the rows, one result per column), and axis 1 a horizontal one (one result per row).
# again, note that the logic is different from R, MATLAB, etc.
my_matrix.sum(axis=0)

array([3, 5, 7])

In [76]:
my_matrix.sum(1)

array([ 3, 12])

In [77]:
my_matrix * my_matrix  # note that this is not a matrix multiplication

array([[ 0,  1,  4],
       [ 9, 16, 25]])

In [78]:
# To do a matrix multiplication use np.dot (equivalent to %*% in R)
np.dot(my_matrix,my_matrix.T)

array([[ 5, 14],
       [14, 50]])

In [79]:
# inverse of a matrix
A = np.array([[1,2],[3,4]]) 
A

array([[1, 2],
       [3, 4]])

In [80]:
A_inv = np.linalg.inv(A)
A_inv

array([[-2. ,  1. ],
       [ 1.5, -0.5]])