
<a href='https://huntsman.usu.edu/directory/jahangiry-pedram'> <img src="logo.jpg" /></a>

___
## Pedram Jahangiry 

# NumPy 

Topics to be covered:

1. NumPy arrays
2. NumPy indexing and extraction
3. NumPy operations



In [1]:
import numpy as np

##  1. NumPy Arrays

In [2]:
my_list = [0,1,2,3]
my_list

[0, 1, 2, 3]

In [3]:
np.array(my_list)

array([0, 1, 2, 3])

In [4]:
my_matrix = [[1,2,3],
            [4,5,6]]
my_matrix

[[1, 2, 3], [4, 5, 6]]

In [5]:
np.array(my_matrix)

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
# Built in methods
# np.arange(start, stop, step): return evenly spaced values within the half-open interval [start, stop).
np.arange(0,4)

array([0, 1, 2, 3])

In [7]:
np.arange(0,10,3)

array([0, 3, 6, 9])

In [8]:
# Zeros and ones : Generate arrays of zeros or ones
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [9]:
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [10]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [11]:
np.ones((5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [12]:
# linspace : Return evenly spaced numbers over a specified interval. 
np.linspace(0,20,5)

array([ 0.,  5., 10., 15., 20.])

In [13]:
# eye: Creates identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [16]:
# rand : create random samples from a uniform distribution over [0, 1)
np.random.rand(5)

array([0.37091506, 0.34468121, 0.51730883, 0.52797413, 0.55935812])

In [17]:
np.random.rand(5,5)

array([[0.48987588, 0.00123656, 0.7317599 , 0.0095794 , 0.65304179],
       [0.03914186, 0.94614821, 0.65136502, 0.14343999, 0.37541345],
       [0.9917023 , 0.73828158, 0.04402431, 0.59735824, 0.82636435],
       [0.76261691, 0.57260339, 0.83817015, 0.10297174, 0.40985788],
       [0.62401892, 0.09991981, 0.73202587, 0.89359497, 0.05071513]])

In [18]:
# randn : create random samples from standard normal distribution
np.random.randn(5)

array([ 0.1434359 , -0.06417417, -1.16778777,  0.86874511, -0.20570673])

In [19]:
np.random.randn(5,5)

array([[-1.23217695,  0.03475537, -0.9640451 , -1.44138291, -0.87211242],
       [-0.89019261, -1.3739376 , -0.08447255,  0.6237676 , -1.12574173],
       [-0.84872897,  0.59341166,  0.28473782, -0.13241602, -0.32248765],
       [-1.10167808, -1.35528479, -0.00285159,  1.64445104,  0.60221587],
       [ 0.55884719,  0.83482164,  0.04852474,  0.85456972,  0.66346221]])

In [20]:
# randint(a,b) : create a random sample of integers from a (inclusive) to b (exclusive)
np.random.randint(1,5)

3

In [21]:
np.random.randint(1,5,20)

array([1, 1, 3, 3, 1, 1, 2, 1, 2, 4, 1, 2, 2, 4, 2, 4, 2, 1, 3, 3])

In [25]:
# seed is used to fix the random state.
np.random.seed(100)
np.random.randn(5)

array([-1.74976547,  0.3426804 ,  1.1530358 , -0.25243604,  0.98132079])

In [26]:
np.random.seed(100)
np.random.randn(4)

array([-1.74976547,  0.3426804 ,  1.1530358 , -0.25243604])

In [35]:
# array methods

my_array = np.arange(1,10)
my_array


array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [36]:
# reshape
new_array = my_array.reshape(3,3)
new_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [37]:
new_array.shape

(3, 3)

In [38]:
new_array.dtype

dtype('int32')

In [39]:
type(new_array)

numpy.ndarray

In [40]:
# np.append returns a new array with the values added at the end (it does not modify
# its input in place), so the result is assigned back to my_array.
my_array = np.append(my_array, [100,-100])
my_array

array([   1,    2,    3,    4,    5,    6,    7,    8,    9,  100, -100])

In [41]:
my_array.max()

100

In [42]:
my_array.argmax()

9

In [43]:
my_array.min()

-100

In [44]:
my_array.argmin()

10

## 2. NumPy indexing and extraction

In [45]:
my_array

array([   1,    2,    3,    4,    5,    6,    7,    8,    9,  100, -100])

In [46]:
# extraction is very similar to list extraction
my_array[9]

100

In [47]:
my_array[6:9]

array([7, 8, 9])

In [48]:
# With NumPy arrays, you can broadcast a single value across a slice of the array. This is not possible with lists: assigning to a list slice requires an iterable.

my_array[0:5]=100
my_array

array([ 100,  100,  100,  100,  100,    6,    7,    8,    9,  100, -100])

In [49]:
my_list = list(range(1,10))
my_list

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [50]:
my_list[0:5]

[1, 2, 3, 4, 5]

In [51]:
my_list[0:2]=100

TypeError: can only assign an iterable

In [52]:
my_list[0:2]=[100,100,100]
my_list

[100, 100, 100, 3, 4, 5, 6, 7, 8, 9]

In [53]:
# matrix: Note that matrix indexing in Python is slightly different from R, MATLAB, or other programming languages.

my_matrix= np.arange(0,6).reshape(2,3)
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [54]:
# Format is matrix[row][col] or matrix[row,col]

# extracting the first row
my_matrix[0]

array([0, 1, 2])

In [55]:
# extracting the first column
my_matrix[:,0]

array([0, 3])

In [56]:
my_matrix[0][1] == my_matrix[0,1]

True

In [57]:
my_matrix[:2,1:]

array([[1, 2],
       [4, 5]])

Tip: search Google Images for "numpy array indexing" to find helpful visual explanations.


### Extracting with conditional selection


In [58]:
my_array

array([ 100,  100,  100,  100,  100,    6,    7,    8,    9,  100, -100])

In [59]:
my_array > 50

array([ True,  True,  True,  True,  True, False, False, False, False,
        True, False])

In [60]:
my_array[my_array>50]

array([100, 100, 100, 100, 100, 100])

This is another advantage of using arrays instead of lists. You cannot simply apply the comparison operators to a list element-wise. You must use the filter() function (or a list comprehension) instead.

In [64]:
# filter() accepts any iterable; it is applied to my_array here, and a plain list
# would be filtered the same way.
list(filter(lambda x: x>50, my_array))

[100, 100, 100, 100, 100, 100]

In [65]:
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [66]:
my_matrix > 1

array([[False, False,  True],
       [ True,  True,  True]])

In [68]:
my_matrix[my_matrix > 1]
# note: the shape is not preserved; the result is a flattened 1-D array

array([2, 3, 4, 5])

## 3. NumPy operations

In [69]:
arr = np.arange(0,5)
arr

array([0, 1, 2, 3, 4])

In [70]:
arr + arr

array([0, 2, 4, 6, 8])

In [71]:
arr ** arr

array([  1,   1,   4,  27, 256], dtype=int32)

In [72]:
arr/arr

  """Entry point for launching an IPython kernel.


array([nan,  1.,  1.,  1.,  1.])

In [73]:
1/arr

  """Entry point for launching an IPython kernel.


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ])

In [74]:
# Square Roots
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ])

In [75]:
# Exponential
np.exp(arr)

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692, 54.59815003])

In [77]:
# Natural Logarithm
np.log(arr)

  


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436])

In [78]:
# summary statistics on arrays
arr

array([0, 1, 2, 3, 4])

In [79]:
arr.sum()

10

In [80]:
arr.mean()

2.0

In [81]:
arr.var()

2.0

In [82]:
arr.std()

1.4142135623730951

## Axis Logic
When working with 2-dimensional arrays (matrices) we have to consider rows and columns. This becomes very important when we get to the section on pandas. In array terms, axis 0 is the vertical axis (it runs down the rows), and axis 1 is the horizontal axis (it runs across the columns). These values (0,1) correspond to the order in which <tt>arr.shape</tt> values are returned: (number of rows, number of columns).

Let's see how this affects our summary statistic calculations from above.

In [83]:
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [85]:
# axis 0 (zero) performs a vertical operation (down the rows, one result per column), and axis 1 a horizontal one (across the columns, one result per row).
# again, note that the logic is different from R, MATLAB, etc.
my_matrix.sum(axis=0)

array([3, 5, 7])

In [86]:
my_matrix.sum(1)

array([ 3, 12])

In [87]:
my_matrix * my_matrix  # note that this is not a matrix multiplication

array([[ 0,  1,  4],
       [ 9, 16, 25]])

In [88]:
# To do a matrix multiplication use np.dot (equivalent to %*% in R)
np.dot(my_matrix,my_matrix.T)

array([[ 5, 14],
       [14, 50]])

In [90]:
# inverse of a matrix
A = np.array([[1,2],[3,4]]) 
A

array([[1, 2],
       [3, 4]])

In [91]:
A_inv = np.linalg.inv(A)
A_inv

array([[-2. ,  1. ],
       [ 1.5, -0.5]])