# Numpy
Numerical Python

- NumPy (or Numpy) is a Linear Algebra Library for Python. The reason it is so important for Data Science with Python is that almost all of the libraries in the PyData Ecosystem rely on NumPy as one of their main building blocks.
- Numpy is also incredibly fast, as it has bindings to C libraries.
- Numpy arrays are the main way we will use Numpy throughout the course.
- Numpy arrays essentially come in two flavors: vectors and matrices.
- Vectors are strictly 1-d arrays and matrices are 2-d (but you should note a matrix can still have only one row or one column).

In [64]:
import numpy as np
from numpy.random import randint

---

## NumPy Arrays

In [4]:
my_list = [1,2,3]

In [5]:
arr = np.array(my_list)

In [6]:
arr

array([1, 2, 3])

In [7]:
my_mat = [[1,2,3],[4,5,6],[7,8,9]]

In [8]:
np.array(my_mat)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [9]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [46]:
np.arange(0, 11, 2) # [start, end), step

array([ 0,  2,  4,  6,  8, 10])

In [47]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [14]:
np.zeros(3)

array([0., 0., 0.])

In [25]:
np.zeros((2,3)) # Takes a tuple

array([[0., 0., 0.],
       [0., 0., 0.]])

In [17]:
np.ones(4)

array([1., 1., 1., 1.])

In [18]:
np.ones((3,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [20]:
np.linspace(0,5,10) # 10 evenly spaced points from 0 to 5 (both endpoints included)

array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

- The third argument in linspace represents the number of points, whereas in arange it represents the step size.

In [22]:
# Identity Matrix - a square matrix (no. of rows equals no. of cols) with 1s on the diagonal and 0s everywhere else.
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [23]:
np.random.rand(5) # Returns an array of the given shape filled with samples drawn uniformly from [0, 1)

array([0.57694615, 0.94362768, 0.76569932, 0.36030105, 0.25357479])

In [33]:
np.random.rand()

0.8848453543160889

In [29]:
np.random.rand(2,3) # No need for a tuple

array([[0.03363058, 0.84030778, 0.64473709],
       [0.25151038, 0.92935271, 0.44856704]])

In [32]:
# Samples from normal distribution or standard Gaussian distribution
np.random.randn() # Numbers not from a uniform distribution of 0 to 1 but instead from a standard normal distribution centered around zero.

2.4624957402500196

In [34]:
np.random.randn(2)

array([-0.40975676,  0.60527888])

In [35]:
np.random.randn(2,3)

array([[ 1.03481684,  1.77640625,  1.03828335],
       [ 0.09751545,  0.3837863 , -1.69330769]])

In [41]:
np.random.randint(2, 5) # Returns a random integer from low (inclusive) to high (exclusive)

3

In [43]:
np.random.randint(1, 100, 50) # 50 random integers from [1,100)

array([50, 71, 73, 86, 40, 87, 59, 38, 31, 61, 56, 14, 25, 67, 82, 84, 89,
       92,  5, 15, 55, 77, 82, 32, 19, 82, 81, 62, 65, 99,  9, 81, 16, 33,
        9, 23, 86, 75, 93, 19, 77, 50, 59, 86,  7,  9, 49, 11, 67,  2])

### Operations on Arrays

In [44]:
arr = np.arange(25)

In [45]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [49]:
ranarr = np.random.randint(0, 50, 10)

In [50]:
ranarr

array([23, 37, 35, 28,  7, 39, 47, 17, 31, 47])

In [54]:
# Reshape an array
arr.reshape(5, 5) # rows, cols
# Error will be thrown if we cannot fill the matrix completely or if the shape cannot contain the whole array.

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [56]:
ranarr.max()

47

In [57]:
ranarr.min()

7

In [58]:
ranarr.argmax() # index value of the maximum value

6

In [59]:
ranarr.argmin()

4

In [61]:
# Shape of a vector
arr.shape

(25,)

In [63]:
# Data type in the array
arr.dtype

dtype('int32')

- Numpy arrays generally store Numbers but can store other data types too.

---

## NumPy Array Indexing and Selection

In [65]:
arr = np.arange(0, 11)

In [66]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [67]:
arr[8]

8

In [68]:
arr[1:5]

array([1, 2, 3, 4])

In [69]:
arr[:5]

array([0, 1, 2, 3, 4])

In [71]:
arr[5:]

array([ 5,  6,  7,  8,  9, 10])

In [72]:
arr[-1]

10

In [73]:
arr[5:3:-1]

array([5, 4])

In [75]:
arr[5:8:2]

array([5, 7])

In [76]:
arr[0:5] = 100 # Broadcast 100 in to 1st 5 elements

In [77]:
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [79]:
arr = np.arange(0,11)

In [86]:
slice_of_arr = arr[0:6] # slice_of_arr is just a view referencing the 1st 6 elements inside arr; it is not a new array.
# To save memory, NumPy does not automatically copy arrays when slicing, so changes to the slice also change arr.

In [85]:
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [83]:
# Broadcasting
slice_of_arr[:] = 99

In [87]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [88]:
arr_copy = arr.copy() # Make an independent copy of the array's data (not a view), so changes to arr_copy do not affect arr

In [90]:
arr_copy[:] = 100

In [91]:
arr_copy

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [92]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

### Indexing and Selection of 2-D Arrays

In [95]:
arr_2d = np.array([[5,10,15],[20,25,30],[35,40,45]])

In [96]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [98]:
arr_2d[0][0]

5

In [99]:
arr_2d[0]

array([ 5, 10, 15])

In [100]:
arr_2d[0,2]

15

In [105]:
arr_2d[:2,1:] # Grabbing subsections

array([[10, 15],
       [25, 30]])

In [102]:
arr_2d[:2]

array([[ 5, 10, 15],
       [20, 25, 30]])

In [107]:
arr = np.arange(1,11)

In [108]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [110]:
bool_arr = arr > 5

In [111]:
bool_arr

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [114]:
# Conditional Selection
arr[bool_arr] # Only the values at positions where bool_arr is True are returned

array([ 6,  7,  8,  9, 10])

In [115]:
arr[arr > 5] # Returns elements from arr that are > 5

array([ 6,  7,  8,  9, 10])

In [118]:
arr[arr < 3] # Elements < 3 are returned

array([1, 2])

In [119]:
arr_2d = np.arange(50).reshape(5, 10)

In [120]:
arr_2d

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [122]:
arr_2d[1:3, 3:5] # Rows at index 1 and 2, columns at index 3 and 4 (the end index is exclusive)

array([[13, 14],
       [23, 24]])

In [123]:
arr_2d[1:3]

array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

---

# NumPy Operations
- Array with Array
- Array with Scalars
- Universal Array Functions

In [124]:
arr = np.arange(0,11)

In [125]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

### Array with Array

In [127]:
arr + arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [128]:
arr - arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [129]:
arr * arr

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

In [138]:
# Sometimes NumPy issues a warning instead of errors in case of Mathematical operations
# 1/0 # <-- Python will give error
arr / arr # <-- Will not give error, rather will give warning
# The first element gives nan (not a number)



array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

### Array with Scalars

In [132]:
arr + 100

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [134]:
arr - 100

array([-100,  -99,  -98,  -97,  -96,  -95,  -94,  -93,  -92,  -91,  -90])

In [135]:
arr / 100

array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ])

In [136]:
arr * 100

array([   0,  100,  200,  300,  400,  500,  600,  700,  800,  900, 1000])

In [140]:
1 / arr
# The 1st element returns inf (infinity) with a warning

  1 / arr


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111,
       0.1       ])

In [141]:
arr ** 2

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

### Universal Array Functions

In [142]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766])

In [143]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04])

In [145]:
arr.max()

10

In [146]:
np.max(arr)

10

In [148]:
np.sin(arr)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849,
       -0.54402111])

In [150]:
np.log(arr)
# We get -inf (negative infinity) with a warning

  np.log(arr)


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458,
       2.30258509])

---------------