# Numpy

https://numpy.org/

## What is it? 
- Numpy is a Python library used for working with arrays
- Numpy is the fundamental package for scientific computing in Python


## Why do we care? 
- Numpy is one of the main reasons why Python is so powerful and popular for scientific computing
- Super fast. Numpy arrays are implemented in C, which makes numpy very fast.
- Numpy arrays allow for vectorized operations (element-wise math without writing explicit loops)

## Show us! 

### Create a 1D array

#### create a list
format: list()

In [1]:
#create a list
my_list = [1,2,3,4]

In [2]:
#whats the type
type(my_list)

list

In [459]:
#what is dtype
# my_list.dtype
# a list doesn't have a dtype attribute (this raises AttributeError)

In [460]:
#whats the shape
# my_list.shape
# a list doesn't have a shape attribute (this raises AttributeError)

#### create an array
format: np.array()

In [3]:
#import numpy
import numpy as np

In [4]:
#create the array
my_array = np.array(my_list)

In [5]:
#whats the type
type(my_array)

numpy.ndarray

In [6]:
#whats the shape
my_array.shape

(4,)

In [7]:
my_array.dtype

dtype('int64')

#### access elements of our new array

In [8]:
my_array

array([1, 2, 3, 4])

In [9]:
my_array[0]

1

In [10]:
my_array[-1]

4

#### slice the array

In [11]:
my_array[2:] #inclusive for first element

array([3, 4])

In [13]:
my_array[:2] #exclusive for last element

array([1, 2])

In [15]:
my_array[1:3]

array([2, 3])

#### create an array from 1 to 100

In [17]:
#np.array(?)
a = np.array(range(1,101))

In [18]:
#access single element
a[45]

46

In [20]:
a[-3]

98

In [21]:
#slice the array! 
a[45:55]

array([46, 47, 48, 49, 50, 51, 52, 53, 54, 55])

### Create a 2D array

In [27]:
#build a list of lists
matrix = [[1,2,3],
[4,5,6],
[7,8,9],
[10,11,12]]
matrix

[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]

In [23]:
type(matrix)

list

In [28]:
#turn it into an array! 
matrix_array = np.array(matrix)
matrix_array

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [26]:
#type
type(matrix_array)

numpy.ndarray

In [29]:
matrix_array.shape

(4, 3)

#### access elements

In [30]:
matrix_array[0]

array([1, 2, 3])

In [31]:
matrix_array[0][1]

2

In [36]:
matrix_array[1:]

array([[ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

### Descriptive Stats

In [37]:
a.min()

1

In [40]:
a.max(), a.mean(), a.sum()

(100, 50.5, 5050)

#### using methods: a method is called on the array object itself (e.g. `a.min()`, as above); the `np.` calls below are the equivalent functions

In [42]:
np.min(a)

1

In [43]:
np.max(a)

100

In [44]:
np.mean(a)

50.5

#### using functions: the function is called from the numpy module and the array is passed in as an argument

In [213]:
np.max(a)

100

In [214]:
np.std(a)

28.86607004772212

In [215]:
np.mean(a)

50.5

In [216]:
np.sum(a)

5050

### Array of Booleans! 

### Boolean Masks

1. create an array
2. make a list of booleans (mask)
3. combine

    format: array [ list of booleans ] 

In [45]:
#pull back our small array
my_array

array([1, 2, 3, 4])

In [51]:
#make list of booleans aka our mask
mask = [True, False, False, True]

In [53]:
#combine them 
my_array[mask] #returns items where the mask is true
# the mask must have the same number of elements as the array

array([1, 4])

#### how else can we get our array of boolean values?

In [54]:
my_array

array([1, 2, 3, 4])

In [55]:
#write a conditional with the array
my_array == 4

array([False, False, False,  True])

In [75]:
mask = my_array < 4
mask

array([ True,  True,  True, False])

In [60]:
my_array[mask]

array([1, 2, 3])

In [79]:
mask_a = (a % 2 == 1) & (a > 45)
mask_a

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False])

In [62]:
a[mask_a]

array([47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
       81, 83, 85, 87, 89, 91, 93, 95, 97, 99])

In [None]:
# if we want to make multiple conditions with an array
# we MUST USE & and | INSTEAD OF and/or

#### make and apply our boolean mask

In [231]:
matrix_array

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [68]:
#format: array_name [ list_of_booleans ]
mask = matrix_array > 3
mask

array([[False, False, False],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [69]:
matrix_array[mask]
# using a mask on a matrix returns a flattened 1D array of the matching elements

array([ 4,  5,  6,  7,  8,  9, 10, 11, 12])

#### cool! can we just do it with a list instead of an array?

In [71]:
#pull back our initial list
my_list

[1, 2, 3, 4]

In [73]:
#try to create an array of booleans from a list
my_list == 2
# comparing a list to a number does not compare element by element:
# it returns a single bool (is the whole list equal to 2?) instead of
# one bool per element like we get with an array

False

In [76]:
#apply an array of booleans to a list
mask

array([ True,  True,  True, False])

In [77]:
# my_list[mask]
#cannot use a mask on lists

TypeError: only integer scalar arrays can be converted to a scalar index

#### let's bring it back to arrays and make them bigger

In [81]:
#array
a

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [82]:
#mask
mask_a

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False])

In [83]:
#boolean mask
a[mask_a]

array([47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
       81, 83, 85, 87, 89, 91, 93, 95, 97, 99])

#### shortcut?

In [85]:
a[(a % 2 == 1) & (a > 45)]
# we can use the conditionals directly with the array
# instead of using a mask
# if we want to

array([47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
       81, 83, 85, 87, 89, 91, 93, 95, 97, 99])

#### what if we wanted to combine conditions with both "and" and "or"?

In [87]:
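# note the parentheses around each individual condition
# also note that we're using & and | instead of and/or
# (& is evaluated before |, so this reads: (odd AND > 45) OR (< 5))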
mask_a = (a % 2 == 1) & (a > 45) | (a < 5)

In [88]:
mask_a

array([ True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False])

#### what if we wanted the opposite of our mask?

In [89]:
mask_a

array([ True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False])

In [91]:
# the ~ operator negates the mask (True becomes False and vice versa)
~mask_a

array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True])

In [92]:
a[~mask_a]

array([  5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
        18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,
        31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,
        44,  45,  46,  48,  50,  52,  54,  56,  58,  60,  62,  64,  66,
        68,  70,  72,  74,  76,  78,  80,  82,  84,  86,  88,  90,  92,
        94,  96,  98, 100])

## Vectorization Operations
- makes looping over math so easy and fast!

### Add one to every element

#### hard way: doing it in a list

In [93]:
my_list

[1, 2, 3, 4]

In [96]:
#using a for loop
new_list = []
for x in my_list:
    new_list.append(x + 1)
new_list

[2, 3, 4, 5]

In [97]:
#using a list comprehension
new_list = [x + 1 for x in my_list]
new_list

[2, 3, 4, 5]

#### easy way: doing it with an array

In [98]:
my_array

array([1, 2, 3, 4])

In [99]:
#using vectorization operations
my_array + 1

array([2, 3, 4, 5])

### more operations

In [100]:
my_array * 2

array([2, 4, 6, 8])

In [101]:
my_array ** 2

array([ 1,  4,  9, 16])

In [102]:
my_array.sum()

10

In [103]:
my_array / 10

array([0.1, 0.2, 0.3, 0.4])

In [104]:
np.log(my_array)

array([0.        , 0.69314718, 1.09861229, 1.38629436])

### show us the speed

In [107]:
#make a really big array
big_array = np.array(range(1_000_001))
big_array
#we can use '_' instead of a comma in large numbers

array([      0,       1,       2, ...,  999998,  999999, 1000000])

In [114]:
%%timeit
# %%timeit MUST BE PLACED AT TOP OF CELL
# runs the cell many times (the number of loops is chosen automatically) and reports the mean time per loop
big_array ** 2

304 µs ± 388 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [115]:
#make a really big list
big_list = list(big_array)
big_list[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [116]:
%%timeit
for x in big_list:
    x ** 2

124 ms ± 756 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


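The same operation is measured in microseconds (about 304 µs) when done on the array,
but in milliseconds (about 124 ms) when done by looping over the list.

A microsecond is one thousandth of a millisecond, so the array version is roughly 400 times faster here.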

## Numpy ways to create arrays

#### full of zeros

In [119]:
np.zeros((10,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

#### full of ones

In [126]:
np.ones((10,2))

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

#### full of whatever you want 

In [127]:
np.full(10, 3)

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [128]:
np.full((10,2), 'potatoes')

array([['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes'],
       ['potatoes', 'potatoes']], dtype='<U8')

#### a quicker way to make a range

In [129]:
np.array(range(10))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [130]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [131]:
np.arange(1,11)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

#### an array of random integers

In [149]:
#np.random.randint()
np.random.randint(2,20, (10,2))

array([[11, 17],
       [10, 17],
       [12, 15],
       [17,  9],
       [19,  2],
       [ 9,  4],
       [ 6, 15],
       [ 8,  9],
       [18,  4],
       [ 4, 10]])

#### an array of random numbers from the standard normal distribution

In [151]:
np.random.randn(100)
# numbers are drawn randomly from the standard normal distribution
# (bell curve with mean 0 and standard deviation 1)

array([ 1.02805998,  0.44411574, -2.01506853, -1.248447  ,  0.240254  ,
        0.30909566,  0.94522352, -0.85389088, -0.36900442, -0.4490035 ,
       -0.38257231, -0.23248478,  0.77923316, -0.28033456,  0.39541151,
       -0.20929846,  1.21672297,  0.25391834, -1.31175197, -1.67255218,
       -0.29158426,  0.46060021, -0.99140659,  1.62906217, -0.95049762,
        0.64791528,  1.69050986, -0.43368545,  0.74428573,  0.46658786,
        1.08040674,  0.39354229, -0.71947687, -0.53820099,  0.54771368,
       -0.55231813, -1.12478478,  0.15222159, -1.1221237 ,  2.23337928,
       -1.00007001,  0.18893229, -0.48631568,  0.08838188, -0.21984402,
       -0.31350461,  0.8852991 ,  0.87415048, -1.14003618,  0.46323907,
       -0.02582967,  1.30148765, -0.85935591,  0.24374143,  0.26397985,
        1.42059601,  0.02176853,  0.15735547,  1.24166203,  0.24054499,
        0.15865615,  0.42650407,  1.7892787 , -2.09954327,  0.94165632,
       -1.04912245, -0.96440831, -1.07294447, -0.92421473, -0.84