# Numpy

https://numpy.org/

## What is it? 
- Numpy is a Python library used for working with arrays
- Numpy is the fundamental package for scientific computing in Python


## Why do we care? 
- Numpy is one of the main reasons why Python is so powerful and popular for scientific computing
- Super fast. Numpy arrays are implemented in C, which makes numpy very fast.
- The arrays allow for vectorized operations

## Show us! 

### Create a 1D array

#### create a list
format: list()

In [510]:
#create a list
my_list = [1,2,3,4]
my_list

[1, 2, 3, 4]

In [511]:
#whats the type
type(my_list)

list

In [513]:
#what is dtype
# my_list.dtype  -> AttributeError: a plain Python list has no .dtype attribute

In [515]:
#whats the shape
# my_list.shape  -> AttributeError: a plain Python list has no .shape attribute

#### create an array
format: np.array()

In [516]:
#import numpy
import numpy as np

In [518]:
#create the array
my_array = np.array(my_list)
my_array

array([1, 2, 3, 4])

In [519]:
#whats the type
type(my_array)

numpy.ndarray

In [521]:
#whats the shape
my_array.shape

(4,)

In [523]:
#dtype
my_array.dtype

dtype('int64')

#### access elements of our new array

In [524]:
my_array

array([1, 2, 3, 4])

In [528]:
my_array[0] #calling the first index, which is zero

1

In [529]:
my_array[-1] #calls the last element of our array

4

#### slice the array

In [536]:
my_array[1]

2

In [531]:
my_array[1:] #inclusive for the first element

array([2, 3, 4])

In [537]:
my_array[2]

3

In [533]:
my_array[:2] #exclusive for the last element 

array([1, 2])

In [538]:
my_array[0:3]

array([1, 2, 3])

#### create an array from 1 to 100

In [543]:
#np.array(?)
a = np.array(range(1,101))
a

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [544]:
#access single element
a[0] 

1

In [545]:
a[-1]

100

In [546]:
#slice the array! 
a[25:]

array([ 26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

In [547]:
a[5:15]

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

### Create a 2D array

In [565]:
#build a list of lists
matrix = [[1,2,3],
[2,3,4],
[6,7,8],
[4,5,6]]
matrix

[[1, 2, 3], [2, 3, 4], [6, 7, 8], [4, 5, 6]]

In [566]:
type(matrix)

list

In [567]:
#turn it into an array! 
matrix_array = np.array(matrix)
matrix_array

array([[1, 2, 3],
       [2, 3, 4],
       [6, 7, 8],
       [4, 5, 6]])

In [568]:
#type
type(matrix_array)

numpy.ndarray

In [569]:
matrix_array.shape

(4, 3)

In [570]:
matrix_array.dtype

dtype('int64')

#### access elements

In [571]:
matrix_array

array([[1, 2, 3],
       [2, 3, 4],
       [6, 7, 8],
       [4, 5, 6]])

In [572]:
matrix_array[0]

array([1, 2, 3])

In [573]:
matrix_array[-1]

array([4, 5, 6])

In [575]:
matrix_array[0][0]

1

In [576]:
matrix_array[0,0]

1

In [577]:
matrix_array[2,1]

7

### Descriptive Stats

In [587]:
#pulling back my big array from 1 - 100
a

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

#### using methods: the method is called on the numpy object
format: object.method()

In [579]:
a.min()

1

In [580]:
a.max()

100

In [583]:
a.mean(), a.std()

(50.5, 28.86607004772212)

In [582]:
a.sum()

5050

In [588]:
matrix_array.min()

1

In [589]:
matrix_array.mean()

4.25

#### using functions: using numpy to call functions
format: np.function(object)

In [590]:
max(a)

100

In [591]:
np.max(a)

100

In [592]:
np.std(a)

28.86607004772212

In [593]:
np.mean(a)

50.5

In [594]:
np.sum(a)

5050

In [596]:
np.mean(matrix_array)

4.25

### Array of Booleans! 

### Boolean Masks

1. create an array
2. make a list of booleans (mask)
3. combine

    format: array [ list of booleans ] 

In [597]:
#pull back our small array
my_array

array([1, 2, 3, 4])

In [604]:
#make list of booleans aka our mask
mask = [True, False, False, True]
mask

[True, False, False, True]

In [605]:
len(mask)

4

In [600]:
#combine them 
my_array[mask]

array([1, 4])

> only the values where the mask is True are returned. This is known as boolean masking

#### how else can we get our array of boolean values?

In [611]:
my_array

array([1, 2, 3, 4])

In [612]:
#write a conditional with the array
my_array == 4

array([False, False, False,  True])

In [620]:
#create my array of booleans
mask = (my_array == 4)
mask

array([False, False, False,  True])

In [621]:
#combine them!
my_array[mask]

array([4])

In [617]:
mask = my_array > 2
mask

array([False, False,  True,  True])

In [622]:
#format: array_name [ list_of_booleans ]
my_array[mask]

array([3, 4])

In [624]:
mask.dtype #gives me the type of everything inside my mask

dtype('bool')

In [630]:
my_array

array([1, 2, 3, 4])

#### what about with multiple conditions?

In [636]:
my_array

array([1, 2, 3, 4])

In [639]:
# use the & instead of AND
# use the | instead of OR
mask = (my_array == 2) | (my_array ==4)
mask

array([False,  True, False,  True])

In [640]:
my_array[mask]

array([2, 4])

#### do it with a matrix!

In [627]:
matrix_array

array([[1, 2, 3],
       [2, 3, 4],
       [6, 7, 8],
       [4, 5, 6]])

In [628]:
mask = (matrix_array > 3)
mask

array([[False, False, False],
       [False, False,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [629]:
matrix_array[mask]

array([4, 6, 7, 8, 4, 5, 6])

#### cool! can we just do it with a list instead of an array?

In [641]:
#pull back our initial list
my_list

[1, 2, 3, 4]

In [647]:
#create an array of booleans
my_list == 4

False

In [648]:
mask

array([False,  True, False,  True])

In [649]:
#apply an array of booleans to a list
my_list[mask]

TypeError: only integer scalar arrays can be converted to a scalar index

#### let's bring it back to arrays and make them bigger

In [650]:
#array
a

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [656]:
#mask
mask = a >= 90
mask

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [657]:
#boolean mask
a[mask]

array([ 90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

#### what if the numbers are out of order?

In [663]:
b = np.array([4,1,7,2,5])
b

array([4, 1, 7, 2, 5])

In [664]:
mask = (b > 2)
mask

array([ True, False,  True, False,  True])

In [665]:
b[mask]

array([4, 7, 5])

> still works!

#### shortcut?

In [667]:
a[a>=90]

array([ 90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

#### what if you put something that was all false, what would it return?

In [671]:
a==105

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [672]:
a[a==105]

array([], dtype=int64)

> an empty array

#### what if we wanted to check two conditions?

In [673]:
a

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100])

In [677]:
# note the parentheses
# also note that we're using & instead of and
mask = (a > 5) & (a < 15)
mask

array([False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [678]:
a[mask]

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14])

#### can we convert our arrays back to lists?

In [690]:
list(my_array)

[1, 2, 3, 4]

In [691]:
matrix_array

array([[1, 2, 3],
       [2, 3, 4],
       [6, 7, 8],
       [4, 5, 6]])

In [693]:
new_list = []

for x in list(matrix_array):
    new_list.extend(x)

new_list

[1, 2, 3, 2, 3, 4, 6, 7, 8, 4, 5, 6]

#### what if we wanted the opposite of our mask?

In [695]:
mask

array([False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [694]:
~mask

array([ True,  True,  True,  True,  True, False, False, False, False,
       False, False, False, False, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [697]:
# the ~ operator inverts the mask (True becomes False and vice versa)
a[~mask]

array([  1,   2,   3,   4,   5,  15,  16,  17,  18,  19,  20,  21,  22,
        23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
        36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,
        49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,
        62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
        75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,
        88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

## Vectorized Operations
- makes looping over math so easy and fast!

### Add one to every element

#### hard way: doing it in a list

In [715]:
my_list = [1,2,3,4]
my_list

[1, 2, 3, 4]

In [719]:
new_list = []

#using a for loop
for x in my_list:
    new_list.append(x+1)
new_list

[2, 3, 4, 5]

In [721]:
#using a list comprehension
[x+1 for x in my_list]

[2, 3, 4, 5]

#### easy way: doing it with an array

In [722]:
my_array

array([1, 2, 3, 4])

In [723]:
#using vectorized operations
my_array + 1

array([2, 3, 4, 5])

### more operations

In [726]:
my_array

array([1, 2, 3, 4])

In [725]:
my_array * 2

array([2, 4, 6, 8])

In [727]:
my_array ** 2

array([ 1,  4,  9, 16])

In [728]:
my_array/10

array([0.1, 0.2, 0.3, 0.4])

In [730]:
(my_array/10) + 2

array([2.1, 2.2, 2.3, 2.4])

In [731]:
np.log(my_array)

array([0.        , 0.69314718, 1.09861229, 1.38629436])

### show us the speed

In [740]:
#make a really big array
big_array = np.array(range(1_000_001))
big_array

array([      0,       1,       2, ...,  999998,  999999, 1000000])

In [758]:
%%timeit

big_array ** 2

641 µs ± 2.91 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [750]:
#make a really big list
big_list = list(big_array)
big_list[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [756]:
%%timeit

for x in big_list:
    x**2

114 ms ± 1.7 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Numpy ways to create arrays

#### full of zeros

In [768]:
np.zeros?

In [770]:
# np.zeros()

In [771]:
np.zeros((10))

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [772]:
np.zeros((10))  + 1

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [773]:
np.zeros((10,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

#### full of ones

In [774]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [776]:
len(np.ones(10_000))

10000

#### full of whatever you want 

In [777]:
np.full(10, 3)

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [779]:
np.full(10,True)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [780]:
np.full(10,5.5)

array([5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5])

In [781]:
np.full(10,'hello')

array(['hello', 'hello', 'hello', 'hello', 'hello', 'hello', 'hello',
       'hello', 'hello', 'hello'], dtype='<U5')

In [783]:
np.full((10,2),3)

array([[3, 3],
       [3, 3],
       [3, 3],
       [3, 3],
       [3, 3],
       [3, 3],
       [3, 3],
       [3, 3],
       [3, 3],
       [3, 3]])

#### a quicker way to make a range

In [784]:
np.array(range(10))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [785]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [786]:
np.arange(1,11)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

#### an array of random integers

In [792]:
np.random.randint(2,20)

8

In [794]:
np.random.randint(2,20, 10)

array([14,  4,  3,  9, 17,  2,  8, 11, 12, 13])

In [804]:
np.random.randint(-1,10, 21)

array([ 4,  9,  0,  1,  2,  1,  1,  9,  6, -1,  9,  7,  5,  5,  0,  8,  6,
        3,  1,  8,  1])

#### an array of random numbers from the standard normal distribution

In [811]:
np.random.randn(10)

array([-0.87503179, -0.85953281, -0.24724674,  0.85895231, -1.8150813 ,
        1.47620936, -1.11142625, -1.70640528,  0.43013921,  0.1297981 ])

In [818]:
np.random.randn(10000_000).mean()

0.00017839867720361493