# Introducing Numpy

### What is Numpy?
- Our linear algebra library for Python
- Provides an implementation of a high-performance multi-dimensional array object
- Provides a large toolbox of linear algebra methods
- Its core is written in C and targets CPython, which is what makes it fast.


### What problems does Numpy solve for us?
- Native lists in Python don't do linear algebra out of the box and they're not as fast as possible.
- Adds a robust and mature linear algebra library for our use!
- Vectorization means operations are applied to whole arrays instead of individual elements (big deal)

In [4]:
# importing numpy and aliasing as np
import numpy as np

## Vectorization
- Vectorization means operations are applied to whole arrays instead of individual elements
- Scalar multiplication = multiplying an array or matrix by a number
- Applying a function or operation to every element in an array

In [13]:
numbers = list(range(100000))
numbers = np.array(numbers)
type(numbers)

len(numbers)
numbers * 2
numbers.shape

array([    0,     1,     2, ..., 99997, 99998, 99999])

In [15]:
x = np.array([1, 2, 3])
y = np.array([2, 3, 4])
x + y

array([3, 5, 7])

In [16]:
x - y

array([-1, -1, -1])

In [17]:
x * y

array([ 2,  6, 12])

In [18]:
x.dot(y)

20

In [24]:
x / y 

array([0.5       , 0.66666667, 0.75      ])

In [23]:
numbers
numbers.mean()
numbers.min()
numbers.max()

99999

In [25]:
numbers + 3

array([     3,      4,      5, ..., 100000, 100001, 100002])

In [26]:
x.dot(y)

20

In [27]:
y.dot(x)

20

### So you heard Numpy was fast...
Let's compare base Python to numpy operations

In [30]:
# Add one to every number in a list of ten million numbers
numbers = list(range(10_000_000))

%timeit [number + 1 for number in numbers]

729 ms ± 29.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
# Add one to every number in an array of ten million
numbers = np.arange(10_000_000)
%timeit numbers + 1

29.5 ms ± 944 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


867 µs ± 23 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [32]:
# Generate a thousand random numbers between 1 and 1000

import random
%timeit [random.randint(1, 1000) for _ in range(1000)]

965 µs ± 21.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [33]:
# Generate a thousand random numbers between 1 and 1000

%timeit np.random.randint(1000, size=1000)

6.31 µs ± 85.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


87.08189158016148

161.8075801749271

In [34]:
numbers

array([      0,       1,       2, ..., 9999997, 9999998, 9999999])

In [35]:
numbers[4]

4

In [36]:
len(numbers)

10000000

In [37]:
numbers[3:5]

array([3, 4])

In [38]:
numbers[-1]

9999999

In [42]:
divisible_by_fifteen = numbers[numbers % 15 == 0]

In [44]:
divisible_by_fifteen[1:5]

array([15, 30, 45, 60])

In [45]:
even_numbers = numbers[numbers % 2 == 0]

In [46]:
even_numbers

array([      0,       2,       4, ..., 9999994, 9999996, 9999998])

In [47]:
# x, y and numbers have different lengths, so this is a ragged sequence
# rather than a true 2-D matrix. NumPy >= 1.24 raises ValueError for ragged
# input unless dtype=object is requested explicitly (older versions only
# emitted a deprecation warning). The result is a 1-D object array whose
# three elements are the original arrays.
matrix = np.array([
    x,
    y,
    numbers,
], dtype=object)

In [48]:
matrix

array([array([1, 2, 3]), array([2, 3, 4]),
       array([      0,       1,       2, ..., 9999997, 9999998, 9999999])],
      dtype=object)

In [49]:
matrix[0]

array([1, 2, 3])

In [50]:
matrix[1]

array([2, 3, 4])

In [51]:
matrix[2]

array([      0,       1,       2, ..., 9999997, 9999998, 9999999])

In [52]:
matrix * 3

array([array([3, 6, 9]), array([ 6,  9, 12]),
       array([       0,        3,        6, ..., 29999991, 29999994, 29999997])],
      dtype=object)

In [53]:
matrix[1] * 100

array([200, 300, 400])

In [54]:
matrix[1] = matrix[1] * 100

In [55]:
matrix

array([array([1, 2, 3]), array([200, 300, 400]),
       array([      0,       1,       2, ..., 9999997, 9999998, 9999999])],
      dtype=object)

In [56]:
matrix * matrix

array([array([1, 4, 9]), array([ 40000,  90000, 160000]),
       array([             0,              1,              4, ...,
       99999940000009, 99999960000004, 99999980000001])], dtype=object)

In [59]:
x / x

array([1., 1., 1.])

In [60]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [65]:
np.ones((10, 3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [62]:
ones = np.ones(30)

In [63]:
ones.reshape(10, 3)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [66]:
type(ones[0])

numpy.float64

In [67]:
numbers = np.array([-5, 10, 11, 13, 15, -5, 0, 0, 0, 1, 1, 3, -4, -4])

In [68]:
numbers == 10

array([False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False])

In [71]:
mask_above_ten = numbers >= 10

In [72]:
numbers[mask_above_ten]

array([10, 11, 13, 15])

In [85]:
fruits = np.array(["banana", "kiwi", "mango", "strawberry", "tomato"])
type(fruits)
for fruit in fruits:
    print(fruit)

banana
kiwi
mango
strawberry
tomato


In [84]:
x = np.array([1, 2, 3, 4])
x

array([1, 2, 3, 4])

In [86]:
numbers

array([-5, 10, 11, 13, 15, -5,  0,  0,  0,  1,  1,  3, -4, -4])

In [88]:
np.median(numbers)

0.5

In [89]:
numbers.min()
numbers.max()
numbers.mean()
numbers.sum()
numbers.std()

6.630172220498452

In [90]:
np.arange(100, 200)

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
       113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
       126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
       139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
       152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
       165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177,
       178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
       191, 192, 193, 194, 195, 196, 197, 198, 199])

In [91]:
np.full(10, 3.3)

array([3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3])