In [1]:
import numpy as np

# DataTypes & Attributes

#### NumPy's main datatype is ndarray (n-dimensional array)

In [2]:
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [3]:
type(a1)

numpy.ndarray

In [7]:
# 2-D array: mixing ints and floats makes NumPy store every element as a float
a2 = np.array([
    [1, 2.0, 3.3],
    [4, 5, 6.6],
])

# 3-D array: two stacked 3x3 integer matrices, giving shape (2, 3, 3)
a3 = np.array([
    [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]],
    [[10, 11, 12],
     [13, 14, 15],
     [16, 17, 18]],
])

In [11]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.6]])

In [14]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [15]:
a1.shape

(3,)

In [16]:
a2.shape

(2, 3)

In [17]:
a3.shape

(2, 3, 3)

In [18]:
# Number of dimensions (axes) of each array
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [19]:
# Total number of elements in each array
a1.size, a2.size, a3.size

(3, 6, 18)

In [20]:
type(a1), type(a2), type(a3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

# Creating a DataFrame from a NumPy array

In [22]:
import pandas as pd

In [26]:
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.6


# Creating NumPy arrays

In [43]:
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [44]:
ones = np.ones([2, 3])
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [45]:
zeros = np.zeros([2, 3])
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [46]:
range_array = np.arange(1, 10, 2)
range_array

array([1, 3, 5, 7, 9])

In [47]:
random_array = np.random.randint(0, 10, [3, 5])
random_array

array([[5, 4, 2, 9, 9],
       [2, 7, 5, 5, 4],
       [4, 5, 5, 6, 5]])

In [48]:
random_array = np.random.random([3, 5])
random_array

array([[0.76853681, 0.96908465, 0.1702798 , 0.48879066, 0.73320401],
       [0.48518121, 0.73530725, 0.61294617, 0.9088697 , 0.19086233],
       [0.69908095, 0.70748156, 0.17540818, 0.5731508 , 0.21139793]])

#### Getting the same "random" array using seed

In [58]:
# Seeding NumPy's global random generator makes the draw below reproducible:
# running this cell always yields the same array as long as the seed value is unchanged
np.random.seed(1)
np.random.randint(10, size=[5, 3])

array([[5, 8, 9],
       [5, 0, 0],
       [1, 7, 6],
       [9, 2, 4],
       [5, 2, 4]])

In [61]:
# This cell does not reseed, so each re-run continues from the generator's
# current state and the values change from run to run
random_array = np.random.randint(10, size=[5, 3])
random_array

array([[8, 1, 4],
       [0, 3, 9],
       [2, 0, 4],
       [9, 2, 7],
       [7, 9, 8]])

# Viewing Arrays and Matrices

In [62]:
np.unique(random_array)

array([0, 1, 2, 3, 4, 7, 8, 9])

In [71]:
print(a1)
print('-------------------')
print(a2)
print('-------------------')
print(a3)

[1 2 3]
-------------------
[[1.  2.  3.3]
 [4.  5.  6.6]]
-------------------
[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[10 11 12]
  [13 14 15]
  [16 17 18]]]


In [72]:
a1[0]

1

In [73]:
a2[0]

array([1. , 2. , 3.3])

In [74]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [75]:
a3[:2, :2, :2]

array([[[ 1,  2],
        [ 4,  5]],

       [[10, 11],
        [13, 14]]])

# Manipulating & Comparing Arrays

### Arithmetic

In [79]:
a1

array([1, 2, 3])

In [81]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [82]:
a1 + ones

array([2., 3., 4.])

In [83]:
a1 - ones

array([0., 1., 2.])

In [84]:
a1 * ones

array([1., 2., 3.])

NumPy broadcasts `a1` (shape `(3,)`) across the rows of `a2` (shape `(2, 3)`), so the two arrays can be combined element-wise.

In [87]:
a1, a2

(array([1, 2, 3]),
 array([[1. , 2. , 3.3],
        [4. , 5. , 6.6]]))

In [88]:
a1 * a2

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.8]])

In [89]:
a2 / a1

array([[1. , 1. , 1.1],
       [4. , 2.5, 2.2]])

In [90]:
# Floor division
a2 // a1

array([[1., 1., 1.],
       [4., 2., 2.]])

In [91]:
a2 ** a1

array([[  1.   ,   4.   ,  35.937],
       [  4.   ,  25.   , 287.496]])

In [93]:
a2 % a1

array([[0. , 0. , 0.3],
       [0. , 1. , 0.6]])

### Aggregation

Aggregation = performing one operation over a collection of values to produce a summary (e.g. sum, mean, min, max)

In [94]:
a1

array([1, 2, 3])

<b style="color: orange">IMPORTANT!</b>
Use Python's built-in functions (`sum()`) on Python datatypes and NumPy's functions (`np.sum()`) on NumPy arrays — the NumPy versions are much faster on arrays.

In [97]:
# Python's built-in sum() also accepts an ndarray and gives the same result
print(sum(a1))

# Prefer np.sum() here because a1 is an ndarray
print(np.sum(a1))

6
6


In [98]:
massive_array = np.random.random(1000000)

In [99]:
massive_array[:10]

array([0.46923853, 0.09617226, 0.90337017, 0.11949047, 0.52479938,
       0.083623  , 0.91686133, 0.91044838, 0.29893011, 0.58438912])

In [100]:
%timeit sum(massive_array) # Python's sum
%timeit np.sum(massive_array) # NumPy's np.sum

63.6 ms ± 2.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
602 µs ± 36.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [101]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.6]])

In [102]:
np.mean(a2)

3.65

In [103]:
np.max(a2)

6.6

In [104]:
np.min(a2)

1.0

In [108]:
np.std(a2)

1.8490988796347985

In [110]:
# Variance is the square of the standard deviation
np.var(a2)

3.419166666666667

### Reshaping & Transposing

In [113]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.6]])

In [114]:
a2.shape

(2, 3)

In [115]:
a2.reshape([2, 3, 1])

array([[[1. ],
        [2. ],
        [3.3]],

       [[4. ],
        [5. ],
        [6.6]]])

Transpose = swaps the axes of the matrix, so rows become columns and a shape of `(2, 3)` becomes `(3, 2)`

In [116]:
a2.T

array([[1. , 4. ],
       [2. , 5. ],
       [3.3, 6.6]])

### Dot Product
- `np.dot` is the linear-algebra product, not element-wise multiplication (`*`)
- For two 1-D arrays it returns the inner (dot) product; for 2-D arrays it performs matrix multiplication, so the inner dimensions must match

In [117]:
np.random.seed(0)

# Number of jars sold
sales_amounts = np.random.randint(20, size=[5, 3])
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [118]:
weekly_sales = pd.DataFrame(sales_amounts, index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'], 
                            columns=['Almond butter', 'Peanut butter', 'Cashew butter'])
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thu,4,6,12
Fri,1,6,7


In [119]:
# Create prices array
prices = np.array([10, 8, 12])
prices

array([10,  8, 12])

In [129]:
# Create butter_prices DataFrame
butter_prices = pd.DataFrame(
    prices.reshape(1, 3),  # one row holding the three prices
    index=['Price'],
    columns=['Almond butter', 'Peanut butter', 'Cashew butter'],
)
butter_prices

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Price,10,8,12


In [133]:
# Daily revenue: the (5, 3) sales matrix times the length-3 price vector gives
# one total per day. `prices` is 1-D, so transposing it is a no-op and is omitted.
weekly_sales['Total ($)'] = sales_amounts.dot(prices)

In [134]:
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter,Total ($)
Mon,12,15,0,240
Tue,3,3,7,138
Wed,9,19,18,458
Thu,4,6,12,232
Fri,1,6,7,142


### Comparison Operators

In [135]:
a1, a2

(array([1, 2, 3]),
 array([[1. , 2. , 3.3],
        [4. , 5. , 6.6]]))

In [137]:
a1 >= a2

array([[ True,  True, False],
       [False, False, False]])

In [138]:
a1 == a2

array([[ True,  True, False],
       [False, False, False]])

### Sorting Arrays

In [144]:
random_array = np.random.randint(10, size=[3, 5])
random_array

array([[8, 1, 3, 3, 3],
       [7, 0, 1, 9, 9],
       [0, 4, 7, 3, 2]])

In [147]:
# Sorts along the last axis, i.e. each row of this 2-D array is sorted independently
np.sort(random_array)

array([[1, 3, 3, 3, 8],
       [0, 1, 7, 9, 9],
       [0, 2, 3, 4, 7]])

In [149]:
# Returns, for each row, the indices that would put that row in sorted order
np.argsort(random_array)

array([[1, 2, 3, 4, 0],
       [1, 2, 0, 3, 4],
       [0, 4, 3, 1, 2]], dtype=int64)

In [158]:
random_array

array([[8, 1, 3, 3, 3],
       [7, 0, 1, 9, 9],
       [0, 4, 7, 3, 2]])

In [155]:
# Row index of the smallest value in each column (axis=0 reduces over rows)
np.argmin(random_array, axis=0)

array([2, 1, 1, 0, 2], dtype=int64)

In [157]:
# Column index of the largest value in each row (axis=1 reduces over columns)
np.argmax(random_array, axis=1)

array([0, 3, 2], dtype=int64)

In [156]:
# Row index of the largest value in each column (axis=0 reduces over rows)
np.argmax(random_array, axis=0)

array([0, 2, 2, 1, 1], dtype=int64)

# Practical Example - NumPy in Action!

### Turning an image into a NumPy array

In [161]:
from matplotlib.image import imread

In [164]:
panda = imread('data/panda.png')
type(panda)

numpy.ndarray

In [167]:
# Image is stored as an array that represents the color values for each pixel
panda

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       ...,

       [[0.13333334, 0.07450981, 0.05490196],
        [0.12156863, 0.0627451 , 0.04313726],
        [0.10980392, 0

In [168]:
panda.size, panda.shape, panda.ndim

(24465000, (2330, 3500, 3), 3)