## 1. Basics

In [55]:
import numpy as np
import pandas as pd

In [56]:
# A 1-D array (vector) built from a flat Python list.
a1 = np.array([1, 2, 3])
# Display the array next to its shape tuple.
(a1, a1.shape)

(array([1, 2, 3]), (3,))

In [57]:
# Inspect the Python type of the array object.
type(a1)

numpy.ndarray

In [58]:
# A 2-D array (matrix). Mixing ints and floats in the literal yields a
# floating-point array, so the ints are displayed as 1. and 5.
a2 = np.array(
    [
        [1, 2.5, 3.6],
        [4.5, 5, 6.7],
    ]
)
(a2, a2.shape)

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 (2, 3))

In [59]:
# A 3-D array: two stacked 3x3 matrices.
a3 = np.array(
    [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
    ]
)
(a3, a3.shape)

(array([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]]),
 (2, 3, 3))

In [60]:
# Number of dimensions (axes) of each array.
a1.ndim,a2.ndim,a3.ndim

(1, 2, 3)

In [61]:
# Create pandas DataFrames from NumPy arrays. pandas is imported alongside
# NumPy in the import cell at the top of the notebook.
df1 = pd.DataFrame(a1)  # 1-D input becomes a single column
df2 = pd.DataFrame(a2)  # 2-D input maps to rows x columns
# df3 = pd.DataFrame(a3)  # raises ValueError: DataFrames accept at most 2-D input
df1, df2

(   0
 0  1
 1  2
 2  3,
      0    1    2
 0  1.0  2.5  3.6
 1  4.5  5.0  6.7)

## 2. Creating Arrays




In [62]:
# Build an array from a Python list; the dtype is inferred from the elements.
sample_array = np.array([1, 2, 3])
(sample_array, sample_array.dtype)

(array([1, 2, 3]), dtype('int64'))

In [63]:
# Constant-filled arrays; the argument is the desired shape.
ones_1 = np.ones(shape=(1, 3))  # single-row array, reused in the arithmetic section
ones = np.ones(shape=(2, 4))
zeros = np.zeros(shape=(2, 4))
(ones, zeros)

(array([[1., 1., 1., 1.],
        [1., 1., 1., 1.]]),
 array([[0., 0., 0., 0.],
        [0., 0., 0., 0.]]))

In [64]:
# np.ones / np.zeros produce floating-point arrays unless a dtype is given.
ones.dtype, zeros.dtype

(dtype('float64'), dtype('float64'))

In [65]:
# Random integers drawn from [0, 10) arranged as 3 rows x 5 columns.
# No seed is set here, so the values differ on every run.
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

array([[8, 1, 3, 3, 3],
       [7, 0, 1, 9, 9],
       [0, 4, 7, 3, 2]])

## 3. Random Seed  (random numbers stay constant)


In [66]:
# Seeding the global generator makes the draws below identical on every run.
np.random.seed(42)
random_array_4 = np.random.randint(low=0, high=10, size=(5, 3))
random_array_4

array([[6, 3, 7],
       [4, 6, 9],
       [2, 6, 7],
       [4, 3, 7],
       [7, 2, 5]])

## 4. Viewing Arrays & Matrices




In [67]:
# Distinct values present in the array, returned in ascending order.
np.unique(random_array_4)

array([2, 3, 4, 5, 6, 7, 9])

In [68]:
# Indexing a 1-D array with a single integer yields a scalar element.
a1,a1[0]

(array([1, 2, 3]), np.int64(1))

In [69]:
# Indexing a 2-D array with a single integer yields a whole row (here the second one).
a2,a2[1]

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 array([4.5, 5. , 6.7]))

In [70]:
# Indexing a 3-D array with a single integer yields a 2-D sub-array (here the first 3x3 block).
a3, a3[0]

(array([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]]),
 array([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]))

### Slicing arrays

In [71]:
# Each additional slice trims one more axis to its first two entries:
# shapes go (2, 3, 3) -> (2, 2, 3) -> (2, 2, 2).
a3[:2], a3[:2,:2], a3[:2,:2,:2]

(array([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]]),
 array([[[ 1,  2,  3],
         [ 4,  5,  6]],
 
        [[10, 11, 12],
         [13, 14, 15]]]),
 array([[[ 1,  2],
         [ 4,  5]],
 
        [[10, 11],
         [13, 14]]]))

In [72]:
# A 4-D array of random integers in [0, 10): 2 groups x 3 matrices x 4 rows x 5 columns.
a4 = np.random.randint(0, 10, size=(2, 3, 4, 5))
a4

array([[[[4, 1, 7, 5, 1],
         [4, 0, 9, 5, 8],
         [0, 9, 2, 6, 3],
         [8, 2, 4, 2, 6]],

        [[4, 8, 6, 1, 3],
         [8, 1, 9, 8, 9],
         [4, 1, 3, 6, 7],
         [2, 0, 3, 1, 7]],

        [[3, 1, 5, 5, 9],
         [3, 5, 1, 9, 1],
         [9, 3, 7, 6, 8],
         [7, 4, 1, 4, 7]]],


       [[[9, 8, 8, 0, 8],
         [6, 8, 7, 0, 7],
         [7, 2, 0, 7, 2],
         [2, 0, 4, 9, 6]],

        [[9, 8, 6, 8, 7],
         [1, 0, 6, 6, 7],
         [4, 2, 7, 5, 2],
         [0, 2, 4, 2, 0]],

        [[4, 9, 6, 6, 8],
         [9, 9, 2, 6, 0],
         [3, 3, 4, 6, 6],
         [3, 6, 2, 5, 1]]]])

#### Let's try to get the $8, 8$  in the array  $[9,8,8,0,8]$

In [73]:
# Second group, first matrix, first row, columns 1 and 2, written as a single
# multi-axis index instead of a chain of separate [] lookups.
a4[1, 0, 0, 1:3]

array([8, 8])

In [74]:
# Get the first 4 numbers of the first row of every inner-most matrix.
# The `:1` on the third axis keeps only row 0; use `:` there to take the first
# 4 numbers of every row instead.
a4[:,:,:1,:4]

array([[[[4, 1, 7, 5]],

        [[4, 8, 6, 1]],

        [[3, 1, 5, 5]]],


       [[[9, 8, 8, 0]],

        [[9, 8, 6, 8]],

        [[4, 9, 6, 6]]]])

## 5. Manipulating and Comparing Arrays

In [75]:
# Arithmetic: adding a scalar applies it to every element of the array.
a1, a1 + 100

(array([1, 2, 3]), array([101, 102, 103]))

In [76]:
# Shapes (3,) and (1, 3) broadcast to (1, 3); int + float gives a float result.
a1, ones_1, a1+ones_1

(array([1, 2, 3]), array([[1., 1., 1.]]), array([[2., 3., 4.]]))

In [77]:
# `*` multiplies element by element (it is not a dot product).
a1*a1

array([1, 4, 9])

In [78]:
# Dot product of two 1-D arrays: the sum of the element-wise products (1 + 4 + 9).
np.dot(a1,a1)

np.int64(14)

In [79]:
# Element-wise product of a 2-D array with itself, i.e. every entry squared.
a2, a2*a2

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 array([[ 1.  ,  6.25, 12.96],
        [20.25, 25.  , 44.89]]))

In [80]:
# Matrix product of a (2, 3) array with its (3, 2) transpose gives a (2, 2) result.
(
    a2,
    a2.T,
    np.dot(a2, a2.T),
)

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 array([[1. , 4.5],
        [2.5, 5. ],
        [3.6, 6.7]]),
 array([[20.21, 41.12],
        [41.12, 90.14]]))

In [81]:
# For N-D inputs np.dot sums over the last axis of the first operand and the
# second-to-last axis of the second: (2, 3, 3) . (3, 3, 2) -> (2, 3, 3, 2).
(
    a3,
    a3.T,
    np.dot(a3, a3.T),
)

(array([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],
 
        [[10, 11, 12],
         [13, 14, 15],
         [16, 17, 18]]]),
 array([[[ 1, 10],
         [ 4, 13],
         [ 7, 16]],
 
        [[ 2, 11],
         [ 5, 14],
         [ 8, 17]],
 
        [[ 3, 12],
         [ 6, 15],
         [ 9, 18]]]),
 array([[[[ 30,  84],
          [ 36,  90],
          [ 42,  96]],
 
         [[ 66, 201],
          [ 81, 216],
          [ 96, 231]],
 
         [[102, 318],
          [126, 342],
          [150, 366]]],
 
 
        [[[138, 435],
          [171, 468],
          [204, 501]],
 
         [[174, 552],
          [216, 594],
          [258, 636]],
 
         [[210, 669],
          [261, 720],
          [312, 771]]]]))

In [82]:
# np.sum with no axis argument adds up every element and returns a single scalar.
a4, np.sum(a4)

(array([[[[4, 1, 7, 5, 1],
          [4, 0, 9, 5, 8],
          [0, 9, 2, 6, 3],
          [8, 2, 4, 2, 6]],
 
         [[4, 8, 6, 1, 3],
          [8, 1, 9, 8, 9],
          [4, 1, 3, 6, 7],
          [2, 0, 3, 1, 7]],
 
         [[3, 1, 5, 5, 9],
          [3, 5, 1, 9, 1],
          [9, 3, 7, 6, 8],
          [7, 4, 1, 4, 7]]],
 
 
        [[[9, 8, 8, 0, 8],
          [6, 8, 7, 0, 7],
          [7, 2, 0, 7, 2],
          [2, 0, 4, 9, 6]],
 
         [[9, 8, 6, 8, 7],
          [1, 0, 6, 6, 7],
          [4, 2, 7, 5, 2],
          [0, 2, 4, 2, 0]],
 
         [[4, 9, 6, 6, 8],
          [9, 9, 2, 6, 0],
          [3, 3, 4, 6, 6],
          [3, 6, 2, 5, 1]]]]),
 np.int64(559))

## 6. Comparing NumPy and built-in Python functions

In [83]:
# One million uniform random floats in [0, 1), used for the timing comparison.
massive_array = np.random.random(1_000_000)
# Peek at the first ten values only; never display the whole array.
massive_array[:10]

array([0.62435405, 0.29563369, 0.10549426, 0.45653457, 0.21844044,
       0.41650995, 0.88328026, 0.32434502, 0.12208795, 0.35629784])

In [84]:
# Compare Python's built-in sum() with NumPy's np.sum() on the one-million-element array.
%timeit sum(massive_array)  # Python built-in
%timeit np.sum(massive_array)  # NumPy: roughly 200x faster in the recorded run (42 ms vs 207 μs)

42 ms ± 309 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
207 μs ± 9.08 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## 7. Aggregations

In [85]:
# Common aggregations over the whole (flattened) array, shown as one tuple.
(
    a2,
    np.mean(a2),
    np.max(a2),
    np.min(a2),
    np.std(a2),
    np.var(a2),
    np.sqrt(np.var(a2)),  # square root of the variance: matches np.std above
    np.argmax(a2),  # position of the maximum, as an index into the flattened array
    np.argmin(a2),  # position of the minimum, as an index into the flattened array
)

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 np.float64(3.8833333333333333),
 np.float64(6.7),
 np.float64(1.0),
 np.float64(1.8197222010210483),
 np.float64(3.3113888888888887),
 np.float64(1.8197222010210483),
 np.int64(5),
 np.int64(0))

In [86]:
# axis=1 averages across the columns, producing one mean per row.
a2, np.mean(a2,axis=1)

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 array([2.36666667, 5.4       ]))

## 8. Reshaping and Transposing

In [87]:
# a3*a2  # raises ValueError: shapes (2, 3, 3) and (2, 3) cannot be broadcast together

In [88]:
# reshape(2, 3, 1) keeps the same six values but appends a trailing axis of length 1.
a2, a2.shape, a2.reshape(2,3,1)

(array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]),
 (2, 3),
 array([[[1. ],
         [2.5],
         [3.6]],
 
        [[4.5],
         [5. ],
         [6.7]]]))

In [89]:
# With the trailing length-1 axis, (2, 3, 1) broadcasts against a3's (2, 3, 3):
# each a2 value scales one whole row of the matching 3x3 block.
a2_reshaped = a2.reshape((2, 3, 1))
a3 * a2_reshaped

array([[[  1. ,   2. ,   3. ],
        [ 10. ,  12.5,  15. ],
        [ 25.2,  28.8,  32.4]],

       [[ 45. ,  49.5,  54. ],
        [ 65. ,  70. ,  75. ],
        [107.2, 113.9, 120.6]]])

In [90]:
# a2*a2.T  # raises ValueError: element-wise `*` cannot broadcast shapes (2, 3) and (3, 2)

In [91]:
# .T reverses the order of the axes, so the (2, 3, 4, 5) array becomes (5, 4, 3, 2).
a4, a4.T

(array([[[[4, 1, 7, 5, 1],
          [4, 0, 9, 5, 8],
          [0, 9, 2, 6, 3],
          [8, 2, 4, 2, 6]],
 
         [[4, 8, 6, 1, 3],
          [8, 1, 9, 8, 9],
          [4, 1, 3, 6, 7],
          [2, 0, 3, 1, 7]],
 
         [[3, 1, 5, 5, 9],
          [3, 5, 1, 9, 1],
          [9, 3, 7, 6, 8],
          [7, 4, 1, 4, 7]]],
 
 
        [[[9, 8, 8, 0, 8],
          [6, 8, 7, 0, 7],
          [7, 2, 0, 7, 2],
          [2, 0, 4, 9, 6]],
 
         [[9, 8, 6, 8, 7],
          [1, 0, 6, 6, 7],
          [4, 2, 7, 5, 2],
          [0, 2, 4, 2, 0]],
 
         [[4, 9, 6, 6, 8],
          [9, 9, 2, 6, 0],
          [3, 3, 4, 6, 6],
          [3, 6, 2, 5, 1]]]]),
 array([[[[4, 9],
          [4, 9],
          [3, 4]],
 
         [[4, 6],
          [8, 1],
          [3, 9]],
 
         [[0, 7],
          [4, 4],
          [9, 3]],
 
         [[8, 2],
          [2, 0],
          [7, 3]]],
 
 
        [[[1, 8],
          [8, 8],
          [1, 9]],
 
         [[0, 8],
          [1, 0],
      

In [92]:
# Transposing a 1-D array changes nothing, so a1*a1.T equals a1*a1.
a1, a1*a1, a1*a1.T

(array([1, 2, 3]), array([1, 4, 9]), array([1, 4, 9]))

## 9. Butter Sales Example

In [93]:
# Fixed seed so the example sales figures are reproducible.
np.random.seed(0)
# Units sold: 5 days (rows) x 3 products (columns), each value in [0, 20).
sales_amounts = np.random.randint(0, 20, size=(5, 3))
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [94]:
# Weekly sales table: one row per weekday, one column per butter type.
weekly_sales = pd.DataFrame(
    sales_amounts,
    index=["Mon", "Tues", "Wed", "Thurs", "Fri"],
    columns=["Almond Butter", "Peanut Butter", "Cashew Butter"],
)
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [95]:
# Unit price of each butter, in the same column order as the sales table.
prices = np.array([10, 8, 12])
# A one-row DataFrame needs 2-D data, hence the reshape from (3,) to (1, 3).
prices_df = pd.DataFrame(
    prices.reshape(1, 3),
    index=["Price"],
    columns=["Almond Butter", "Peanut Butter", "Cashew Butter"],
)
prices_df

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [96]:
# Daily revenue: each day's row of units sold dotted with the per-product prices.
sales_by_day = np.dot(weekly_sales, prices)
# Store the grand total under its own name so `weekly_sales` stays a DataFrame
# and this cell gives the same result when it is re-run.
total_sales = np.sum(sales_by_day)
sales_by_day, total_sales

(array([240, 138, 458, 232, 142]), np.int64(1210))

## 10. Comparison Operators

In [97]:
# Redisplay the two operands used in the comparisons that follow.
a1, a2

(array([1, 2, 3]),
 array([[1. , 2.5, 3.6],
        [4.5, 5. , 6.7]]))

In [98]:
# Element-wise comparison: a1 is broadcast against each row of a2, giving a boolean array.
a1>a2, a2>a1

(array([[False, False, False],
        [False, False, False]]),
 array([[False,  True,  True],
        [ True,  True,  True]]))

In [99]:
# Element-wise equality; only the first entry (1 vs 1.0) matches.
a1==a2

array([[ True, False, False],
       [False, False, False]])

## 11. Sorting Arrays

In [100]:
# Fresh 3x5 array of random integers in [0, 10) to demonstrate sorting.
random_array = np.random.randint(0, 10, size=(3, 5))
random_array

array([[7, 8, 1, 5, 9],
       [8, 9, 4, 3, 0],
       [3, 5, 0, 2, 3]])

In [101]:
# Sorts along the last axis by default, i.e. each row is sorted independently.
np.sort(random_array)

array([[1, 5, 7, 8, 9],
       [0, 3, 4, 8, 9],
       [0, 2, 3, 3, 5]])

In [102]:
# For each row, the column indices that would put that row in ascending order.
np.argsort(random_array)

array([[2, 3, 0, 1, 4],
       [4, 3, 2, 0, 1],
       [2, 3, 0, 4, 1]])

In [103]:
# axis=0: for each column, the row index holding that column's maximum.
np.argmax(random_array,axis=0)

array([1, 1, 1, 0, 0])

In [104]:
# axis=1: for each row, the column index holding that row's maximum.
np.argmax(random_array,axis=1)

array([4, 1, 1])

## 12. Practical Example

<img src="images/numpy-images/panda.png">

In [106]:
# imread loads an image file from disk into a NumPy array of pixel values.
from matplotlib.image import imread

# Path is relative to the notebook's working directory.
panda_array = imread("images/numpy-images/panda.png")
panda_array

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       ...,

       [[0.13333334, 0.07450981, 0.05490196],
        [0.12156863, 0.0627451 , 0.04313726],
        [0.10980392, 0