In [2]:
import numpy as np



## 1. DataTypes & Attributes

In [3]:
# NumPy's main datatype is ndarray.
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [4]:
type(a1)

numpy.ndarray

In [5]:
a2 = np.array([[1, 2.0, 3.3],
               [4, 5, 6.5]])

a3 = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]])

In [6]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [7]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [8]:
a1.shape

(3,)

In [9]:
a2.shape

(2, 3)

In [10]:
a3.shape

(2, 3, 3)

In [11]:
a1

array([1, 2, 3])

In [12]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [13]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [14]:
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [15]:
a1.dtype, a2.dtype, a3.dtype

(dtype('int64'), dtype('float64'), dtype('int64'))

In [16]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [17]:
a1.size, a2.size, a3.size

(3, 6, 18)

In [18]:
type(a1), type(a2), type(a3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [19]:
# Create a DataFrame from a NumPy array.
import pandas as pd

df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.5


## 2. Creating arrays

In [20]:
sample_array = np.array([1, 2, 3])
sample_array

array([1, 2, 3])

In [21]:
sample_array.dtype

dtype('int64')

In [22]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [23]:
ones.dtype

dtype('float64')

In [24]:
type(ones)

numpy.ndarray

In [25]:
zeros = np.zeros((2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [26]:
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [27]:
random_array = np.random.randint(0, 10, size=(3, 5))
random_array

array([[4, 9, 7, 9, 4],
       [0, 0, 8, 6, 0],
       [4, 1, 8, 4, 7]])

In [28]:
random_array.size

15

In [29]:
random_array.shape

(3, 5)

In [30]:
random_array2 = np.random.random((5, 3))
random_array2

array([[0.89643712, 0.19514566, 0.1982907 ],
       [0.45785397, 0.78546223, 0.72886958],
       [0.31622612, 0.56780952, 0.75721477],
       [0.01703616, 0.98549356, 0.72349319],
       [0.51574951, 0.65257854, 0.4762613 ]])

In [31]:
random_array2.shape

(5, 3)

In [32]:
random_array3 = np.random.rand(5, 3)
random_array3

array([[0.20501299, 0.20227157, 0.09147232],
       [0.72454977, 0.00893542, 0.32507637],
       [0.52948743, 0.35812119, 0.01126688],
       [0.90026261, 0.22641738, 0.54633222],
       [0.69123489, 0.00416687, 0.0224242 ]])

In [33]:
# Pseudo-random numbers
np.random.seed(seed=9999)
random_array4 = np.random.randint(10, size=(5, 3))
random_array4

array([[6, 1, 6],
       [5, 5, 9],
       [6, 8, 0],
       [8, 1, 5],
       [3, 1, 0]])

In [34]:
np.random.seed(7)
random_array5 = np.random.random((5, 3))
random_array5

array([[0.07630829, 0.77991879, 0.43840923],
       [0.72346518, 0.97798951, 0.53849587],
       [0.50112046, 0.07205113, 0.26843898],
       [0.4998825 , 0.67923   , 0.80373904],
       [0.38094113, 0.06593635, 0.2881456 ]])

In [35]:
random_array5 = np.random.random((5, 3))
random_array5

array([[0.90959353, 0.21338535, 0.45212396],
       [0.93120602, 0.02489923, 0.60054892],
       [0.9501295 , 0.23030288, 0.54848992],
       [0.90912837, 0.13316945, 0.52341258],
       [0.75040986, 0.66901324, 0.46775286]])

In [36]:
random_array4.shape 

(5, 3)

## 3. Viewing arrays and matrices

In [37]:
np.unique(random_array4)

array([0, 1, 3, 5, 6, 8, 9])

In [38]:
a1

array([1, 2, 3])

In [39]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [40]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [41]:
a1[0]

1

In [42]:
a2.shape

(2, 3)

In [43]:
a2[0]

array([1. , 2. , 3.3])

In [44]:
a3.shape

(2, 3, 3)

In [45]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [46]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [47]:
a2[1]

array([4. , 5. , 6.5])

In [48]:
a3.shape

(2, 3, 3)

In [49]:
 a3[:2, 2:, 2:]

array([[[ 9]],

       [[18]]])

In [50]:
a4 = np.random.randint(10, size=(2, 3, 4, 5))
a4

array([[[[6, 7, 7, 9, 3],
         [0, 7, 7, 7, 0],
         [5, 4, 3, 1, 3],
         [1, 3, 4, 3, 1]],

        [[9, 5, 9, 1, 2],
         [3, 2, 2, 5, 7],
         [3, 0, 9, 9, 3],
         [4, 5, 3, 0, 4]],

        [[8, 6, 7, 2, 7],
         [3, 8, 6, 6, 5],
         [6, 5, 7, 1, 5],
         [4, 4, 9, 9, 0]]],


       [[[6, 2, 6, 8, 2],
         [4, 1, 6, 1, 5],
         [1, 6, 9, 8, 6],
         [5, 9, 7, 5, 4]],

        [[9, 6, 8, 1, 5],
         [5, 8, 3, 7, 7],
         [9, 4, 7, 5, 9],
         [6, 2, 0, 5, 3]],

        [[0, 5, 7, 1, 8],
         [4, 9, 0, 2, 0],
         [7, 6, 2, 9, 9],
         [5, 1, 0, 0, 9]]]])

In [51]:
a4.shape, a4.ndim

((2, 3, 4, 5), 4)

In [52]:
# Get the first number of each innermost array
# (`:1` is a slice, so the last axis is kept with length 1 rather than dropped)
a4[:, :, :, :1]

array([[[[6],
         [0],
         [5],
         [1]],

        [[9],
         [3],
         [3],
         [4]],

        [[8],
         [3],
         [6],
         [4]]],


       [[[6],
         [4],
         [1],
         [5]],

        [[9],
         [5],
         [9],
         [6]],

        [[0],
         [4],
         [7],
         [5]]]])

## 4. Manipulating & comparing arrays

### Arithmetic

In [53]:
 a1

array([1, 2, 3])

In [54]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [55]:
a1 + ones

array([2., 3., 4.])

In [56]:
a1 - ones

array([0., 1., 2.])

In [57]:
a1 * ones

array([1., 2., 3.])

In [58]:
a1

array([1, 2, 3])

In [59]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [60]:
a1 * a2

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.5]])

In [61]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [62]:
# How can you reshape a2 to be compatible with a3?
# Search: "How to reshape numpy array"
# a2 is (2, 3) and a3 is (2, 3, 3): broadcasting compares shapes from the
# trailing axis backwards, and 2 vs 3 on the middle axis is incompatible.
# The error is the point of this cell, so catch and display it instead of
# letting it stop "Restart & Run All".
try:
    a2 * a3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [None]:
a1 / ones

In [None]:
a2 / a1

In [None]:
# Floor division removes the decimals (rounds down)
a2 // a1

In [None]:
a2

In [None]:
a2 ** 2

In [None]:
np.square(a2)

In [None]:
a1 + ones

In [None]:
np.add(a1, ones)

In [None]:
a1 % 2

In [None]:
a1 / 2

In [None]:
a2 % 2

In [None]:
np.exp(a1)

In [None]:
np.log(a1)

### Aggregation

Aggregation = performing the same operation on a number of things

In [None]:
listy_list = [1, 2, 3]
type(listy_list)

In [None]:
sum(listy_list)

In [None]:
sum(a1)

In [None]:
np.sum(a1)

Use Python's built-in functions (`sum()`) on Python datatypes and NumPy's functions (`np.sum()`) on NumPy arrays.

In [None]:
# Create a massive NumPy array (100,000 random floats)
massive_array = np.random.random(100000)
massive_array.size

In [None]:
massive_array[:10]

In [None]:
%timeit sum(massive_array) # Python's sum()
%timeit np.sum(massive_array) # NumPy's np.sum()

In [None]:
# Ratio of two hard-coded timings — presumably the %timeit results for sum()
# vs np.sum() from an earlier run (TODO confirm the values and that both are
# in the same unit; they will differ from machine to machine).
15800 / 34.6

In [None]:
a2

In [None]:
np.mean(a2)

In [None]:
np.max(a2)

In [None]:
np.min(a2)

**Standard deviation** and **variance** are measures of 'spread' of data.

The higher the standard deviation and variance of the data, the more spread out the values are.

The lower the standard deviation and variance, the less spread out the values are.

In [None]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
np.std(a2)

In [None]:
# Variance = the average of the squared differences between each number and the mean
# Higher variance = wider range of numbers
# Lower variance = lower range of numbers
np.var(a2)

In [None]:
# Standard deviation = square root of variance
np.sqrt(np.var(a2))

In [None]:
# Demo of std and var
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [None]:
np.var(high_var_array), np.var(low_var_array)

In [None]:
np.std(high_var_array), np.std(low_var_array)

In [None]:
np.mean(high_var_array), np.mean(low_var_array)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.hist(high_var_array)
plt.show

In [None]:
# Histogram of the low-variance demo array.
plt.hist(low_var_array)
# Call show(): a bare `plt.show` only references the function and never runs it.
plt.show()

### Reshaping & transposing

In [None]:
a2

In [None]:
a2.shape

In [None]:
a3

In [None]:
a3.shape

In [None]:
 # a2 is (2, 3) and a3 is (2, 3, 3), so this multiplication cannot broadcast.
 # Catch and display the expected error so the remaining cells still run on
 # "Restart & Run All".
 try:
     a2 * a3
 except ValueError as err:
     print(f"ValueError: {err}")

In [None]:
 a2.shape

In [None]:
a2.reshape(2, 3, 1).shape

In [None]:
a3.shape

In [None]:
 a2_reshape = a2.reshape(2, 3, 1)
 a2_reshape

In [None]:
a2_reshape * a3

In [None]:
a2.shape

In [None]:
# Transpose = reverses the order of the axes (for a 2-D array: swaps rows and columns)
a2.T

In [None]:
a2.T.shape

In [None]:
a3

In [None]:
 a3.shape

In [None]:
a3.T

In [None]:
a3.T.shape

### Dot Product

In [None]:
# Seed the legacy global RNG so both matrices come out the same on every run.
np.random.seed(0)

# Draw two 5x3 integer matrices (values 0-9) from the seeded stream, in order:
# the first draw becomes mat1, the second mat2.
mat1, mat2 = (np.random.randint(10, size=(5, 3)) for _ in range(2))

mat1

In [None]:
mat2

In [None]:
mat1.shape, mat2.shape

In [None]:
mat1

In [None]:
mat2

In [None]:
# Element-wise multiplication (Hadamard product)
mat1 * mat2

In [None]:
# Dot product: the inner dimensions must match, but mat1 and mat2 are both
# (5, 3), so this raises. Catch and display the error so later cells still
# run on "Restart & Run All".
try:
    np.dot(mat1, mat2)
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
# Transpose mat1
mat1.T

In [None]:
mat1.shape, mat2.T.shape

In [None]:
mat3 = np.dot(mat1, mat2.T)
mat3

In [None]:
mat3.shape

### Dot product example (Nut butter sales)

In [None]:
np.random.seed(0)
# Number of jars sold
sales_amount = np.random.randint(20, size=(5, 3))
sales_amount

In [None]:
# Create weekly_sales DataFrame
weekly_sales = pd.DataFrame(sales_amount,
                            index=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"],
                           columns=["Almond butter", "Peanut  butter", "Cashew butter"])
weekly_sales

In [None]:
# Create the prices array
prices = np.array([10, 8, 12])

In [None]:
# Create butter_prices DataFrame (a single "Price" row, one column per butter)
butter_prices = pd.DataFrame(prices.reshape(1, 3), index=["Price"],
                            columns=["Almond butter", "Peanut  butter", "Cashew butter"])
butter_prices

In [None]:
# Shapes aren't aligned, let's transpose:
# prices is (3,) and sales_amount is (5, 3), so dot against sales_amount.T (3, 5)
total_sales = prices.dot(sales_amount.T)
total_sales

In [None]:
# Create daily_sales
daily_sales = butter_prices.dot(weekly_sales.T)
daily_sales

In [None]:
weekly_sales["Total (€)"] = daily_sales.T
weekly_sales

### Comparison Operator

In [None]:
a1

In [None]:
a2

In [None]:
a1 > a2

In [None]:
bool_array = a1 >= a2
bool_array

In [None]:
type(bool_array), bool_array.dtype

In [None]:
a1 > 5

In [None]:
a1 < 5

In [None]:
a1 == a1 

In [None]:
a1

In [None]:
a2

In [None]:
a1 == a2

## 5. Sorting arrays

In [None]:
random_array = np.random.randint(10, size=(3, 5))
random_array

In [None]:
random_array.shape

In [None]:
np.sort(random_array)

In [None]:
random_array

In [None]:
np.argsort(random_array)

In [None]:
a1

In [None]:
np.argsort(a1)

In [None]:
np.argmin(a1)

In [None]:
np.argmax(a1)

In [None]:
random_array

In [None]:
np.argmax(random_array, axis=0)

In [None]:
np.argmax(random_array, axis=1)

## 6. Practical Example - NumPy in Action!!!!

![panda.png](attachment:774d69ba-da40-4326-a35c-e3d3428088fa.png)

In [None]:
# Turn an image into a NumPy array
from matplotlib.image import imread

panda = imread("images/panda.png")

print(type(panda))

In [None]:
panda.size, panda.shape, panda.ndim

In [None]:
panda[:5]

<img src="images/car-photo.png"/>

In [None]:
car = imread("images/car-photo.png")
print(type(car))

In [None]:
car[:1]

<img src="images/dog-photo.png"/>

In [None]:
dog = imread("images/dog-photo.png")
print(type(dog))

In [None]:
dog