In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.image import imread

## 1. Data Types and Attributes

In [3]:
# The ndarray is NumPy's core data structure; build a 1-D one from a Python list.
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [4]:
type(a1)

numpy.ndarray

In [5]:
# a2: 2-D array. Mixing ints and floats makes every element a float.
a2 = np.array([
    [1, 2.0, 3],
    [4, 5, 6.5],
])

# a3: 3-D array holding two 3x3 integer matrices.
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [17, 18, 19]],
])

In [6]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [17, 18, 19]]])

In [7]:
a2.shape

(2, 3)

In [8]:
a3.ndim # number of dimensions (axes) of the array

3

In [9]:
a1.dtype, a2.dtype, a3.dtype

(dtype('int64'), dtype('float64'), dtype('int64'))

In [10]:
a1.size, a2.size, a3.size

(3, 6, 18)

In [11]:
type(a1),type(a2),type(a3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [12]:
# Create a DataFrame from the 2-D NumPy array a2.
# No labels are passed, so rows and columns get default integer labels.
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,6.5


## 2. Create np arrays

In [13]:
# 2x3 array filled with 1.0
ones = np.ones(shape=(2, 3))

In [14]:
# 2x3 array filled with 0.0
zeros = np.zeros(shape=(2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [15]:
# Pseudo-random numbers: without a fixed seed the "random" values change on every run.
# Seeding the generator makes the arrays below reproducible.
np.random.seed(0)

In [16]:
# Every second integer starting at 0 and stopping before 10
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [17]:
# 3x5 array of random integers drawn from 0 (inclusive) to 10 (exclusive)
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

array([[5, 0, 3, 3, 7],
       [9, 3, 5, 2, 4],
       [7, 6, 8, 8, 1]])

In [18]:
# numpy.random.random: the shape argument is a single tuple.
# (This result is not displayed: a notebook cell only shows its last expression.)
np.random.random((3,5))

# numpy.random.rand: the length of each dimension of the output array is a separate argument.
np.random.rand(3,5)

array([[0.63992102, 0.14335329, 0.94466892, 0.52184832, 0.41466194],
       [0.26455561, 0.77423369, 0.45615033, 0.56843395, 0.0187898 ],
       [0.6176355 , 0.61209572, 0.616934  , 0.94374808, 0.6818203 ]])

## 3. Viewing Arrays and Matrices

In [19]:
# 4-D array of random integers in [0, 10)
a4 = np.random.randint(0, 10, size=(2, 3, 4, 5))

In [20]:
a4.shape, a4.ndim

((2, 3, 4, 5), 4)

In [21]:
# First two numbers of each innermost array: the ellipsis keeps every leading
# axis whole and only the last axis is sliced.
a4[..., :2]

array([[[[8, 1],
         [9, 3],
         [0, 3],
         [4, 6]],

        [[4, 4],
         [7, 5],
         [5, 9],
         [0, 1]],

        [[0, 3],
         [5, 9],
         [2, 9],
         [2, 3]]],


       [[[9, 1],
         [2, 3],
         [0, 6],
         [8, 8]],

        [[0, 8],
         [2, 8],
         [4, 3],
         [0, 8]],

        [[9, 6],
         [8, 0],
         [5, 7],
         [2, 8]]]])

## 4. Manipulating & Comparing Arrays

### Arithmetic

In [22]:
a1

array([1, 2, 3])

In [23]:
ones = np.ones(3)

In [24]:
a1 + ones

array([2., 3., 4.])

In [25]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [26]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [17, 18, 19]]])

In [27]:
# a2 has shape (2, 3) and a3 has shape (2, 3, 3), so NumPy cannot broadcast them
# together and this multiplication fails on purpose. The error is caught and
# printed so the notebook still runs top to bottom (Restart & Run All).
# Exercise: how can you reshape a2 to be compatible with a3?
# Search: "How to reshape numpy array"
try:
    a2 * a3
except ValueError as err:
    print(f"{type(err).__name__}: {err}")


ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [None]:
# Element-wise square of a2 (not displayed: a notebook cell only shows its last expression).
np.square(a2)
# np.add is the function form of the + operator.
np.add(a1, ones) # same as a1 + ones

### Aggregation

Aggregation = performing the same operation on a number of things

In [None]:
# A plain Python list, added up with Python's built-in sum()
listy_list = [1, 2, 3]
sum(listy_list)

In [None]:
np.sum(a1)

Use Python's built-in functions (`sum()`) on Python datatypes and NumPy's functions (`np.sum()`) on NumPy arrays.

In [None]:
# Build a large NumPy array of 10,000 random floats and preview only the first ten
massive_array = np.random.random(size=10_000)
massive_array[:10]

In [None]:
# Time Python's built-in sum() against np.sum() on the same NumPy array
%timeit sum(massive_array) # Python's built-in sum()
%timeit np.sum(massive_array) # NumPy's np.sum()

In [None]:
np.mean(a2)

In [None]:
np.max(a2)

In [None]:
# Variance = measure of the average degree to which each number differs from the mean
# Higher variance = wider spread of numbers
# Lower variance = narrower spread of numbers
np.var(a2)

In [None]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
np.std(a2)

In [None]:
# Standard deviation = square root of the variance
np.sqrt(np.var(a2))

In [None]:
# Two small samples for comparing std and var:
# one with widely spread values, one with evenly spaced small values.
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [None]:
np.var(high_var_array), np.var(low_var_array)

In [None]:
np.std(high_var_array), np.std(low_var_array)

In [None]:
np.mean(high_var_array), np.mean(low_var_array)

In [None]:
%matplotlib inline

In [None]:
# Histogram of high_var_array. A title and axis labels are set so the figure
# can be understood without reading the surrounding code.
fig, ax = plt.subplots()
ax.hist(high_var_array)
ax.set(title="high_var_array", xlabel="Value", ylabel="Count")
plt.show()

In [None]:
# Histogram of low_var_array. A title and axis labels are set so the figure
# can be understood without reading the surrounding code.
fig, ax = plt.subplots()
ax.hist(low_var_array)
ax.set(title="low_var_array", xlabel="Value", ylabel="Count")
plt.show()

## Reshape & Transpose

In [None]:
# Give a2 a trailing axis of length 1: shape (2, 3) -> (2, 3, 1)
a2_reshape = a2.reshape((2, 3, 1))

In [None]:
a2_reshape * a3

In [56]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [55]:
# Transpose = swaps the axes (rows become columns and columns become rows)
a2.T

array([[1. , 4. ],
       [2. , 5. ],
       [3. , 6.5]])

In [None]:
a2.T.shape

In [None]:
a3.shape

In [None]:
a3.T

In [None]:
a3.T.shape

## Dot Product

In [29]:
# Re-seed so both matrices are reproducible, then draw two 5x3 integer matrices from [0, 10)
np.random.seed(0)
mat1 = np.random.randint(0, 10, size=(5, 3))
mat2 = np.random.randint(0, 10, size=(5, 3))

In [30]:
mat1

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

In [31]:
mat2

array([[6, 7, 7],
       [8, 1, 5],
       [9, 8, 9],
       [4, 3, 0],
       [3, 5, 0]])

In [32]:
# Element-wise multiplication (Hadamard product)
mat1*mat2

array([[30,  0, 21],
       [24,  7, 45],
       [27, 40, 18],
       [16, 21,  0],
       [24, 40,  0]])

In [34]:
# Matrix product: mat1 is transposed to (3, 5) so its inner dimension matches mat2's (5, 3)
mat1.T @ mat2

array([[121, 114,  77],
       [153, 108,  80],
       [135,  69,  84]])

### Dot Product Example

In [39]:
# Number of jars sold: a reproducible 5x3 array of random integers in [0, 20)
np.random.seed(0)
sales_amounts = np.random.randint(0, 20, size=(5, 3))
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [72]:
# Wrap the sales numbers in a DataFrame: one row per weekday, one column per nut butter
weekly_sales = pd.DataFrame(
    data=sales_amounts,
    index=["Mon", "Tues", "wed", "Thurs", "Fri"],
    columns=["Almond butter", "Peanut Butter", "Cashew Butter"],
)
weekly_sales

Unnamed: 0,Almond butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [73]:
# Price array: one price per nut butter
prices = np.array([10, 8, 12])
prices.shape

(3,)

In [74]:
# Put the prices in a one-row DataFrame whose columns match the nut-butter columns of weekly_sales
butter_prices = pd.DataFrame(
    data=prices.reshape((1, 3)),
    index=["Price"],
    columns=["Almond butter", "Peanut Butter", "Cashew Butter"],
)
butter_prices

Unnamed: 0,Almond butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [75]:
butter_prices.shape, weekly_sales.T.shape

((1, 3), (3, 5))

In [77]:
# Daily revenue = jars sold (5x3) dot price per jar (3x1).
# Only the product columns are selected from weekly_sales, so this cell still
# works when re-run after a "Total" column has been added to weekly_sales
# (otherwise the dot product fails because the labels no longer align).
product_columns = butter_prices.columns
total_prices = weekly_sales[product_columns].dot(butter_prices.T)
total_prices

Unnamed: 0,Price
Mon,240
Tues,138
wed,458
Thurs,232
Fri,142


In [78]:
# Attach the daily totals (the single "Price" column of total_prices) as a new "Total" column
weekly_sales["Total"] = total_prices["Price"]
weekly_sales

Unnamed: 0,Almond butter,Peanut Butter,Cashew Butter,Total
Mon,12,15,0,240
Tues,3,3,7,138
wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142


## Comparison Operation

In [79]:
a1

array([1, 2, 3])

In [80]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [81]:
a1 > a2

array([[False, False, False],
       [False, False, False]])

In [82]:
a1 < 5

array([ True,  True,  True])

## 5. Sorting Array

In [85]:
random_array

array([[5, 0, 3, 3, 7],
       [9, 3, 5, 2, 4],
       [7, 6, 8, 8, 1]])

In [87]:
np.sort(random_array)

array([[0, 3, 3, 5, 7],
       [2, 3, 4, 5, 9],
       [1, 6, 7, 8, 8]])

In [88]:
np.argsort(random_array) # returns, for each row, the indices that would sort that row

array([[1, 2, 3, 0, 4],
       [3, 1, 4, 2, 0],
       [4, 1, 0, 2, 3]])

In [90]:
# axis = 0: for each column, the row index of its largest value
np.argmax(random_array, axis = 0)

array([1, 2, 2, 2, 0])

In [91]:
# axis = 1: for each row, the column index of its largest value
np.argmax(random_array, axis = 1)

array([4, 0, 2])

## 6. Practical Example 
<img src="./numpy-images/panda.png"/>

In [94]:
# Turn an image into a NumPy array: imread loads the PNG's pixel data as an ndarray
panda = imread("./numpy-images/panda.png")
print(type(panda))

<class 'numpy.ndarray'>


In [95]:
panda.size, panda.shape, panda.ndim

(24465000, (2330, 3500, 3), 3)

In [96]:
panda[:5]

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.