import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.image import imread

## 1. Data Types and Attributes

In [27]:
# ndarray is NumPy's core datatype

a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [28]:
type(a1)

numpy.ndarray

In [29]:
# 2-D array: the float literals (2.0, 6.5) make the whole array floating point
a2 = np.array([
    [1, 2.0, 3],
    [4, 5, 6.5],
])

# 3-D array: two stacked 3x3 integer blocks
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [17, 18, 19]],
])

In [30]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [17, 18, 19]]])

In [31]:
a2.shape

(2, 3)

In [32]:
a3.ndim  # number of dimensions

3

In [33]:
a1.dtype, a2.dtype, a3.dtype

(dtype('int64'), dtype('float64'), dtype('int64'))

In [34]:
a1.size, a2.size, a3.size

(3, 6, 18)

In [35]:
type(a1),type(a2),type(a3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [36]:
# Create a DataFrame from a NumPy array
# (pandas is imported as `pd` in the import cell at the top of the notebook,
# so every dependency is visible in one place.)
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,6.5


## 2. Create np arrays

In [37]:
ones = np.ones((2,3))

In [38]:
zeros = np.zeros((2,3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [39]:
# Pseudo-random numbers: without a fixed seed the random functions return
# different values on every run, so pin the seed to make results repeatable.
np.random.seed(0)

In [40]:
# Even numbers from 0 up to (but not including) 10
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [41]:
# 3x5 grid of random integers drawn from 0..9 (upper bound is exclusive)
random_array = np.random.randint(low=0, high=10, size=(3, 5))
random_array

array([[5, 0, 3, 3, 7],
       [9, 3, 5, 2, 4],
       [7, 6, 8, 8, 1]])

## 3. Viewing Arrays and Matrices

In [42]:
# 4-D array of random integers in 0..9
a4 = np.random.randint(0, 10, size=(2, 3, 4, 5))

In [43]:
a4.shape, a4.ndim

((2, 3, 4, 5), 4)

In [44]:
# First two entries along the innermost (last) axis; every other axis is kept whole
a4[..., :2]

array([[[[6, 7],
         [5, 9],
         [3, 0],
         [2, 3]],

        [[3, 3],
         [9, 9],
         [3, 2],
         [0, 4]],

        [[8, 4],
         [8, 1],
         [9, 3],
         [0, 3]]],


       [[[4, 6],
         [4, 4],
         [7, 5],
         [5, 9]],

        [[0, 1],
         [0, 3],
         [5, 9],
         [2, 9]],

        [[2, 3],
         [9, 1],
         [2, 3],
         [0, 6]]]])

## 4. Manipulating & Comparing Arrays

### Arithmetic

In [45]:
a1

array([1, 2, 3])

In [46]:
# Rebinds `ones` (earlier a (2, 3) array) to a 1-D array of three ones so it
# lines up element-for-element with a1.
ones = np.ones(3)

In [47]:
a1 + ones

array([2., 3., 4.])

In [48]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [49]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [17, 18, 19]]])

In [50]:
# How can you reshape a2 to be compatible with a3?
# Search: "How to reshape numpy array"
#
# a2 has shape (2, 3) and a3 has shape (2, 3, 3); NumPy cannot broadcast these
# together, so the multiplication raises ValueError. The failure is the point
# of this cell, so it is caught and printed rather than left as an unhandled
# exception that would stop a "Restart & Run All".
try:
    a2 * a3
except ValueError as err:
    print(f"ValueError: {err}")


ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [51]:
# NOTE: only the last expression of a cell is echoed, so the squared array
# computed on the next line is not shown in the output.
np.square(a2)
np.add(a1, ones)  # equivalent to a1 + ones

array([2., 3., 4.])

### Aggregation

Aggregation = performing the same operation on a number of things

In [52]:
# A plain Python list, summed with the built-in sum()
listy_list = [1, 2, 3]
sum(listy_list)

6

In [53]:
np.sum(a1)

6

Use Python's built-in functions (`sum()`) on Python datatypes and use NumPy's functions on NumPy arrays (`np.sum()`).

In [54]:
# Build a large NumPy array: 10,000 random floats
massive_array = np.random.random(size=10_000)
massive_array[:10]

array([0.16494046, 0.36980809, 0.14644176, 0.56961841, 0.70373728,
       0.28847644, 0.43328806, 0.75610669, 0.39609828, 0.89603839])

In [55]:
%timeit sum(massive_array) #Python 's sum()
%timeit np.sum(massive_array) #Numpy's np.sum()

1.47 ms ± 24.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
6.8 µs ± 28.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [56]:
np.mean(a2)

3.5833333333333335

In [57]:
np.max(a2)

6.5

In [58]:
# Variance = measure of the average degree to which each number differs from the mean
# Higher variance = wider range of numbers
# Lower variance = narrower range of numbers
np.var(a2)

3.368055555555556

In [59]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
np.std(a2)

1.8352262954621035

In [60]:
# Standard deviation = square root of the variance
np.sqrt(np.var(a2))

1.8352262954621035

In [None]:
# Two small samples used to contrast variance and standard deviation

high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [None]:
np.var(high_var_array), np.var(low_var_array)

In [None]:
np.std(high_var_array), np.std(low_var_array)

In [None]:
np.mean(high_var_array), np.mean(low_var_array)

In [None]:
%matplotlib inline

In [None]:
# Histogram of the high-variance sample.
# `plt` comes from the import cell at the top of the notebook; the explicit
# Axes interface is used so the figure carries its own title and axis labels.
fig, ax = plt.subplots()
ax.hist(high_var_array)
ax.set(title="high_var_array", xlabel="Value", ylabel="Count")
plt.show()

In [None]:
# Histogram of the low-variance sample, titled and labelled so the figure can
# be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.hist(low_var_array)
ax.set(title="low_var_array", xlabel="Value", ylabel="Count")
plt.show()

In [None]:
# Add a trailing length-1 axis to a2, giving it shape (2, 3, 1)
a2_reshape = a2.reshape((2, 3, 1))

In [None]:
a2_reshape * a3

In [None]:
a2

In [None]:
# Transpose = reverses the order of the axes (for a 2-D array, rows become columns)
a2.T

In [None]:
a2.T.shape

In [None]:
a3.shape

In [None]:
a3.T

In [None]:
a3.T.shape

## Dot Product

In [None]:
# Reseed so both matrices are reproducible, then draw two 5x3 matrices of
# random integers in 0..9 (mat1 first, then mat2, from the same stream).
np.random.seed(0)
mat1, mat2 = (np.random.randint(10, size=(5, 3)) for _ in range(2))

In [None]:
mat1

In [None]:
mat2

In [None]:
# Element-wise multiplication (Hadamard product)
mat1*mat2

In [None]:
np.dot(mat1.T, mat2)

### Dot Product Example

In [None]:
# Number of jars sold
np.random.seed(0)
sales_amounts = np.random.randint(0, 20, size=(5, 3))
sales_amounts

In [None]:
 weekly_sales = pd.DataFrame(sales_amounts, index = ["Mon", "Tues", "wed", "Thurs", "Fri"],
                                           columns=["Almond butter", "Peanut Butter", "Cashew Butter"])
weekly_sales

In [None]:
# Create the price array
prices = np.array([10, 8, 12])
prices.shape

In [None]:
# Create the butter_prices DataFrame: a single "Price" row with one column per product
butter_prices = pd.DataFrame(
    prices.reshape(1, 3),
    index=["Price"],
    columns=["Almond butter", "Peanut Butter", "Cashew Butter"],
)
butter_prices

In [None]:
butter_prices.shape, weekly_sales.T.shape

In [None]:
# Multiply each day's jar counts by the matching price and sum across products.
# The product columns are selected explicitly so this cell stays re-runnable
# after a "Total" column has been added to weekly_sales: DataFrame.dot aligns
# the left frame's columns with the right frame's index and fails if an extra
# column is present.
total_prices = weekly_sales[butter_prices.columns].dot(butter_prices.T)
total_prices

In [None]:
# Attach the totals as a new "Total" column (this modifies weekly_sales in place)
weekly_sales["Total"] = total_prices
weekly_sales

## Comparison Operation

In [None]:
a1

In [None]:
a2

In [None]:
a1 > a2

In [None]:
a1 < 5

## 5. Sorting Array

In [None]:
random_array

In [None]:
np.sort(random_array)

In [None]:
np.argsort(random_array)  # returns the indices that would sort the elements (along the last axis by default)

In [None]:
np.argmax(random_array, axis = 0)

In [None]:
np.argmax(random_array, axis = 1)

## 6. Practical Example 
<img src="./numpy-images/panda.png"/>

In [None]:
# Turn an image into a NumPy array
# (`imread` is imported in the import cell at the top of the notebook.)
panda = imread("./numpy-images/panda.png")

# Show the type as the cell's last expression, matching the earlier type(a1) cells
type(panda)

In [None]:
panda.size, panda.shape, panda.ndim

In [None]:
panda[:5]