## 0. Importing NumPy

In [1]:
import numpy as np

# Check the version
print(np.__version__)

2.4.1


## 1. DataTypes and attributes
It's important to remember that the main type in NumPy is `ndarray`; even seemingly different kinds of arrays are still `ndarray`s. This means an operation you do on one array will work on another.

In [2]:
# 1-dimensional array, also referred to as a vector
a1 = np.array([1, 2, 3])

# 2-dimensional array, also referred to as a matrix
# (mixing ints and floats upcasts every element to float64)
a2 = np.array([[1, 2.0, 3.3],
               [4, 5, 6.5]])

# 3-dimensional array (arrays with 3+ dimensions are often called tensors);
# here: two 3x3 matrices stacked together
a3 = np.array([[[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]],
                [[10, 11, 12],
                 [13, 14, 15],
                 [16, 17, 18]]])

In [3]:
a1.shape, a1.ndim, a1.dtype, a1.size, type(a1)

((3,), 1, dtype('int64'), 3, numpy.ndarray)

In [4]:
a2.shape, a2.ndim, a2.dtype, a2.size, type(a2)

((2, 3), 2, dtype('float64'), 6, numpy.ndarray)

In [5]:
a3.shape, a3.ndim, a3.dtype, a3.size, type(a3)

((2, 3, 3), 3, dtype('int64'), 18, numpy.ndarray)

In [6]:
a1

array([1, 2, 3])

In [7]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [8]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [9]:
import pandas as pd

# 5x3 DataFrame of random integers in [0, 10) with labelled columns
df = pd.DataFrame(
    data=np.random.randint(10, size=(5, 3)),
    columns=list("abc"),
)
df

Unnamed: 0,a,b,c
0,2,3,4
1,5,5,9
2,5,5,6
3,8,1,0
4,1,4,9


In [10]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [11]:
df2 = pd.DataFrame(a2)
df2

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.5


## 2. Creating arrays

* `np.array()`
* `np.ones()`
* `np.zeros()`
* `np.random.rand(5, 3)`
* `np.random.randint(10, size=5)`
* `np.random.seed()` - pseudo random numbers
* Searching the documentation example (finding `np.unique()` and using it)

In [12]:
# Create a simple array
simple_array = np.array((1, 2, 3))
simple_array, simple_array.dtype

(array([1, 2, 3]), dtype('int64'))

In [13]:
# Create an array of ones
ones = np.ones((10, 2))
ones

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [14]:
# The default datatype is 'float64'
ones.dtype

dtype('float64')

In [15]:
# You can change the datatype with .astype()
ones.astype(int)

array([[1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1]])

In [16]:
# Create an array of zeros
zeros = np.zeros((5, 3, 3))
zeros

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [17]:
zeros.dtype

dtype('float64')

In [18]:
# Create an array within a range of values
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [19]:
# Random array
random_array = np.random.randint(10, size=(5, 3))
random_array

array([[4, 4, 8],
       [0, 2, 1],
       [8, 7, 4],
       [3, 3, 2],
       [9, 2, 5]], dtype=int32)

In [20]:
# Random array of floats (between 0 & 1)
np.random.random((5, 3))

array([[0.5743662 , 0.53753934, 0.3620437 ],
       [0.32182741, 0.16278396, 0.82970835],
       [0.02350532, 0.45590855, 0.46447749],
       [0.37752203, 0.14292943, 0.13152284],
       [0.89584066, 0.53763053, 0.60472081]])

In [21]:
# Random 5x3 array of floats (between 0 & 1), similar to above
np.random.rand(5, 3)

array([[0.10142735, 0.39070244, 0.5155614 ],
       [0.58131543, 0.9223443 , 0.98158418],
       [0.79162986, 0.91860511, 0.65250345],
       [0.67680268, 0.10840316, 0.67154896],
       [0.88820432, 0.42798147, 0.33519208]])

In [22]:
# Set random seed to 0
np.random.seed(0)

# Make 'random' numbers
np.random.randint(10, size=(5, 3))

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]], dtype=int32)

In [23]:
# Make more random numbers
np.random.randint(10, size=(5, 3))

array([[6, 7, 7],
       [8, 1, 5],
       [9, 8, 9],
       [4, 3, 0],
       [3, 5, 0]], dtype=int32)

In [24]:
# Set random seed to same number as above
np.random.seed(0)

# The same random numbers come out
np.random.randint(10, size=(5, 3))

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]], dtype=int32)

In [25]:
np.random.seed(0)
df = pd.DataFrame(np.random.randint(10, size=(5, 3)))
df

Unnamed: 0,0,1,2
0,5,0,3
1,3,7,9
2,3,5,2
3,4,7,6
4,8,8,1


## 3. Viewing arrays and matrices (indexing)

In [26]:
a1

array([1, 2, 3])

In [27]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [28]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [29]:
a1[0]

np.int64(1)

In [30]:
a2[0]

array([1. , 2. , 3.3])

In [31]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [32]:
# Get 2nd row (index 1) of a2
a2[1]

array([4. , 5. , 6.5])

In [33]:
# Get the first 2 values of the first 2 rows of both 3x3 matrices inside a3
a3[:2, :2, :2]

array([[[ 1,  2],
        [ 4,  5]],

       [[10, 11],
        [13, 14]]])

In [34]:
a4 = np.random.randint(10, size=(2, 3, 4, 5))
a4

array([[[[6, 7, 7, 8, 1],
         [5, 9, 8, 9, 4],
         [3, 0, 3, 5, 0],
         [2, 3, 8, 1, 3]],

        [[3, 3, 7, 0, 1],
         [9, 9, 0, 4, 7],
         [3, 2, 7, 2, 0],
         [0, 4, 5, 5, 6]],

        [[8, 4, 1, 4, 9],
         [8, 1, 1, 7, 9],
         [9, 3, 6, 7, 2],
         [0, 3, 5, 9, 4]]],


       [[[4, 6, 4, 4, 3],
         [4, 4, 8, 4, 3],
         [7, 5, 5, 0, 1],
         [5, 9, 3, 0, 5]],

        [[0, 1, 2, 4, 2],
         [0, 3, 2, 0, 7],
         [5, 9, 0, 2, 7],
         [2, 9, 2, 3, 3]],

        [[2, 3, 4, 1, 2],
         [9, 1, 4, 6, 8],
         [2, 3, 0, 0, 6],
         [0, 6, 3, 3, 8]]]], dtype=int32)

In [35]:
a4.shape

(2, 3, 4, 5)

In [36]:
# Get only the first 4 numbers of each single vector
a4[:, :, :, :4]

array([[[[6, 7, 7, 8],
         [5, 9, 8, 9],
         [3, 0, 3, 5],
         [2, 3, 8, 1]],

        [[3, 3, 7, 0],
         [9, 9, 0, 4],
         [3, 2, 7, 2],
         [0, 4, 5, 5]],

        [[8, 4, 1, 4],
         [8, 1, 1, 7],
         [9, 3, 6, 7],
         [0, 3, 5, 9]]],


       [[[4, 6, 4, 4],
         [4, 4, 8, 4],
         [7, 5, 5, 0],
         [5, 9, 3, 0]],

        [[0, 1, 2, 4],
         [0, 3, 2, 0],
         [5, 9, 0, 2],
         [2, 9, 2, 3]],

        [[2, 3, 4, 1],
         [9, 1, 4, 6],
         [2, 3, 0, 0],
         [0, 6, 3, 3]]]], dtype=int32)

## 4. Manipulating and comparing arrays
* Arithmetic
    * `+`, `-`, `*`, `/`, `//`, `**`, `%`
    * `np.exp()`
    * `np.log()`
    * [Dot product](https://www.mathsisfun.com/algebra/matrix-multiplying.html) - `np.dot()`
    * Broadcasting
* Aggregation
    * `np.sum()` - faster than Python's built-in `sum()` when used on NumPy arrays
    * `np.mean()`
    * `np.std()`
    * `np.var()`
    * `np.min()`
    * `np.max()`
    * `np.argmin()` - find index of minimum value
    * `np.argmax()` - find index of maximum value
    * These work on all `ndarray`'s
        * `a4.min(axis=0)` -- you can use axis as well
* Reshaping
    * `np.reshape()`
* Transposing
    * `a3.T` 
* Comparison operators
    * `>`
    * `<`
    * `<=`
    * `>=`
    * `x != 3`
    * `x == 3`
    * `np.sum(x > 3)`

In [37]:
a1

array([1, 2, 3])

In [38]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [39]:
# Add two arrays
a1 + ones

array([2., 3., 4.])

In [40]:
# Subtract two arrays
a1 - ones

array([0., 1., 2.])

In [41]:
# Multiply two arrays
a1 * ones

array([1., 2., 3.])

In [42]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [43]:
# Multiply two arrays
a1 * a2

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.5]])

In [44]:
a1.shape, a2.shape

((3,), (2, 3))

In [45]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [46]:
# This will error: a2 is (2, 3) and a3 is (2, 3, 3). Broadcasting lines shapes up
# from the right, so a2's 2 meets a3's middle 3 and neither of them is 1.
# (A different number of dimensions alone is fine -- a1 * a2 works.)
# The error is caught and printed so the notebook still runs top to bottom.
try:
    a2 * a3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

In [None]:
a1

In [None]:
a1.shape

In [None]:
a2.shape

In [None]:
a2

In [None]:
a1 + a2

In [None]:
a2 + 2

In [None]:
# Raises an error because the shapes (2, 3) and (2, 3, 3) can't be broadcast together.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    a2 + a3
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
# Divide two arrays
a1 / ones

In [None]:
a2

In [None]:
# Divide using floor division
a2 // a1

In [None]:
# Take an array to a power
a1 ** 2

In [None]:
# You can also use np.square()
np.square(a1)

In [None]:
# Modulus divide (what's the remainder)
a1 % 2

In [None]:
# Find the log of an array
np.log(a1)

In [None]:
# Find the exponential of an array
np.exp(a1)

In [None]:
sum(a1)

In [None]:
np.sum(a1)

In [None]:
massive_array = np.random.random(100000)
massive_array.size, type(massive_array)

In [None]:
# Measure how long a piece of code takes to run
%timeit sum(massive_array) # Python sum()
%timeit np.sum(massive_array) # NumPy np.sum()

# 1 ms = 1000 μs (microseconds)

In [None]:
import random

# Plain Python list of 100,000 random integers between 0 and 10 (inclusive)
massive_list = [random.randint(0, 10) for _ in range(100000)]
len(massive_list), type(massive_list)

In [None]:
massive_list[:10]

In [None]:
%timeit sum(massive_list)
%timeit np.sum(massive_list)

In [None]:
a2

In [None]:
# Find the mean
np.mean(a2)

In [None]:
# Find the max
np.max(a2)

In [None]:
# Find the min
np.min(a2)

In [None]:
# Find the standard deviation
np.std(a2)

In [None]:
# Find the variance
np.var(a2)

In [None]:
# The standard deviation is the square root of the variance
np.sqrt(np.var(a2))

In [None]:
# Demo of variance
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

np.var(high_var_array), np.var(low_var_array)

In [None]:
np.std(high_var_array), np.std(low_var_array)

In [None]:
# The standard deviation is the square root of the variance
np.sqrt(np.var(high_var_array))

In [None]:
a2

In [None]:
a2.shape

In [None]:
# a2 is (2, 3) and a3 is (2, 3, 3): the shapes can't be broadcast as they are,
# so this addition raises. The error is caught and printed so the notebook
# still runs top to bottom.
try:
    a2 + a3
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
a2.reshape(2, 3, 1)

In [None]:
a2.reshape(2, 3, 1) + a3

In [None]:
a2.shape

In [None]:
# transpose
a2.T

In [None]:
a2.transpose()

In [None]:
a2.T.shape

In [None]:
np.random.seed(0)
mat1 = np.random.randint(10, size=(3, 3))
mat2 = np.random.randint(10, size=(3, 2))

mat1.shape, mat2.shape

In [None]:
mat1

In [None]:
mat2

In [None]:
# matrix multiplication
np.dot(mat1, mat2)

In [None]:
# Can also achieve np.dot() with "@" 
# (however, they may behave differently at 3D+ arrays)
mat1 @ mat2

In [None]:
np.random.seed(0)
mat3 = np.random.randint(10, size=(4,3))
mat4 = np.random.randint(10, size=(4,3))
mat3

In [None]:
mat4

In [None]:
# This will fail as the inner dimensions of the matrices do not match:
# mat3 is (4, 3) and mat4 is (4, 3), so 3 != 4.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    np.dot(mat3, mat4)
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
mat3.T.shape

In [None]:
# Dot product
np.dot(mat3.T, mat4)

In [None]:
# Element-wise multiplication, also known as Hadamard product
mat3 * mat4

In [47]:
np.random.seed(0)
sales_amounts = np.random.randint(20, size=(5, 3))
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]], dtype=int32)

In [48]:
weekly_sales = pd.DataFrame(sales_amounts,
                            index=["Mon", "Tues", "Wed", "Thurs", "Fri"],
                            columns=["Almond butter", "Peanut butter", "Cashew butter"])
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [49]:
prices = np.array([10, 8, 12])
prices.reshape(1, 3)

array([[10,  8, 12]])

In [51]:
butter_prices = pd.DataFrame(prices.reshape(1, 3),
                             index=["Price"],
                             columns=["Almond butter", "Peanut butter", "Cashew butter"])
butter_prices

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Price,10,8,12


In [None]:
weekly_sales.shape

In [52]:
# First attempt at finding the total amount of sales for a whole day.
# prices is (3,) and sales_amounts is (5, 3), so the dimensions being multiplied
# (3 vs. 5) don't line up and the dot product raises. The error is caught and
# printed so the notebook still runs top to bottom; total_sales is left
# unassigned when the call fails.
try:
    total_sales = prices.dot(sales_amounts)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: shapes (3,) and (5,3) not aligned: 3 (dim 0) != 5 (dim 0)

In [54]:
prices

array([10,  8, 12])

In [55]:
sales_amounts.T.shape

(3, 5)

In [56]:
# To make the middle numbers the same, we can transpose
total_sales = prices.dot(sales_amounts.T)
total_sales

array([240, 138, 458, 232, 142])

In [57]:
butter_prices.shape, weekly_sales.shape

((1, 3), (5, 3))

In [58]:
daily_sales = butter_prices.dot(weekly_sales.T)
daily_sales

Unnamed: 0,Mon,Tues,Wed,Thurs,Fri
Price,240,138,458,232,142


In [None]:
# Need to transpose again
weekly_sales["Total"] = daily_sales.T
weekly_sales

In [None]:
a1

In [None]:
a2

In [None]:
a1 > a2

In [None]:
a1 >= a2

In [None]:
a1 > 5

In [None]:
a1 == a1

In [None]:
a1 == a2

## 5. Sorting arrays

* [`np.sort()`](https://numpy.org/doc/stable/reference/generated/numpy.sort.html) - sort values in a specified dimension of an array.
* [`np.argsort()`](https://numpy.org/doc/stable/reference/generated/numpy.argsort.html) - return the indices to sort the array on a given axis.
* [`np.argmax()`](https://numpy.org/doc/stable/reference/generated/numpy.argmax.html) - return the index/indices which gives the highest value(s) along an axis.
* [`np.argmin()`](https://numpy.org/doc/stable/reference/generated/numpy.argmin.html) - return the index/indices which gives the lowest value(s) along an axis.

In [None]:
random_array

In [None]:
np.sort(random_array)

In [None]:
np.argsort(random_array)

In [None]:
a1

In [None]:
# Return the indices that would sort an array
np.argsort(a1)

In [None]:
# No axis
np.argmin(a1)

In [None]:
random_array

In [None]:
# Across the horizontal: index of the max value within each row (axis=1)
np.argmax(random_array, axis=1)

In [None]:
# Down the vertical: index of the min value within each column (axis=0)
np.argmin(random_array, axis=0)