In [1]:
import numpy as np

## 1. DataTypes and Attributes

Numpy's main datatype is ndarray (n dimensional array)

In [2]:
a1 = np.array([1,2,3])
a1

array([1, 2, 3])

In [3]:
type(a1)

numpy.ndarray

In [4]:
a2 = np.array([[1,2,3.3],
               [4,5,6.2]])
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.2]])

In [5]:
a3 = np.array([[[1,2,3],
               [4,5,6],
               [7,8,9]],
              [[10,11,12],
               [13,14,15],
               [16,17,18]]])
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [6]:
a1.shape, a2.shape, a3.shape

((3,), (2, 3), (2, 3, 3))

In [7]:
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [8]:
a1.dtype, a2.dtype, a3.dtype

(dtype('int32'), dtype('float64'), dtype('int32'))

In [9]:
a1.size, a2.size, a3.size

(3, 6, 18)

In [10]:
a4 = np.array([[[1,2]],[[3,4]],[[5,6]],[[7,8]]])
a4, a4.ndim, a4.shape

(array([[[1, 2]],
 
        [[3, 4]],
 
        [[5, 6]],
 
        [[7, 8]]]),
 3,
 (4, 1, 2))

In [11]:
# so according to this ^ ther's maximum of 3 dimensions with matrices being stuck up on each other

### create a data frame from numPy array

In [12]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.2]])

In [13]:
import pandas as pd

df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.2


## 2. Creating arrays

In [14]:
ones = np.ones((3,3,2), dtype = int)
ones

array([[[1, 1],
        [1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1],
        [1, 1]]])

In [15]:
type(ones), ones.dtype

(numpy.ndarray, dtype('int32'))

In [16]:
zeros = np.zeros((3,2))
zeros

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [17]:
range_array = np.arange(0,10,2)
range_array

array([0, 2, 4, 6, 8])

In [18]:
random_array = np.random.randint(0,10,[3,5]) # lowest, highest, size
random_array

array([[0, 6, 0, 5, 5],
       [6, 3, 9, 9, 9],
       [5, 6, 8, 6, 5]])

In [19]:
random_array.size, random_array.shape

(15, (3, 5))

In [20]:
random_array2 = np.random.random((3,5))
random_array2

array([[0.3079606 , 0.62940038, 0.03626786, 0.73987149, 0.19392143],
       [0.83743483, 0.21823115, 0.3694701 , 0.47994723, 0.16866261],
       [0.93911448, 0.4783797 , 0.48096887, 0.14610442, 0.56222361]])

In [21]:
random_array2.shape

(3, 5)

In [22]:
random_array3 = np.random.rand(3,5)
random_array3

array([[0.45881678, 0.79201026, 0.9630307 , 0.88892381, 0.31703512],
       [0.69037049, 0.67193662, 0.75067425, 0.64581112, 0.11574453],
       [0.32987111, 0.43401987, 0.01853335, 0.68066908, 0.69450351]])

In [23]:
# Pseudo-random numbers
np.random.seed(10) (remains the same results)
np.random.randint(10,100, (5,3)) 


SyntaxError: invalid syntax (Temp/ipykernel_3396/3761187497.py, line 2)

In [None]:
# found somewhere on stack overflow that this function is better to use as the .seed is outdated legacy function
rng = np.random.default_rng(100)
rng.random(size = (3,2,3)), rng.integers(10,100, (3,2,3))

## Viewing Arrays and Matrices

In [None]:
random_array = rng.integers(1,10,(5,3))
random_array

In [None]:
np.unique(random_array)

In [None]:
a1

In [None]:
a2

In [None]:
a3

In [None]:
a1[0] # first element

In [None]:
a2[0], a2[0][0] # first row, first column from first row

In [None]:
a3[0], a3[0][1], a3[0][1][0] # first matrix, second row from first matrix, first column from second row from first matrix 

To get an element from the array you can use square brackets to access it by the index.
If the array a is 2 2x3 matrices stacked on each other you have to type:
<ul> <li> a[0][0][2] - to see third value of first matrix (first row, third column) </li>
<li> a[1][0] - to see first row of second matrix </li>
<li> a[0] - to see first matrix </li> </ul>

In [None]:
# You can also use slicing
a3[0][0:2] # first two rows of first matrix

In [None]:
a3[0,1] # [d, r, c] also works

In [None]:
a3[:2, :2, :2] # first two matrices, first two rows, first two columns

In [None]:
rng = np.random.default_rng()
a4 = rng.integers(10, size = (2,3,4,5))
a4

In [None]:
a4.shape, a4.ndim, a4[0, :2, :2, :3] # access first group, first two matrices and first two rows and three columns

In [None]:
# get the first 4 numbers of the inner most arrays (cutting down last column)
a4[:,:,:,:4]

## 4. Manipulating arrays & comparing arrays

### Arithmetic 

In [None]:
a1

In [None]:
ones = np.ones(3)
ones

In [None]:
a1 + ones

In [None]:
a1 - ones

In [None]:
# multiplies corresponding values
a1 * ones, a1 / ones

In [None]:
a2

In [None]:
display(a1,a2, a1*a2)

In [None]:
# to multiply arrays like matrices:
np.matmul(np.random.randint(10, size = (2,3)), np.random.randint(10, size = (3,2)))

In [None]:
a3

In [None]:
a2 *a3

When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (i.e. rightmost) dimensions and works its way left. Two dimensions are compatible when

they are equal, or

one of them is 1

In [None]:
a1 / ones

In [None]:
a2, a1

In [None]:
a2 // a1 # floor division - removes the decimals (rounds down)

In [None]:
a2 ** a2

In [None]:
a2 % a1

In [None]:
np.exp(a1)

In [None]:
np.log(a1)

### Aggregation
Aggregation - performing same operation on a number of things

In [None]:
listy_list = [1, 2, 3]
type(listy_list)

In [None]:
sum(listy_list)

In [None]:
a1

In [None]:
type(a1)

In [None]:
sum(a1)

In [None]:
np.sum(a1)

Use Python's methods (`sum()`) on python datatypes and use 
Numpy's methods (`np.sum()`) on numpy arrays

In [None]:
# create a massive Numpy array
rng = np.random.default_rng()
massive_array = rng.random(100000)
massive_array.size

In [None]:
massive_array[:10]

In [None]:
# magic functions start with % - this one checks how long it takes to run the code
%timeit sum(massive_array)  # Python's sum
%timeit np.sum(massive_array) # Numpy's sum

In [None]:
a2

In [None]:
np.mean(a2)

In [None]:
# standard deviation - measure of how spread out a group of numbers is from the mean
np.std(a2) # square root of variance

In [None]:
# variance - the average of the squared differences from the mean 
# or  measure of the average degree to which each number is different to the mean
# higher variance - wider range of numbers
# lower variance - lower range of numbers
np.var(a2)

In [None]:
np.sqrt(np.var(a2))

In [None]:
# Demo of std and var

In [None]:
high_var_array = np.array([1, 100, 200, 300, 4000, 5000])
low_var_array = np.array([2, 4, 6, 8, 10])

In [None]:
np.var(high_var_array) ,np.std(high_var_array)

In [None]:
np.var(low_var_array), np.std(low_var_array)

In [None]:
np.mean(low_var_array), np.mean(high_var_array)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.hist(high_var_array)
plt.show

In [None]:
plt.hist(low_var_array)
plt.show

### Reshape and Transpose

In [None]:
a2

In [None]:
a2.shape

In [None]:
a2 * a3

dimensions must either be the same or at least one equal to one

In [None]:
a2_reshaped = a2.reshape((2,3,1))
display(a2_reshaped, a3)

In [None]:
a2_reshaped * a3

In [None]:
 a2.T

In [None]:
a2.shape

In [None]:
a2.T.shape

## Dot Product

In [29]:
import numpy as np

rng = np.random.default_rng(0)
mat1 = rng.integers(10, size = (5,3))
mat2 = rng.integers(10, size = (5,3))

display(mat1, mat2)

array([[8, 6, 5],
       [2, 3, 0],
       [0, 0, 1],
       [8, 6, 9],
       [5, 6, 9]], dtype=int64)

array([[7, 6, 5],
       [5, 9, 2],
       [8, 6, 0],
       [3, 8, 5],
       [0, 7, 7]], dtype=int64)

In [30]:
# Element-wise multiplication (hadamard product)
mat1 * mat2

array([[56, 36, 25],
       [10, 27,  0],
       [ 0,  0,  0],
       [24, 48, 45],
       [ 0, 42, 63]], dtype=int64)

In [31]:
np.dot(mat1, mat2)

ValueError: shapes (5,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)

Shapes are not correct - inner dimensions must be the same for dot product multiplication hence use transpose

In [32]:
np.dot(mat1,mat2.T)

array([[117, 104, 100,  97,  77],
       [ 32,  37,  34,  30,  21],
       [  5,   2,   0,   5,   7],
       [137, 112, 100, 117, 105],
       [116,  97,  76, 108, 105]], dtype=int64)

## Dot product example (nut butter sales)

In [33]:
rng = np.random.default_rng(0)
# number of jars sold
sales_amounts = rng.integers(20, size = (5,3))
sales_amounts

In [39]:
# Create weekly_sales DataFrame
import pandas as pd

weekly_sales = pd.DataFrame(sales_amounts, index = ["Mon", "Tues", "Wed", "Thurs", "Fri"], columns= ["Almond butter", "Peanut butter", "Cashew butter"])
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Mon,17,12,10
Tues,5,6,0
Wed,1,0,3
Thurs,16,12,18
Fri,10,12,19


In [41]:
# Create prices array 
prices = np.array([10, 8, 12])

In [50]:
butter_prices = pd.DataFrame(prices.reshape(1,3), index = ["Price"],
                             columns = ["Almond butter", "Peanut butter", "Cashew butter"])
butter_prices

Unnamed: 0,Almond butter,Peanut butter,Cashew butter
Price,10,8,12


In [57]:
display(sales_amounts.shape, prices.shape)

(5, 3)

(3,)

In [61]:
total_sales = np.dot(sales_amounts, prices)
total

array([386,  98,  46, 472, 424], dtype=int64)

In [65]:
a = np.dot(sales_amounts,prices) 
a == prices.dot(sales_amounts.T)

array([ True,  True,  True,  True,  True])

In [82]:
display(total.shape, total)

(5,)

array([386,  98,  46, 472, 424], dtype=int64)

In [56]:
weekly_sales["Total ($)"] = total
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter,Total ($)
Mon,17,12,10,386
Tues,5,6,0,98
Wed,1,0,3,46
Thurs,16,12,18,472
Fri,10,12,19,424


In [74]:
weekly_sales = weekly_sales.drop("Total ($)", axis=1)

In [76]:
butter_prices.shape, weekly_sales.shape

((1, 3), (5, 3))

In [77]:
daily_sales = butter_prices.dot(weekly_sales.T)

In [78]:
daily_sales

Unnamed: 0,Mon,Tues,Wed,Thurs,Fri
Price,386,98,46,472,424


In [81]:
weekly_sales["Total ($)"] = daily_sales.T
weekly_sales

Unnamed: 0,Almond butter,Peanut butter,Cashew butter,Total ($)
Mon,17,12,10,386
Tues,5,6,0,98
Wed,1,0,3,46
Thurs,16,12,18,472
Fri,10,12,19,424


In [84]:
np.sum(weekly_sales["Total ($)"])

1426

### Comparison Operators

In [88]:
a1

array([1, 2, 3])

In [89]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.2]])

In [95]:
a1 > a2

array([[False, False, False],
       [False, False, False]])

In [93]:
a1 >= a2

array([[ True,  True, False],
       [False, False, False]])

In [96]:
bool_array = a1 >= a2
bool_array

array([[ True,  True, False],
       [False, False, False]])

In [99]:
type(bool_array), bool_array.dtype

(numpy.ndarray, dtype('bool'))

In [100]:
a1 == a2

array([[ True,  True, False],
       [False, False, False]])

## 5. Sorting arrays

In [102]:
rng = np.random.default_rng(0)

In [104]:
random_array = rng.integers(10, size = (3, 5))
random_array

array([[7, 6, 5, 5, 9],
       [2, 8, 6, 0, 3],
       [8, 5, 0, 7, 7]], dtype=int64)

In [107]:
np.sort(random_array)

array([[5, 5, 6, 7, 9],
       [0, 2, 3, 6, 8],
       [0, 5, 7, 7, 8]], dtype=int64)

In [106]:
np.argsort(random_array)

array([[2, 3, 1, 0, 4],
       [3, 0, 4, 2, 1],
       [2, 1, 3, 4, 0]], dtype=int64)

In [109]:
a1

array([1, 2, 3])

In [110]:
np.argsort(a1)

array([0, 1, 2], dtype=int64)

In [111]:
np.argmin(a1)

0

In [112]:
np.min(a1)

1

In [114]:
np.argmin(random_array)

8

In [116]:
np.argmax(random_array, axis = 1)

array([4, 1, 0], dtype=int64)

## 6. Practical Example - turning images into NumPy Arrays

<img src = "images/panda.png"/>

In [121]:
# Turn an image into NumPy array
from matplotlib.image import imread

panda = imread("images/panda.png")

In [123]:
type(panda)

numpy.ndarray

In [125]:
panda.size, panda.shape, panda.ndim # 3 columns from RGB

(24465000, (2330, 3500, 3), 3)

In [128]:
panda[:3]

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]]], dtype=float32)

<img src = "images/dog-photo.png"/>

In [129]:
dog = imread("images/dog-photo.png")

In [130]:
type(dog)

numpy.ndarray

# Numpy Quickstart

In [139]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [140]:
a.reshape(3,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [142]:
# or 
a = np.arange(15).reshape(3,5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [145]:
# array transforms sequences of sequences into two-dimensional arrays,
#sequences of sequences of sequences into three-dimensional arrays, and so on.

a = np.array([(1,2,3), (4,5,6)])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [150]:
np.array([((1,2,3), 
           (4,5,6), 
           (7,8,9)),
          
          ((10,11,12), 
           (13,14,15), 
           (16,17,18))])

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [151]:
# The type of the array can also be explicitly specified at creation time:

c = np.array([[1,2], [3,4]], dtype = complex)
c

array([[1.+0.j, 2.+0.j],
       [3.+0.j, 4.+0.j]])

In [152]:
c.flat

<numpy.flatiter at 0x2528c112300>

In [162]:
for element in c.flat:
    print(element)

(1+0j)
(2+0j)
(3+0j)
(4+0j)


In [165]:
display(a, a.ravel())

array([[1, 2, 3],
       [4, 5, 6]])

array([1, 2, 3, 4, 5, 6])

In [201]:
random_array

array([[7, 6, 5, 5, 9],
       [2, 8, 6, 0, 3],
       [8, 5, 0, 7, 7]], dtype=int64)

In [202]:
array_h = np.array([1,2,3])
array_v = array_h.reshape(3,1)
display(array_h, array_v)

array([1, 2, 3])

array([[1],
       [2],
       [3]])

In [213]:
random_array = np.hstack((random_array, array_v))

In [214]:
random_array

array([[7, 6, 5, 5, 9, 1],
       [2, 8, 6, 0, 3, 2],
       [8, 5, 0, 7, 7, 3]], dtype=int64)

In [210]:
array_h = np.hstack((array_h, np.array([4,5,6])))

In [211]:
array_h

array([1, 2, 3, 4, 5, 6])

In [216]:
np.vstack((random_array, array_h))

array([[7, 6, 5, 5, 9, 1],
       [2, 8, 6, 0, 3, 2],
       [8, 5, 0, 7, 7, 3],
       [1, 2, 3, 4, 5, 6]], dtype=int64)