***
<b>Author:</b> Raghavendra Tapas

<b>Updated On:</b> May 2021

<b>Context:</b>  Syntax Introduction to `NumPy`

Feel free to reach out to me on __[Twitter](https://twitter.com/raghutapas12)__ for any corrections or additional updates!
***

# NumPy

In [1]:
import numpy as np
import pandas as pd

## DataTypes and Attributes

In [2]:
# NumPy's main datatype is ndarray

a1 = np.array([1,2,3])

In [3]:
type(a1)

numpy.ndarray

In [4]:
a2 = np.array([[1,2,3],
             [4,5,6],
             [6,7,8]])
a2

array([[1, 2, 3],
       [4, 5, 6],
       [6, 7, 8]])

## Creating Arrays

In [5]:
sample_array = np.array([1,2,3])
sample_array

array([1, 2, 3])

In [6]:
# Creating a Matrix containing 1's
ones = np.ones((2,3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [7]:
ones.dtype

dtype('float64')

In [8]:
# Creating a Matrix containing 0's, 3 dimensional
zeros = np.zeros((2,3,4))
zeros

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [9]:
# np.arange(start:stop:step)
range_array = np.arange(0,10,2)
range_array

array([0, 2, 4, 6, 8])

In [10]:
range_array = np.arange(0,11,2)
range_array

array([ 0,  2,  4,  6,  8, 10])

In [11]:
random_array = np.random.randint(0, 10, size = (3,5))
random_array

array([[7, 3, 7, 9, 6],
       [3, 9, 2, 5, 4],
       [3, 3, 7, 5, 1]])

In [12]:
# random array of value ranging from 0 to 1
rd_array2 = np.random.random((5,3))
rd_array2

array([[0.71832876, 0.12493105, 0.73606102],
       [0.30388106, 0.35059453, 0.16359319],
       [0.91119101, 0.2334643 , 0.42472491],
       [0.57874416, 0.6779144 , 0.37640605],
       [0.54742404, 0.11664565, 0.14639244]])

In [13]:
# Pseudo Random numbers. To create same random numbers everytime

np.random.seed(1)
array1 = np.random.randint(10,size=(5,3))
array1

array([[5, 8, 9],
       [5, 0, 0],
       [1, 7, 6],
       [9, 2, 4],
       [5, 2, 4]])

In [14]:
np.random.seed(2)
array2 = np.random.random((5,3))
array2

array([[0.4359949 , 0.02592623, 0.54966248],
       [0.43532239, 0.4203678 , 0.33033482],
       [0.20464863, 0.61927097, 0.29965467],
       [0.26682728, 0.62113383, 0.52914209],
       [0.13457995, 0.51357812, 0.18443987]])

In [15]:
# seed number could be anything
np.random.seed(seed=42)
array2 = np.random.random((5,3))
array2

array([[0.37454012, 0.95071431, 0.73199394],
       [0.59865848, 0.15601864, 0.15599452],
       [0.05808361, 0.86617615, 0.60111501],
       [0.70807258, 0.02058449, 0.96990985],
       [0.83244264, 0.21233911, 0.18182497]])

## Viewing arrays and Matrices

In [16]:
array1

array([[5, 8, 9],
       [5, 0, 0],
       [1, 7, 6],
       [9, 2, 4],
       [5, 2, 4]])

In [17]:
# np.unique returns unique or distinct objects of array
np.unique(array1)

array([0, 1, 2, 4, 5, 6, 7, 8, 9])

In [18]:
# Exercise filtering a part of a 4 dimensional array
np.random.seed(5)
a4 = np.random.randint(10, size=(2,3,4,5))
a4

array([[[[3, 6, 6, 0, 9],
         [8, 4, 7, 0, 0],
         [7, 1, 5, 7, 0],
         [1, 4, 6, 2, 9]],

        [[9, 9, 9, 1, 2],
         [7, 0, 5, 0, 0],
         [4, 4, 9, 3, 2],
         [4, 6, 9, 3, 3]],

        [[2, 1, 5, 7, 4],
         [3, 1, 7, 3, 1],
         [9, 5, 7, 0, 9],
         [6, 0, 5, 2, 8]]],


       [[[6, 8, 0, 5, 2],
         [0, 7, 7, 6, 0],
         [0, 8, 5, 5, 9],
         [6, 4, 5, 2, 8]],

        [[8, 1, 6, 3, 4],
         [1, 8, 0, 2, 2],
         [4, 1, 6, 3, 4],
         [3, 1, 4, 2, 3]],

        [[4, 9, 4, 0, 6],
         [6, 9, 2, 9, 3],
         [0, 8, 8, 9, 7],
         [4, 8, 6, 8, 0]]]])

In [19]:
a4.shape, a4.ndim

((2, 3, 4, 5), 4)

In [20]:
# get the first 3 number of inner most arrays of a4
a4[: , :, :, :3]

array([[[[3, 6, 6],
         [8, 4, 7],
         [7, 1, 5],
         [1, 4, 6]],

        [[9, 9, 9],
         [7, 0, 5],
         [4, 4, 9],
         [4, 6, 9]],

        [[2, 1, 5],
         [3, 1, 7],
         [9, 5, 7],
         [6, 0, 5]]],


       [[[6, 8, 0],
         [0, 7, 7],
         [0, 8, 5],
         [6, 4, 5]],

        [[8, 1, 6],
         [1, 8, 0],
         [4, 1, 6],
         [3, 1, 4]],

        [[4, 9, 4],
         [6, 9, 2],
         [0, 8, 8],
         [4, 8, 6]]]])

## Manipulating Arrays

### Basic Operators

In [21]:
a1

array([1, 2, 3])

In [22]:
# adding 4 to all elements of array

a1 + 4

array([5, 6, 7])

In [23]:
# subtracting 4 from all elements of array
a1 - 4

array([-3, -2, -1])

In [24]:
a1 * 4

array([ 4,  8, 12])

In [25]:
a1 / 4

array([0.25, 0.5 , 0.75])

In [26]:
# floor division removes the decimels
a1 // 4

array([0, 0, 0], dtype=int32)

In [27]:
a1 ** 2

array([1, 4, 9], dtype=int32)

In [28]:
# modulo shows us the element wise remainders. modulo 
a1 % 2

array([1, 0, 1], dtype=int32)

In [29]:
np.exp(a1)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [30]:
np.log(a1)

array([0.        , 0.69314718, 1.09861229])

### Aggregation

Performing the same operation on a number of things.

Use Python's methods on (`sum()`) Python datatypes and use NumPy's methods on NumPy arrays(`np.sum()`)

In [31]:
# Create a big NumPy Array
big_array = np.random.random(100000)
big_array.size

100000

In [32]:
# limited view
big_array[:10]

array([0.80140153, 0.0539621 , 0.19047777, 0.45241885, 0.70294208,
       0.33204815, 0.3599832 , 0.92147057, 0.95363051, 0.40768573])

In [33]:
%timeit sum(big_array)  #Python's sum()

%timeit np.sum(big_array) # NumPy's np.sum()

13.4 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
78.5 µs ± 6.81 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [34]:
a = 13200/78
print(f"NumPy's sum is {a} times faster than Python's sum function")

NumPy's sum is 169.23076923076923 times faster than Python's sum function


### Reshape

In [35]:
# numpy.reshape(a, newshape, order='C'):
# Gives a new shape to an array without changing its data.

# a2 is a 1D NumPy array
a2 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

# reshape it into a 4*3 matrix
a2.reshape(4,3)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [36]:
# a2 is a 1D NumPy array
a2 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

# reshape it into a 3*4 (rows * columns) matrix
a2.reshape(3,4)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

### Transpose

In [37]:
a2 = np.array([[1,2,3],[4,5,6]])
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [38]:
# Transposing matrices
a2.T

array([[1, 4],
       [2, 5],
       [3, 6]])

### Dot Product

In [39]:
np.random.seed(6)

mat1 = np.random.randint(10, size = (3, 2))
mat1

array([[9, 3],
       [4, 0],
       [9, 1]])

In [40]:
np.random.seed(7)
mat2 = np.random.randint(10, size = (2, 3))
mat2

array([[4, 9, 6],
       [3, 3, 7]])

shape of both the matrices is as follows
(m * `n`) multiplied by (`n` * l) 

In [41]:
mat1.shape, mat2.shape

((3, 2), (2, 3))

In [42]:
mat3 = np.dot(mat1,mat2)
mat3

array([[45, 90, 75],
       [16, 36, 24],
       [39, 84, 61]])

## Exercise: Store Sales

In [44]:
sales_amounts = np.array([[2,7,1],
                         [9,4,16],
                         [11,14,18],
                         [13,13,16],
                         [15,18,9]])

weekly_sales = pd.DataFrame(sales_amounts, 
                            index = ["Mon", "Tues", "Wed", "Thu","Fri"],
                            columns = ["Almond","Peanut","Cashew"])


weekly_sales


Unnamed: 0,Almond,Peanut,Cashew
Mon,2,7,1
Tues,9,4,16
Wed,11,14,18
Thu,13,13,16
Fri,15,18,9


In [45]:
# Create prices array
prices = np.array([10,8,12])
prices = pd.DataFrame(prices.reshape(1,3),
                      index = ["Prices"],
                      columns = ["Almond","Peanut","Cashew"])
prices

Unnamed: 0,Almond,Peanut,Cashew
Prices,10,8,12


In [46]:
# Dot product of prices with sales_amounts.
# total_sales = prices.weekly_sales

total_sales = prices.dot(weekly_sales.T)
total_sales

Unnamed: 0,Mon,Tues,Wed,Thu,Fri
Prices,88,314,438,426,402


In [47]:
# checking the shapes of matrices. In order to align matrices, we transpose weekly_sales to 3*5
prices.shape, weekly_sales.shape

((1, 3), (5, 3))

In [48]:
# append transposed total sales to the weekly sales
weekly_sales["Total ($)"] = total_sales.T
weekly_sales

Unnamed: 0,Almond,Peanut,Cashew,Total ($)
Mon,2,7,1,88
Tues,9,4,16,314
Wed,11,14,18,438
Thu,13,13,16,426
Fri,15,18,9,402


## Comparison Operators on Matrices

In [49]:
a1 = np.array([[1, 2, 3],
              [4, 5, 6]])

a2 = np.array([[1, 2, 4],
              [5, 7 ,6]])

In [51]:
a1 > a2

array([[False, False, False],
       [False, False, False]])

In [52]:
a1 < a2

array([[False, False,  True],
       [ True,  True, False]])

In [50]:
a1 == a2

array([[ True,  True, False],
       [False, False,  True]])

In [53]:
a1 != a2

array([[False, False,  True],
       [ True,  True, False]])

In [54]:
bool_array = a1 < a2
bool_array

array([[False, False,  True],
       [ True,  True, False]])

In [55]:
type(bool_array)

numpy.ndarray

## Sorting, max, min on Arrays

In [56]:
# generate a random array
np.random.seed(1)
random_array = np.random.randint(10, size = (3,5))
random_array

array([[5, 8, 9, 5, 0],
       [0, 1, 7, 6, 9],
       [2, 4, 5, 2, 4]])

### Sorting

In [57]:
# Return a sorted copy of an array.
np.sort(random_array)

array([[0, 5, 5, 8, 9],
       [0, 1, 6, 7, 9],
       [2, 2, 4, 4, 5]])

In [58]:
# Returns the indices that would sort an array. 
np.argsort(random_array)

array([[4, 0, 3, 1, 2],
       [0, 1, 3, 2, 4],
       [0, 3, 1, 4, 2]], dtype=int64)

### argmin

In [59]:
random_array

array([[5, 8, 9, 5, 0],
       [0, 1, 7, 6, 9],
       [2, 4, 5, 2, 4]])

In [60]:
# axis = 0 is rows
np.argmin(random_array, axis=0)

array([1, 1, 2, 2, 0], dtype=int64)

In [61]:
# axis = 1 is columns
np.argmin(random_array, axis=1)

array([4, 0, 0], dtype=int64)

Note: Same methodology for `argmax`, where `argmax` returns index location of maximum numbers

## Turn Image into NumPy Array

In [62]:
from matplotlib.image import imread

panda = imread("../images/panda.png")


In [63]:
panda.size, panda.shape, panda.ndim

(24465000, (2330, 3500, 3), 3)

In [66]:
panda[:2]

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]]], dtype=float32)