# 5. NumPy

### The fundamental library for scientific computing in Python.<br>This library provides a multi-dimensional array object and related functions for fast operations on arrays.
### These functions include:
- Logical Operations
- Mathematical Operations
- Shape Manipulation
- Selection
- Sorting
- Input / Output
- Basic Linear Algebra
- Basic Statistical Operations
- Random simulation

### NumPy arrays are similar to lists in Python. Two reasons to use NumPy over lists:
- Fast operations on arrays using a method called vectorization via broadcasting without using loops
- Efficient operations on arrays because of the way their data is stored.

# 5.1. Importing NumPy

In [75]:
import numpy as np

# 5.2. NumPy Data Type and Attributes

### To check the data type of a NumPy array, first we need to create one.<br>One way of creating a NumPy array is using np.array(), which takes a list of values.

In [76]:
numbers = [_ for _ in range(10)]
numbers

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [77]:
np_numbers = np.array(numbers)
np_numbers

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### To check the data type of a NumPy array, use:

In [78]:
type(np_numbers)

numpy.ndarray

### The NumPy array type is ndarray, which stands for n-dimensional array.<br>Some of the most used attributes of a NumPy array are as follows:
- shape: displays the length of each dimension (rows, columns for a 2-D array)
- ndim: displays the number of dimensions
- size: displays the total number of elements
- dtype: displays the type of the elements

In [79]:
np_numbers.shape

(10,)

In [80]:
np_numbers.ndim

1

In [81]:
np_numbers.size

10

In [82]:
np_numbers.dtype

dtype('int64')

# 5.3. Creating Arrays in NumPy
### To create a NumPy array, use:
- np.array(list)
- np.arange(start, stop, step)
- np.ones(size)
- np.zeros(size)
- np.eye(n_rows, n_columns)
- np.identity(n)

In [83]:
arr1 = np.array([chr(num) for num in range(ord("a"), ord("z") + 1)])
arr1

array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
       'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
      dtype='<U1')

In [84]:
# start and step arguments are optional
arr2 = np.arange(100, 200, 5)
arr2

array([100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160,
       165, 170, 175, 180, 185, 190, 195])

In [85]:
arr3 = np.ones((2, 2))
arr3

array([[1., 1.],
       [1., 1.]])

In [86]:
arr4 = np.zeros(6)
arr4

array([0., 0., 0., 0., 0., 0.])

In [87]:
arr5 = np.eye(3, 4)
arr5

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]])

In [88]:
arr6 = np.identity(4)
arr6

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

### Note: The difference between np.eye() and np.identity() is the second argument of np.eye(), which is optional; if given, an array with the given number of rows and columns is created instead of a square one.
### Note: When creating an array from values of different types, NumPy will upcast the given values to the most general type.

In [89]:
# NumPy will upcast the values to the most general type: str
arr7 = np.array([1, 2.0, "3.1415"])
arr7

array(['1', '2.0', '3.1415'], dtype='<U32')

# 5.4. Creating Random Arrays
### To create random arrays with NumPy, use:
- np.random.randint(low, high, size)
- np.random.random(size)
- np.random.rand(d0, d1, d2, ...)

In [90]:
# low and size are optional arguments
# If size is not given, a single random int number will be returned
np.random.randint(100)

32

In [91]:
random_arr1 = np.random.randint(10, 100, size=(3, 2))
random_arr1

array([[41, 84],
       [33, 45],
       [85, 65]])

In [92]:
# if size is not given, a single random number between 0 and 1 will be returned
np.random.random()

0.21655035442437187

In [93]:
random_arr2 = np.random.random(4)
random_arr2

array([0.13521817, 0.32414101, 0.14967487, 0.22232139])

In [94]:
# if dimensions are not given, a single number between 0 and 1 will be returned
np.random.rand()

0.38648898112586194

In [95]:
random_arr3 = np.random.rand(2, 3, 4)
random_arr3

array([[[0.90259848, 0.44994999, 0.61306346, 0.90234858],
        [0.09928035, 0.96980907, 0.65314004, 0.17090959],
        [0.35815217, 0.75068614, 0.60783067, 0.32504723]],

       [[0.03842543, 0.63427406, 0.95894927, 0.65279032],
        [0.63505887, 0.99529957, 0.58185033, 0.41436859],
        [0.4746975 , 0.6235101 , 0.33800761, 0.67475232]]])

### Random numbers are not really random; they are pseudo-random.<br>To control which sequence of random numbers is generated, the function seed() can be used.

In [96]:
np.random.randint(10)

0

In [97]:
np.random.seed(0)
np.random.randint(10)

5

In [98]:
np.random.seed(0)
np.random.randint(10)

5

# 5.5. Viewing and Slicing Arrays

In [99]:
random_arr3[:]

array([[[0.90259848, 0.44994999, 0.61306346, 0.90234858],
        [0.09928035, 0.96980907, 0.65314004, 0.17090959],
        [0.35815217, 0.75068614, 0.60783067, 0.32504723]],

       [[0.03842543, 0.63427406, 0.95894927, 0.65279032],
        [0.63505887, 0.99529957, 0.58185033, 0.41436859],
        [0.4746975 , 0.6235101 , 0.33800761, 0.67475232]]])

In [100]:
random_arr3[1:]

array([[[0.03842543, 0.63427406, 0.95894927, 0.65279032],
        [0.63505887, 0.99529957, 0.58185033, 0.41436859],
        [0.4746975 , 0.6235101 , 0.33800761, 0.67475232]]])

In [101]:
# This is the same as random_arr3[1][2][3]
random_arr3[1, 2, 3]

0.6747523222590207

In [102]:
# This is the same as random_arr3[0][1:][:, 1:3]
# (note: random_arr3[0][1:][1:3] is NOT equivalent, it would slice the rows twice)
random_arr3[0, 1:, 1:3]

array([[0.96980907, 0.65314004],
       [0.75068614, 0.60783067]])

### To get the unique values in an array, use:

In [103]:
np.unique(np.random.randint(100, size=(10, 10)))

array([ 0,  1,  3,  4,  5,  6,  9, 11, 12, 13, 14, 15, 17, 19, 20, 21, 23,
       25, 28, 29, 31, 32, 34, 35, 36, 37, 38, 39, 41, 42, 46, 47, 48, 49,
       52, 53, 55, 57, 58, 64, 65, 67, 68, 69, 70, 72, 74, 75, 76, 77, 78,
       79, 80, 81, 82, 83, 84, 85, 87, 88, 91, 99])

# 5.6. Mathematical Operations on Arrays
### All mathematical operations like +, -, *, /, //, %, ** can be applied to NumPy arrays.<br>When applying these mathematical operations, both arrays must have the same shape (or shapes that can be broadcast to each other), or a single number can be used.<br>NumPy provides functions that are equivalent to the operators mentioned above.

In [104]:
arr1 = np.random.randint(10, 60, size=(3, 4))
arr2 = np.random.randint(1, 10, size=(3, 4))

In [105]:
arr1, arr2

(array([[51, 45, 10, 41],
        [15, 40, 10, 59],
        [46, 44, 58, 39]]),
 array([[4, 3, 1, 8],
        [6, 1, 3, 8],
        [3, 3, 4, 4]]))

In [106]:
# same as arr1 + arr2
np.add(arr1, arr2)

array([[55, 48, 11, 49],
       [21, 41, 13, 67],
       [49, 47, 62, 43]])

In [107]:
# same as arr1 - arr2
np.subtract(arr1, arr2)

array([[47, 42,  9, 33],
       [ 9, 39,  7, 51],
       [43, 41, 54, 35]])

In [108]:
# same as arr1 * arr2
np.multiply(arr1, arr2)

array([[204, 135,  10, 328],
       [ 90,  40,  30, 472],
       [138, 132, 232, 156]])

In [109]:
# same as arr1 / arr2
np.divide(arr1, arr2)

array([[12.75      , 15.        , 10.        ,  5.125     ],
       [ 2.5       , 40.        ,  3.33333333,  7.375     ],
       [15.33333333, 14.66666667, 14.5       ,  9.75      ]])

In [110]:
# same as arr1 // arr2
np.floor_divide(arr1, arr2)

array([[12, 15, 10,  5],
       [ 2, 40,  3,  7],
       [15, 14, 14,  9]])

In [111]:
# same as arr1 % arr2
np.mod(arr1, arr2)

array([[3, 0, 0, 1],
       [3, 0, 1, 3],
       [1, 2, 2, 3]])

In [112]:
# same as arr1 ** arr2
np.power(arr1, arr2)

array([[        6765201,           91125,              10,
          7984925229121],
       [       11390625,              40,            1000,
        146830437604321],
       [          97336,           85184,        11316496,
                2313441]])

In [113]:
# a single value can be used instead of an array
random_number = np.random.randint(50)
print(random_number)
np.add(arr1, random_number)

18


array([[69, 63, 28, 59],
       [33, 58, 28, 77],
       [64, 62, 76, 57]])

### The .round() function can be used to round all float values in an array.

In [114]:
(np.random.random((7, 3)) * 10).round(2)

array([[5.65, 8.65, 5.09],
       [9.17, 9.21, 0.83],
       [2.78, 0.09, 8.42],
       [6.47, 8.41, 2.65],
       [3.98, 5.53, 1.65],
       [3.7 , 1.46, 5.7 ],
       [7.04, 2.88, 4.33]])

### NumPy library provides some other mathematical functions that can be used on its arrays.<br>These functions include np.sum(), np.mean(), np.average(), np.max(), np.min(), np.var(), np.std(), ...
### Most of these functions take one array, they have an optional argument "axis" which indicates how to apply the function to the given array, there are three modes:

- axis=None (default): apply the function to all the elements
- axis=1: apply the function to each row
- axis=0: apply the function to each column

In [115]:
np.sum(arr1)

458

In [116]:
np.sum(arr1, axis=1)

array([147, 124, 187])

In [117]:
np.sum(arr1, axis=0)

array([112, 129,  78, 139])

### We can compare the time it takes to create a random array of size 1000000 and sum its values using lists and numpy

In [118]:
from random import randint
%timeit sum([randint(1, 100) for _ in range(1000000)])
%timeit np.sum(np.random.randint(1, 101, size=1000000))

615 ms ± 19.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
8.21 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# 5.7. Comparison Operations on Arrays
### NumPy arrays can be compared to each other.<br>Just like mathematical operations, arrays must have the same shape or a single value must be used.<br>When comparing arrays, values at the same position in both arrays are compared to each other.

In [119]:
# same as arr1 == arr2
np.equal(arr1, arr2)

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

In [120]:
# same as arr1 > arr2
np.greater(arr1, arr2)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [121]:
# same as arr1 <= arr2
np.less_equal(arr1, arr2)

array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False, False]])

# 5.8. Reshape the Array
### NumPy provides two ways to reshape an array:
- .reshape(new_shape): returns the reshaped array and leaves the original unchanged (not in place)
- .resize(new_shape): reshapes the array in place

### Note: The new shape must have the same size as the old shape.

In [122]:
arr1.shape, arr1.size

((3, 4), 12)

In [123]:
arr1.reshape((2, 6))

array([[51, 45, 10, 41, 15, 40],
       [10, 59, 46, 44, 58, 39]])

In [124]:
# did not change the original array
arr1.shape, arr1.size

((3, 4), 12)

In [125]:
arr1.resize((2, 6))
arr1

array([[51, 45, 10, 41, 15, 40],
       [10, 59, 46, 44, 58, 39]])

In [126]:
arr1.shape, arr1.size

((2, 6), 12)

### NumPy provides the ability to transpose an array, which means turning its rows into columns.

In [127]:
arr1

array([[51, 45, 10, 41, 15, 40],
       [10, 59, 46, 44, 58, 39]])

In [128]:
# arr1.transpose() does the same thing
arr1.T

array([[51, 10],
       [45, 59],
       [10, 46],
       [41, 44],
       [15, 58],
       [40, 39]])

# 5.9. Dot Product
### Dot product is an operation that can be performed on two matrices.<br>Note: The number of columns of the first matrix must equal the number of rows of the second matrix; the result has shape (m, n). The shape of the matrices must follow the pattern:
- m1.shape = (m, k)
- m2.shape = (k, n)

In [129]:
arr1 = np.random.randint(1, 10, size=(3, 4))
arr2 = np.random.randint(1, 10, size=(4, 3))

In [130]:
arr1, arr2

(array([[7, 8, 4, 1],
        [7, 5, 7, 2],
        [2, 1, 3, 3]]),
 array([[1, 1, 4],
        [2, 6, 4],
        [4, 6, 2],
        [5, 1, 2]]))

In [131]:
# arr1 @ arr2 does the same thing
np.dot(arr1, arr2)

array([[44, 80, 70],
       [55, 81, 66],
       [31, 29, 24]])

# 5.10. Sorting Arrays

### The method .sort() can be used to sort an array in place along a given axis (-1, 0, 1).<br>This method has an argument kind= which allows choosing which sorting algorithm to use.

In [132]:
arr1

array([[7, 8, 4, 1],
       [7, 5, 7, 2],
       [2, 1, 3, 3]])

In [133]:
arr1.sort()
arr1

array([[1, 4, 7, 8],
       [2, 5, 7, 7],
       [1, 2, 3, 3]])

### Comparing the time it takes to sort a random int array of size 1000000

In [134]:
# Build one random 1000x1000 integer matrix and three independent copies so
# that each in-place sort benchmark below operates on its own buffer.
arr = np.random.randint(100, size=(1000, 1000))
arr1 = np.copy(arr)
# NOTE: basic slicing (arr[:]) returns a view that shares memory with arr, so
# sorting it in place would also sort arr. Take a real copy instead.
arr2 = arr.copy()
arr3 = arr.copy()

In [135]:
arr.shape, arr.size, arr.ndim, arr.dtype

((1000, 1000), 1000000, 2, dtype('int64'))

In [136]:
%timeit arr.sort(kind="mergesort")
%timeit arr1.sort(kind="heapsort")
%timeit arr2.sort(kind="stable")
%timeit arr3.sort(kind="quicksort")

833 µs ± 104 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
51.6 ms ± 1.09 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
640 µs ± 23.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
11.2 ms ± 433 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# 5.11. Practical Uses of NumPy
## 5.11.1 Converting Images to Arrays for Manipulations

In [137]:
from matplotlib.pyplot import imread, imsave
scrap_beast = imread("pics/scrap_beast.png")

In [138]:
scrap_beast.shape, scrap_beast.size, scrap_beast.ndim, scrap_beast.dtype

((544, 544, 3), 887808, 3, dtype('float32'))

In [139]:
scrap_beast = scrap_beast * np.random.random()

In [140]:
imsave("pics/scrap_beast_new.png", scrap_beast)

## 5.11.2 Mathematical Computations

In [141]:
prices = (np.random.random((7, 7)) * 100).round(2)
prices

array([[ 8.72, 85.44, 14.21, 20.7 , 91.11,  8.37, 24.19],
       [24.43, 10.53, 80.99, 95.95, 72.89, 87.76, 84.08],
       [82.58, 92.7 , 12.53, 81.03,  2.32, 36.75, 35.01],
       [57.02,  3.07, 30.54, 25.38, 83.88, 85.89, 86.43],
       [39.88, 55.1 , 21.63, 47.51, 70.26, 85.25, 90.93],
       [34.96,  1.8 , 68.7 , 30.42, 38.72,  3.01,  3.16],
       [88.03, 68.06, 29.64,  3.21, 95.04, 14.71, 71.36]])

In [142]:
average = np.mean(prices)
average

48.691428571428574

In [143]:
variance = np.divide(np.sum((prices - average) ** 2), prices.size)
variance

1073.6828163265307

In [144]:
standard_deviation = np.sqrt(variance)
standard_deviation

32.76709960198691

In [145]:
# NumPy provides two functions for calculating variance and standard deviation
np.var(prices), np.std(prices)

(1073.6828163265307, 32.76709960198691)

## 5.11.3 Creating DataFrames from NumPy

In [146]:
# Creating a sales record for three products for a week
np.random.seed(1)
sales = np.random.randint(30, size=(7, 3))
sales

array([[ 5, 11, 12],
       [ 8,  9, 11],
       [ 5, 15,  0],
       [16,  1, 12],
       [ 7, 13, 28],
       [ 6, 25, 18],
       [20,  5, 18]])

In [147]:
sales_df = pd.DataFrame(sales, index=["Sa", "Su", "Mo", "Tu", "We", "Th", "Fr"], columns=["Orange", "Apple", "Kiwi"])
sales_df

Unnamed: 0,Orange,Apple,Kiwi
Sa,5,11,12
Su,8,9,11
Mo,5,15,0
Tu,16,1,12
We,7,13,28
Th,6,25,18
Fr,20,5,18


In [148]:
np.random.seed(0)
prices = (np.random.random(3)).round(2)
prices

array([0.55, 0.72, 0.6 ])

In [149]:
prices_df = pd.DataFrame(prices.reshape((1, 3)), index=["Prices"], columns=["Orange", "Apple", "Kiwi"])
prices_df

Unnamed: 0,Orange,Apple,Kiwi
Prices,0.55,0.72,0.6


In [150]:
sales_df["Daily Sales"] = np.sum(sales, axis=1)
sales_df

Unnamed: 0,Orange,Apple,Kiwi,Daily Sales
Sa,5,11,12,28
Su,8,9,11,28
Mo,5,15,0,20
Tu,16,1,12,29
We,7,13,28,48
Th,6,25,18,49
Fr,20,5,18,43


In [151]:
sales_df["Total Sales"] = np.sum(np.multiply(sales, prices), axis=1)
sales_df

Unnamed: 0,Orange,Apple,Kiwi,Daily Sales,Total Sales
Sa,5,11,12,28,17.87
Su,8,9,11,28,17.48
Mo,5,15,0,20,13.55
Tu,16,1,12,29,16.72
We,7,13,28,48,30.01
Th,6,25,18,49,32.1
Fr,20,5,18,43,25.4
