In [1]:
import platform
platform.uname()

uname_result(system='Windows', node='Laptop_HSoni', release='10', version='10.0.17134', machine='AMD64', processor='Intel64 Family 6 Model 37 Stepping 5, GenuineIntel')

In [2]:
platform.python_version()

'3.7.2'

In [3]:
import numpy as np
print("NumPy Version:",np.__version__)

NumPy Version: 1.15.4


# Universal Functions

- Universal functions (ufuncs) are mathematical functions that operate on NumPy arrays; when a ufunc is applied to an array, it is applied element by element.

- Ufuncs exist in two flavors: unary ufuncs, which operate on a single input, and binary ufuncs, which operate on two inputs.

| Operator 	| Equivalent ufunc 	|
|----------	|------------------	|
|     `+`    	| np.add           	|
|     `-`    	| np.subtract      	|
|     `*`    	| np.multiply      	|
|     `/`    	| np.divide        	|
|    `//`    	| np.floor_divide  	|
|    `**`    	| np.power         	|
|     `%`    	| np.mod           	|

In [4]:
# tabulate renders NumPy arrays as formatted text tables (used for the tables below).
# It is a third-party package; if it is missing, install it from a notebook cell with
# the %pip magic so that it lands in the environment of the running kernel:
# %pip install tabulate
from tabulate import tabulate

In [5]:
# Trigonometric functions: the angles are given in degrees and converted to
# radians once, because np.sin / np.cos / np.tan are evaluated on radians.
theta_degree = np.linspace(0, 90, 7)
theta_radian = np.deg2rad(theta_degree)
trigo = np.array(
    [np.sin(theta_radian),
     np.cos(theta_radian),
     np.tan(theta_radian)])
headers = ["theta", "sin(theta)", "cos(theta)", "tan(theta)"]
# Put the angle row on top of the three function rows, then transpose so that
# every table row corresponds to one angle.
table = tabulate(np.vstack([theta_degree,trigo]).T, headers, tablefmt="fancy_grid")
# NOTE: in the 90 degree row cos prints as ~6e-17 instead of 0 and tan as ~1.6e16
# instead of infinity; these are floating-point artifacts, not exact values.
print(table)

╒═════════╤══════════════╤══════════════╤══════════════╕
│   theta │   sin(theta) │   cos(theta) │   tan(theta) │
╞═════════╪══════════════╪══════════════╪══════════════╡
│       0 │     0        │  1           │  0           │
├─────────┼──────────────┼──────────────┼──────────────┤
│      15 │     0.258819 │  0.965926    │  0.267949    │
├─────────┼──────────────┼──────────────┼──────────────┤
│      30 │     0.5      │  0.866025    │  0.57735     │
├─────────┼──────────────┼──────────────┼──────────────┤
│      45 │     0.707107 │  0.707107    │  1           │
├─────────┼──────────────┼──────────────┼──────────────┤
│      60 │     0.866025 │  0.5         │  1.73205     │
├─────────┼──────────────┼──────────────┼──────────────┤
│      75 │     0.965926 │  0.258819    │  3.73205     │
├─────────┼──────────────┼──────────────┼──────────────┤
│      90 │     1        │  6.12323e-17 │  1.63312e+16 │
╘═════════╧══════════════╧══════════════╧══════════════╛


In [6]:
# Trigonometric functions evaluated directly on angles expressed in radians
theta_degree = np.linspace(0, 90, 7)
theta = np.linspace(0, np.pi/2, 7)   # the same seven angles, in radians
trigo = np.array([np.sin(theta), np.cos(theta), np.tan(theta)])
headers = ["theta_degree", "theta_radian", "sin(theta)", "cos(theta)", "tan(theta)"]
# One column per header: both angle vectors followed by the three ufunc results.
table_data = np.column_stack((theta_degree, theta, *trigo))
table = tabulate(table_data, headers, tablefmt="fancy_grid")
print(table)

╒════════════════╤════════════════╤══════════════╤══════════════╤══════════════╕
│   theta_degree │   theta_radian │   sin(theta) │   cos(theta) │   tan(theta) │
╞════════════════╪════════════════╪══════════════╪══════════════╪══════════════╡
│              0 │       0        │     0        │  1           │  0           │
├────────────────┼────────────────┼──────────────┼──────────────┼──────────────┤
│             15 │       0.261799 │     0.258819 │  0.965926    │  0.267949    │
├────────────────┼────────────────┼──────────────┼──────────────┼──────────────┤
│             30 │       0.523599 │     0.5      │  0.866025    │  0.57735     │
├────────────────┼────────────────┼──────────────┼──────────────┼──────────────┤
│             45 │       0.785398 │     0.707107 │  0.707107    │  1           │
├────────────────┼────────────────┼──────────────┼──────────────┼──────────────┤
│             60 │       1.0472   │     0.866025 │  0.5         │  1.73205     │
├────────────────┼──────────

In [7]:
# Exponents and logarithms: the integers 1..4 serve as the common input
x = np.arange(4) + 1
x

array([1, 2, 3, 4])

In [8]:
headers = ["x", "exp(x)", "2^x", "3^x"]
# One column per header: x itself, e**x, 2**x and 3**x.
exp_columns = np.column_stack((x, np.exp(x), np.exp2(x), np.power(3, x)))
table = tabulate(exp_columns, headers, tablefmt="fancy_grid")
print(table)

╒═════╤══════════╤═══════╤═══════╕
│   x │   exp(x) │   2^x │   3^x │
╞═════╪══════════╪═══════╪═══════╡
│   1 │  2.71828 │     2 │     3 │
├─────┼──────────┼───────┼───────┤
│   2 │  7.38906 │     4 │     9 │
├─────┼──────────┼───────┼───────┤
│   3 │ 20.0855  │     8 │    27 │
├─────┼──────────┼───────┼───────┤
│   4 │ 54.5982  │    16 │    81 │
╘═════╧══════════╧═══════╧═══════╛


In [9]:
# Logarithms are the inverses of the exponentials: natural, base-2 and base-10
headers = ["x", "ln(x)", "log2(x)", "log10(x)"]
log_columns = np.column_stack((x, np.log(x), np.log2(x), np.log10(x)))
table = tabulate(log_columns, headers, tablefmt="fancy_grid")
print(table)

╒═════╤══════════╤═══════════╤════════════╕
│   x │    ln(x) │   log2(x) │   log10(x) │
╞═════╪══════════╪═══════════╪════════════╡
│   1 │ 0        │   0       │   0        │
├─────┼──────────┼───────────┼────────────┤
│   2 │ 0.693147 │   1       │   0.30103  │
├─────┼──────────┼───────────┼────────────┤
│   3 │ 1.09861  │   1.58496 │   0.477121 │
├─────┼──────────┼───────────┼────────────┤
│   4 │ 1.38629  │   2       │   0.60206  │
╘═════╧══════════╧═══════════╧════════════╛


NumPy has many more ufuncs available, including inverse trig functions, hyperbolic trig functions, bitwise arithmetic, comparison operators, rounding and remainders, and much more. A look through the NumPy documentation reveals a lot of interesting functionality.

The following table lists useful **aggregation** functions available in NumPy. (Ref: Python Data Science Handbook, p. 61)

| Function Name 	| NaN-safe Version 	| Description                               	|
|:-------------:	|:----------------:	|:-------------------------------------------:	|
|     np.sum    	|     np.nansum    	| Compute sum of elements                   	|
|    np.prod    	|    np.nanprod    	| Compute product of elements               	|
|    np.mean    	|    np.nanmean    	| Compute mean of elements                  	|
|   np.median   	|   np.nanmedian   	| Compute median of elements                	|
|     np.std    	|     np.nanstd    	| Compute Standard Deviation of elements    	|
|     np.var    	|     np.nanvar    	| Compute Variance of elements              	|
| np.percentile 	| np.nanpercentile 	| Compute rank-based statistics of elements 	|
|     np.min    	|     np.nanmin    	| Find Minimum value                        	|
|     np.max    	|     np.nanmax    	| Find Maximum value                        	|
|   np.argmin   	|   np.nanargmin   	| Find index of Minimum value               	|
|   np.argmax   	|   np.nanargmax   	| Find index of Maximum value               	|
|     np.any    	|        NA        	| Evaluate whether any elements are true    	|
|     np.all    	|        NA        	| Evaluate whether all elements are true    	|




# Random Numbers

There are some good random number features within NumPy, which you access in `np.random` after importing NumPy.

- **`np.random.rand(matsize)`** produces uniformly distributed random numbers in the interval [0, 1) in an array of size matsize (the dimensions are passed as separate integer arguments, e.g. `np.random.rand(3, 5)`)

- **`np.random.randn(matsize)`** produces zero mean, unit variance Gaussian random numbers

- **`np.random.normal(mean,stdev,matsize)`** produces Gaussian random numbers with specified mean and standard deviation

- **`np.random.uniform(low,high,matsize)`** produces uniform random numbers between low and high

- **`np.random.randint(low,high,matsize)`** produces random integer values from low (inclusive) up to, but not including, high

# Comparison, Mask, and Boolean Logic

As in the case of arithmetic operators, the comparison operators are implemented as ufuncs in NumPy; for example, when you write `x < 3`, internally NumPy uses `np.less(x, 3)`. A summary of the comparison operators and their equivalent ufunc is shown here:


## Comparison Operators

| Operator 	| Equivalent ufunc 	|
|:--------:	|:----------------:	|
|     <    	|      np.less     	|
|    <=    	|   np.less_equal  	|
|     >    	|    np.greater    	|
|    >=    	| np.greater_equal 	|
|    ==    	|     np.equal     	|
|    !=    	|   np.not_equal   	|


## Boolean Operators

| Operator 	| Equivalent ufunc 	|
|:--------:	|:----------------:	|
|     &    	|  np.bitwise_and  	|
|    \|    	|   np.bitwise_or  	|
|     ^    	|  np.bitwise_xor  	|
|     ~    	|  np.bitwise_not  	|

Like with the standard arithmetic operators, NumPy overloads *bitwise logic operators* as ufuncs which work element-wise on (usually Boolean) arrays.

In [10]:
# Seed the global generator so the sample matrix is the same on every run,
# then draw a 3x5 matrix of integers from 1 up to (but excluding) 15.
np.random.seed(42)
x = np.random.randint(low=1, high=15, size=(3, 5))
print(x.dtype)
x

int32


array([[ 7,  4, 13, 11,  8],
       [13,  5,  7, 10,  3],
       [ 7, 11, 11,  8,  5]])

In [11]:
# Is at least one value greater than 5?
(x > 5).any()

True

In [12]:
# Is at least one value equal to 7?
(x == 7).any()

True

In [13]:
# For every row: does it contain at least one value below 5?
(x < 5).any(axis=1)

array([ True,  True, False])

In [14]:
# Element-wise comparison: the result is a Boolean array that is True wherever
# the corresponding entry of x is below 8.
x < 8

array([[ True,  True, False, False, False],
       [False,  True,  True, False,  True],
       [ True, False, False, False,  True]])

In [15]:
# Count the entries below 8 (np.less is the ufunc behind the < operator)
np.count_nonzero(np.less(x, 8))

7

In [16]:
# Alternative way of counting: summing the Boolean array works because
# False is treated as 0 and True as 1.
(x < 8).sum()

7

In [17]:
# Number of values below 8 in each row (the sum runs along axis 1)
(x < 8).sum(axis=1)

array([2, 3, 2])

In [18]:
# How many values are greater than 4 and, at the same time, less than 10?
np.logical_and(x > 4, x < 10).sum()

7

In [19]:
# Extremes per column (axis=0) and per row (axis=1); every printed line shows
# the extreme values first and their positions second.
col_min, col_min_at = x.min(axis=0), x.argmin(axis=0)
col_max, col_max_at = x.max(axis=0), x.argmax(axis=0)
row_min, row_min_at = x.min(axis=1), x.argmin(axis=1)
row_max, row_max_at = x.max(axis=1), x.argmax(axis=1)
print("Minimum of each column at Index:", col_min, col_min_at)
print("Maximum of each column at Index:", col_max, col_max_at)
print("Minimum of each row at Index:", row_min, row_min_at)
print("Maximum of each row at Index:", row_max, row_max_at)

Minimum of each column at Index: [7 4 7 8 3] [0 0 1 2 1]
Maximum of each column at Index: [13 11 13 11  8] [1 2 0 0 0]
Minimum of each row at Index: [4 3 5] [1 4 4]
Maximum of each row at Index: [13 13 11] [2 0 1]


In [20]:
# Alternative: np.argwhere lists every position whose value equals the minimum
# of its column. Compare with argmin: a column minimum that occurs more than
# once is reported at each of its positions.
ix = np.argwhere(x == x.min(axis=0))
for pos in ix:
    row, col = pos
    print(f"index:{pos} --> value: {x[row, col]}")

index:[0 0] --> value: 7
index:[0 1] --> value: 4
index:[1 2] --> value: 7
index:[1 4] --> value: 3
index:[2 0] --> value: 7
index:[2 3] --> value: 8


In [21]:
# Every position whose value equals the maximum of its column
ix = np.argwhere(x == x.max(axis=0))
for pos in ix:
    row, col = pos
    print(f"index:{pos} --> value: {x[row, col]}")

index:[0 2] --> value: 13
index:[0 3] --> value: 11
index:[0 4] --> value: 8
index:[1 0] --> value: 13
index:[2 1] --> value: 11


# Boolean Arrays as Masks

A more powerful pattern is to use Boolean arrays as masks, to select particular subsets of the data themselves. Returning to our `x` array from before, suppose we want an array of all values in the array that are less than, say, 8. The comparison `x < 8` shown earlier gives a Boolean array that marks those positions.

Now to select these values from the array, we can simply index on this Boolean array; this is known as a **masking operation**. 

What is returned is a one-dimensional array filled with all the values that meet this condition; in other words, all the values in positions at which the mask array is True.

In [22]:
# Index x with the Boolean mask: only the entries below 8 are kept, returned
# as a one-dimensional array.
x[x < 8]

array([7, 4, 5, 7, 3, 7, 5])

In [23]:
# Entries that equal the maximum of their own column
x[x == x.max(axis=0)]

array([13, 11,  8, 13, 11])

In [24]:
# Entries that equal the minimum of their own column
x[x == x.min(axis=0)]

array([7, 4, 7, 3, 7, 8])

In [25]:
# Broadcasting pitfall (intentional demonstration): x has shape (3, 5) while
# x.min(1) holds one value per row, i.e. three values. Those three values are
# matched against the trailing axis of length 5, so the shapes are incompatible.
# In the recorded run the result was an empty array of shape (0, 3, 5) rather
# than the row minima.
# NOTE(review): newer NumPy releases are expected to raise an error for this
# comparison instead - confirm before re-running.
x[x == x.min(1)]

  


array([], shape=(0, 3, 5), dtype=int32)

In [26]:
# Row-wise minima: keepdims=True keeps the reduced axis with length 1, so the
# (3, 1) column of minima broadcasts across the 5 columns of x.
x[x == x.min(axis=1, keepdims=True)]

array([4, 3, 5])

In [27]:
# Row-wise maxima: keepdims=True keeps the reduced axis with length 1, so the
# (3, 1) column of maxima broadcasts across the 5 columns of x.
x[x == x.max(axis=1, keepdims=True)]

array([13, 13, 11, 11])

# np.where()

In [28]:
# Three routes to the same selection: np.where on the condition, the Boolean
# mask itself, and np.where applied to that mask.
gt = x > 8
wh = np.where(x > 8)
a = np.where(gt)
for caption, result in (
        ("wh: Array of row index and column index respectively\n", wh),
        ("gt: Boolean array\n", gt),
        ("Through masking:\n", a)):
    print(caption, result)

wh: Array of row index and column index respectively
 (array([0, 0, 1, 1, 2, 2], dtype=int64), array([2, 3, 0, 3, 1, 2], dtype=int64))
gt: Boolean array
 [[False False  True  True False]
 [ True False False  True False]
 [False  True  True False False]]
Through masking:
 (array([0, 0, 1, 1, 2, 2], dtype=int64), array([2, 3, 0, 3, 1, 2], dtype=int64))


In [29]:
# Indexing x with the index tuples or with the Boolean mask selects the same values
for label, selector in (("x[wh]:", wh), ("x[gt]:", gt), ("x[a]:", a)):
    print(label, x[selector])

x[wh]: [13 11 13 10 11 11]
x[gt]: [13 11 13 10 11 11]
x[a]: [13 11 13 10 11 11]


In [30]:
# Pair every row index with its column index: one (row, column) pair per line
ix_row, ix_col = np.where(x > 8)
print(np.column_stack((ix_row, ix_col)))

[[0 2]
 [0 3]
 [1 0]
 [1 3]
 [2 1]
 [2 2]]


In [31]:
# np.logical_and combines the two conditions element-wise; the resulting
# Boolean array is used as a mask on x.
x[np.logical_and(x > 4, x < 10)]

array([7, 8, 5, 7, 7, 8, 5])

In [32]:
# Same selection written with the & operator. The parentheses around each
# comparison are required because & binds more tightly than > and <.
x[(x > 4) & (x < 10)]

array([7, 8, 5, 7, 7, 8, 5])

In [33]:
# Add up all values that are greater than 4 and less than 10
x[(x > 4) & (x < 10)].sum()

47

# np.newaxis

`np.newaxis` increases the dimension of an existing array by one each time it is used in an indexing expression.

The np.newaxis is just an alias for the Python constant `None`, which means that wherever you use np.newaxis you could also use None:

Visit https://stackoverflow.com/questions/29241056/how-does-numpy-newaxis-work-and-when-to-use-it

In [34]:
# One-dimensional input for the np.newaxis examples: the integers 0..4
x = np.arange(0, 5)
x

array([0, 1, 2, 3, 4])

In [35]:
x.shape

(5,)

In [36]:
# Turn x into a row vector by inserting a new axis in front of the existing
# one: shape (5,) becomes (1, 5).
row_vec = x[np.newaxis,:]  # equivalent spelling: x[None, :]
row_vec

array([[0, 1, 2, 3, 4]])

In [37]:
row_vec.shape

(1, 5)

In [38]:
# Turn x into a column vector by inserting a new axis after the existing one:
# shape (5,) becomes (5, 1).
col_vec = x[:, np.newaxis]     # equivalent spelling: x[:, None]
col_vec.shape

(5, 1)

# Sorting in NumPy: np.sort and np.argsort

In [39]:
# Reseed the global generator and draw eight integers from 1 up to
# (but excluding) 15 as the array to sort.
np.random.seed(42)
x = np.random.randint(low=1, high=15, size=8)
x

array([ 7,  4, 13, 11,  8, 13,  5,  7])

In [40]:
# np.sort gives back the values of x in ascending order (shown as the cell output)
np.sort(x)

array([ 4,  5,  7,  7,  8, 11, 13, 13])

In [41]:
# The sort method orders the array in place: nothing is assigned, and x itself
# holds the sorted values afterwards, as the print shows.
x.sort()
print(x)

[ 4  5  7  7  8 11 13 13]


A related function is argsort, which instead returns the indices of the sorted elements.

The first element of this result gives the index of the smallest element, the second value gives the index of the second smallest, and so on. These indices can then be used (via fancy indexing) to construct the sorted array if desired:

In [42]:
# Fresh sample for the argsort example. There is no reseed in this cell, so the
# values depend on the state left behind by the earlier np.random.seed call.
x = np.random.randint(low=1, high=15, size=8)
x

array([10,  3,  7, 11, 11,  8,  5,  4])

In [43]:
# Indices that would put x in ascending order: the first entry is the position
# of the smallest element, the second that of the second smallest, and so on.
ix = np.argsort(x)
ix

array([1, 7, 6, 2, 5, 0, 3, 4], dtype=int64)

In [44]:
x[ix]

array([ 3,  4,  5,  7,  8, 10, 11, 11])

# Sorting along rows or columns

In [45]:
# A dedicated, seeded generator produces the 4x6 matrix of integers from 1 up to
# (but excluding) 11 that is used in the row/column sorting examples.
rand = np.random.RandomState(seed=100)
X = rand.randint(low=1, high=11, size=(4, 6))
X

array([[ 9,  9,  4,  8,  8,  1],
       [ 5,  3,  6,  3,  3,  3],
       [ 2,  1,  9,  5,  1, 10],
       [ 7,  3,  5,  2,  6,  4]])

In [46]:
# Sort each column of X independently (axis=0): values are ordered from top to
# bottom within every column.
np.sort(X, axis=0)

array([[ 2,  1,  4,  2,  1,  1],
       [ 5,  3,  5,  3,  3,  3],
       [ 7,  3,  6,  5,  6,  4],
       [ 9,  9,  9,  8,  8, 10]])

In [47]:
# Sort each row of X independently (axis=1): values are ordered from left to
# right within every row.
np.sort(X, axis=1)

array([[ 1,  4,  8,  8,  9,  9],
       [ 3,  3,  3,  3,  5,  6],
       [ 1,  1,  2,  5,  9, 10],
       [ 2,  3,  4,  5,  6,  7]])

# Partial Sorts: Partitioning

NumPy provides the `np.partition` function, which takes an array and a number K; it returns a new array with the smallest K values to the left of the partition, and the remaining values to the right, in arbitrary order.

In [48]:
# Seeded generator for the partition example: six integers from 1 up to
# (but excluding) 10.
rand = np.random.RandomState(seed=42)
x = rand.randint(low=1, high=10, size=6)
x

array([7, 4, 8, 5, 7, 3])

In [49]:
# Partition x with K = 3: the three smallest values end up on the left, the
# remaining values on the right; neither side is sorted.
np.partition(x, 3)

array([5, 3, 4, 7, 7, 8])

In [50]:
# Partition every row of X (axis=1) with K = 2: the two smallest values of each
# row come first, the rest of the row follows in arbitrary order.
np.partition(X, 2, axis=1)

array([[ 1,  4,  8,  9,  8,  9],
       [ 3,  3,  3,  5,  6,  3],
       [ 1,  1,  2,  5,  9, 10],
       [ 2,  3,  4,  7,  6,  5]])