In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
# Render all figure text in a 14 pt serif font for the rest of the notebook.
plt.rcParams.update({'font.family': 'serif', 'font.size': 14})

# Statistical Methods

### Maxima and Minima
- `np.max`, `np.min`

- `np.argmax`, `np.argmin`

- `np.nanargmax`, `np.nanargmin`

- `np.nanmax`, `np.nanmin`

- `np.fmax`,`np.fmin`

- `np.maximum`, `np.minimum`

In [10]:
# Ten evenly spaced samples covering [-5, 5], arranged as 5 rows x 2 columns.
a = np.linspace(-5, 5, num=10).reshape((5, 2))
a

array([[-5.        , -3.88888889],
       [-2.77777778, -1.66666667],
       [-0.55555556,  0.55555556],
       [ 1.66666667,  2.77777778],
       [ 3.88888889,  5.        ]])

In [11]:
# With no axis given, np.max / np.min reduce over every element of `a`.
np.max(a), np.min(a)

(5.0, -5.0)

In [15]:
# Sample 1-D array that contains NaN entries.
a_nan = np.array([5, 6, 7, 8, np.nan, 9, -1, 13, np.nan])
a_nan

array([ 5.,  6.,  7.,  8., nan,  9., -1., 13., nan])

In [17]:
# NaN propagates through np.max / np.min: because a_nan contains NaN,
# both the maximum and the minimum come back as nan.
np.max(a_nan), np.min(a_nan)

(nan, nan)

In [19]:
# When NaN values are present, use np.nanmax / np.nanmin: they skip the NaNs
# and reduce over the remaining values.
np.nanmax(a_nan), np.nanmin(a_nan)

(13.0, -1.0)

In [22]:
# np.fmax compares two arrays element-wise and keeps the larger value of each pair.
# Where exactly one of the two elements is NaN, the non-NaN element is returned.
np.fmax([1, 2, -1, 8, 9, 5, 6, 99, -99], [5, 6, 7, 8, np.nan, 9, -1, 13, np.nan])

array([  5.,   6.,   7.,   8.,   9.,   9.,   6.,  99., -99.])

In [23]:
# Element-wise minimum of the same two arrays; as with np.fmax, a lone NaN is ignored.
np.fmin([1, 2, -1, 8, 9, 5, 6, 99, -99], [5, 6, 7, 8, np.nan, 9, -1, 13, np.nan])

array([  1.,   2.,  -1.,   8.,   9.,   5.,  -1.,  13., -99.])

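`np.fmin` and `np.fmax` ignore NaN values (the result is NaN only when both elements are NaN), whereas `np.maximum` and `np.minimum` propagate NaN: if either element is NaN, the result is NaN.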

In [24]:
# np.maximum propagates NaN: any pair that contains a NaN yields nan in the result.
np.maximum([1, 2, -1, 8, 9, 5, 6, 99, -99], [5, 6, 7, 8, np.nan, 9, -1, 13, np.nan])

array([ 5.,  6.,  7.,  8., nan,  9.,  6., 99., nan])

In [25]:
# np.minimum also propagates NaN: any pair that contains a NaN yields nan in the result.
np.minimum([1, 2, -1, 8, 9, 5, 6, 99, -99], [5, 6, 7, 8, np.nan, 9, -1, 13, np.nan])

array([ 1.,  2., -1.,  8., nan,  5., -1., 13., nan])

### Percentiles
- `np.percentile(array, q, axis)`
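- `q` = percentile(s) to compute, between 0 and 100: 0 returns the minimum, 50 the median, and 100 the maximum; a sequence such as `(25, 50, 75)` returns several percentiles at once
- use `axis=1` to compute the percentile of each row and `axis=0` to compute it for each column; with no `axis`, the whole array is flattened first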

In [36]:
# Integers -10 .. 9 laid out as a 4 x 5 grid, used by the percentile examples.
arr = np.arange(-10, 10).reshape((4, 5))

In [37]:
# Show the 4 x 5 integer array that the percentile examples operate on.
arr

array([[-10,  -9,  -8,  -7,  -6],
       [ -5,  -4,  -3,  -2,  -1],
       [  0,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9]])

In [38]:
# The 0th percentile of the flattened array is its smallest element.
np.percentile(arr, q=0)

-10.0

In [39]:
# The 100th percentile of the flattened array is its largest element.
np.percentile(arr, q=100)

9.0

In [40]:
# The 50th percentile is the median; with no axis it is taken over all elements.
np.percentile(arr, q=50)

-0.5

In [41]:
# axis=1: median of each row, giving one value per row.
np.percentile(arr, q=50, axis=1)

array([-8., -3.,  2.,  7.])

In [42]:
# axis=0: median of each column, giving one value per column.
np.percentile(arr, q=50, axis=0)

array([-2.5, -1.5, -0.5,  0.5,  1.5])

In [54]:
# Several percentiles at once: the quartiles of each column.
# The result has one row per requested percentile and one column per input column.
np.percentile(arr, q=[25, 50, 75], axis=0)

array([[-6.25, -5.25, -4.25, -3.25, -2.25],
       [-2.5 , -1.5 , -0.5 ,  0.5 ,  1.5 ],
       [ 1.25,  2.25,  3.25,  4.25,  5.25]])

In [55]:
# Quartiles of each row: one result row per requested percentile,
# one result column per input row.
np.percentile(arr, q=[25, 50, 75], axis=1)

array([[-9., -4.,  1.,  6.],
       [-8., -3.,  2.,  7.],
       [-7., -2.,  3.,  8.]])

### Average, Variance and Correlations