In [2]:
import numpy as np 

np.mean()

In [4]:
# Create the 3x5 integer sample matrix (one inner list per row) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

[[1 0 0 3 1]
 [3 6 6 2 9]
 [4 5 3 8 0]]


In [5]:
# With no axis given, np.mean() flattens the array (it ignores the shape and
# treats the data as one long 1-dimensional array) and averages every element.
# For integer input such as matrix_A the result dtype defaults to float64.
# NOTE: a cell only displays the value of its last expression, so the results
# of the earlier calls below are computed but not shown.
np.mean(matrix_A)
# mean of the first row of the array
np.mean(matrix_A[0])
# mean of the first column of the array
np.mean(matrix_A[:,0])
# axis = 0 collapses the rows: the means of all the columns in a single pass
np.mean(matrix_A, axis = 0)
# axis = 1 collapses the columns: the means of all the rows in a single pass
np.mean(matrix_A, axis = 1)

array([1. , 5.2, 4. ])

In [None]:
# Row means computed with an integer dtype instead of the float64 default:
# the means come back as integers, so any fractional part is lost.
np.mean(matrix_A, axis = 1, dtype = np.int64)

Min and Max Values 

In [None]:
# Re-create the 3x5 integer sample matrix (one inner list per row) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

In [None]:
# np.min() finds the lowest value of an array along a given axis.
# If no axis is specified (default axis = None), the function works on the flattened array.
# Only the last expression of the cell is displayed.
np.min(matrix_A)
# minimum of the second row (index 1)
np.min(matrix_A[1])

In [None]:
# np.amin() ("array minimum") is an alias for np.min():
# it takes the same arguments and returns the same result.
np.amin(matrix_A)

In [None]:
# np.minimum() is a binary, element-wise function: it takes exactly two
# array-like inputs (unlike np.min(), which reduces a single array).
# It generates an array which holds the element-wise minimal values:
# the values at the same position in both inputs are compared and the lower one is kept.
# The output has the (broadcast) shape of the inputs; here both rows have 5 elements.
# minima - plural
# minimum - singular
np.minimum(matrix_A[0], matrix_A[2])

In [6]:
# reduce() applies np.minimum along axis 0 by default, i.e. it compares the rows
# with each other, which gives the minimum of each column of the matrix.
np.minimum.reduce(matrix_A)

array([1, 0, 0, 2, 0])

In [None]:
# Same column-wise minima as np.minimum.reduce(matrix_A).
# Prefer np.min(): it provides greater flexibility because the axis is an explicit argument.
np.min(matrix_A, axis = 0)

In [None]:
# np.max() with no axis returns the highest value of the flattened array.
np.max(matrix_A)

In [None]:
# np.amax() is an alias for np.max().
np.amax(matrix_A)

In [None]:
# Column-wise maxima: np.maximum is reduced along axis 0 (the default axis of reduce()).
np.maximum.reduce(matrix_A)

Statistical Order Functions 


In [None]:
# Re-create the 3x5 integer sample matrix (one inner list per row) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

In [None]:
#np.ptp() , ptp stands for peak to peak
# It returns the difference between the highest and lowest values within an array 

In [None]:
# Peak to peak over the flattened array: highest value minus lowest value.
np.ptp(matrix_A)

In [None]:
# ptp along axis = 0: one (max - min) value per column
np.ptp(matrix_A, axis = 0)

In [None]:
# ptp along axis = 1: one (max - min) value per row
np.ptp(matrix_A, axis = 1)

In [None]:
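#np.percentile() returns the q-th percentile of a given data set
# Requires the percentile (a number between 0 and 100) as a second input
# A percentile is a value that is greater than or equal to the corresponding % of the dataset
# e.g. the 70-th percentile is the value below which roughly 70 % of the data fall
# To read a percentile off by hand, first sort the array in increasing order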

In [None]:
# axis = None flattens the matrix before sorting, giving one ascending 1-D array.
np.sort(matrix_A, axis = None)

In [None]:
# 70th percentile of the flattened data; the default method is 'linear',
# which interpolates between the two neighbouring data points.
np.percentile(matrix_A, 70)
# The method= keyword is used instead of the older interpolation= keyword
# (the argument was renamed in NumPy 1.22).
# 'midpoint' takes the average of the two neighbouring data points.
np.percentile(matrix_A, 70, method= 'midpoint')
# 'nearest' takes whichever neighbouring data point is closest.
# Only this last result is displayed by the cell.
np.percentile(matrix_A, 70, method= 'nearest')

In [None]:
# The 50th percentile is the median of the data.
np.percentile(matrix_A, 50)

Quantile


In [None]:
# np.quantile(): a quantile expresses a value that is greater than or equal to the corresponding fraction of the dataset
# The quantile function takes its second argument as a fraction between 0 and 1 (0.70 corresponds to the 70-th percentile)

In [None]:
# 0.70 quantile of the flattened data, using the default 'linear' method
# (not displayed: only the last expression of a cell is shown).
np.quantile(matrix_A, 0.70)
# Same quantile, taking the nearest actual data point instead of interpolating.
np.quantile(matrix_A, 0.70, method = 'nearest')

Average and Variance

In [None]:
# Re-create the 3x5 integer sample matrix (one inner list per row) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

In [None]:
#np.median() returns the middle value of a sorted array
# If there is an even number of elements, it is the average of the two middle ones

In [7]:
# Median of the flattened array: matrix_A has 15 elements, so it is the 8th value in sorted order.
np.median(matrix_A)

3.0

In [None]:
# Arithmetic mean of all elements, for comparison with the median.
np.mean(matrix_A)

In [8]:
# np.average() is more versatile than np.mean() because it can also calculate
# the weighted average of a dataset (via the weights argument).
# Without weights it is the plain arithmetic mean of all elements.
np.average(matrix_A)

3.4

In [None]:
from numpy.random import Generator as gen 
from numpy.random import PCG64 as pcg 
# Seeded random generator (PCG64 bit generator, seed 365) so that the weights
# are the same on every run.
array_RG = gen(pcg(seed = 365))
# One weight per element of matrix_A: a 3x5 array of floats drawn from [0, 1).
array_weights = array_RG.random(size = (3,5))
array_weights

In [None]:
# Weighted average over all elements: sum(matrix_A * array_weights) / sum(array_weights).
# With no axis given, the weights must have the same shape as matrix_A.
np.average(matrix_A, weights = array_weights)

In [None]:
# Variance of the flattened array. By default ddof = 0, i.e. the population
# variance (the squared deviations are divided by N, not N - 1).
np.var(matrix_A)

In [None]:
# Standard deviation of the flattened array: the square root of np.var(matrix_A).
np.std(matrix_A)

Covariance and Correlation


In [None]:
# Re-create the 3x5 integer sample matrix (one inner list per row) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

In [9]:
# np.cov() treats each row as a variable and each column as one observation,
# so a 3x5 input gives a 3x3 covariance matrix.
# The elements on the diagonal are the variances of each row respectively.
# These are sample variances (normalized by N - 1), so they differ from
# np.var(), which divides by N by default.
# The off-diagonal element [i, j] is the covariance between row i and row j.
np.cov(matrix_A)

array([[ 1.5, -2. ,  2. ],
       [-2. ,  7.7, -7. ],
       [ 2. , -7. ,  8.5]])

In [None]:
# Pearson correlation coefficients between the rows of matrix_A (3x3 matrix);
# the diagonal is 1 because every row is perfectly correlated with itself.
np.corrcoef(matrix_A)

Histograms

In [None]:
# Re-create the 3x5 integer sample matrix (one inner list per row) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

In [None]:
# Flatten and sort the values in ascending order, to make it easy to see
# which values fall into which histogram bin.
np.sort(matrix_A, axis = None)

In [None]:
# np.histogram() works on the flattened data and returns a tuple of two arrays.
# The first array represents the number of times a value falls within each bin.
# The second array represents the edges of the bins.
# Edge - the ending point of one bin and the starting point of the next bin.
# There is always one more edge than there are bins: len(bin_edges) = bins + 1
# (10 bins are used when bins is not given).
np.histogram(matrix_A)

In [None]:
# Index [0] selects the first returned array: the number of times a value
# falls within each bin (how populated each bin is).
# NOTE: these are raw counts, not a normalized density. For a probability
# density, np.histogram() has a separate density = True argument.
np.histogram(matrix_A)[0]

In [None]:
# Index [1] selects the second returned array: the edges of the histogram bins.
np.histogram(matrix_A)[1]

In [None]:
# Bins are closed-open intervals [a, b), except for the last bin, which is closed: [a, b].
# If a value equals the lower edge of a bin, it counts towards that bin.
# If a value equals the upper edge of a bin, it does not count towards that bin
# (it goes into the next one); only the last bin also includes its upper edge.
np.histogram(matrix_A, bins = 4)

In [None]:
# range = (1, 7) fixes the lowest and highest bin edge; values outside this range are ignored.
np.histogram(matrix_A, bins = 4, range = (1,7))

In [None]:
import matplotlib.pyplot as plt 
# Plot the histogram: matrix_A.flat supplies the elements as 1-D data, and the
# bin edges computed by np.histogram() are passed in as the bins.
plt.hist(matrix_A.flat, bins = np.histogram(matrix_A)[1])
plt.show()

In [18]:
# Display the sample matrix again for reference.
matrix_A

array([[1, 0, 0, 3, 1],
       [3, 6, 6, 2, 9],
       [4, 5, 3, 8, 0]])

In [26]:
# np.histogram2d() bins pairs of coordinates:
#   1st row of matrix_A as x-coordinates
#   2nd row of matrix_A as y-coordinates
# Call with the default bins (10 per dimension); its result is not displayed
# because it is not the last expression in the cell.
np.histogram2d(matrix_A[0], matrix_A[1])
np.histogram2d(matrix_A[0], matrix_A[1], bins = 4)
# The result is a tuple of three arrays.
# The first array holds the number of points in each 2-D bin (raw counts, not a normalized density).
# The other two arrays are the bin edges along x and along y.
# First row - all points whose x-value falls within the first x bin
# row - x bin
# column - y bin

(array([[0., 0., 2., 0.],
        [1., 0., 0., 1.],
        [0., 0., 0., 0.],
        [1., 0., 0., 0.]]),
 array([0.  , 0.75, 1.5 , 2.25, 3.  ]),
 array([2.  , 3.75, 5.5 , 7.25, 9.  ]))

In [27]:
# np.histogramdd() expects one sample point per row: the transpose has shape (5, 3),
# i.e. 5 points in 3 dimensions (one dimension per row of matrix_A).
# bins = 4 uses 4 bins along every dimension; the result is the 4x4x4 array of
# counts followed by a list with the bin edges of each dimension.
np.histogramdd(matrix_A.transpose(), bins = 4)

(array([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 1., 1., 0.],
         [0., 0., 0., 0.]],
 
        [[0., 0., 1., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [1., 0., 0., 0.]],
 
        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],
 
        [[0., 0., 0., 1.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]]),
 [array([0.  , 0.75, 1.5 , 2.25, 3.  ]),
  array([2.  , 3.75, 5.5 , 7.25, 9.  ]),
  array([0., 2., 4., 6., 8.])])

NAN-Equivalent functions 

In [28]:
# functions that work on arrays with missing values 

In [29]:
# Re-create the 3x5 integer sample matrix (no missing values) and print it.
matrix_A = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, 6, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_A)

[[1 0 0 3 1]
 [3 6 6 2 9]
 [4 5 3 8 0]]


In [30]:
# matrix_A has no missing values, so np.nanmean() gives the same result as np.mean().
np.nanmean(matrix_A)

3.4

In [31]:
# Regular mean of the same array, for comparison with np.nanmean().
np.mean(matrix_A)

3.4

In [32]:
# Same sample data with one missing value: element [1, 2] is NaN.
# Because NaN is a float, the whole array is stored as floats.
matrix_B = np.array(
    [
        [1, 0, 0, 3, 1],
        [3, 6, np.nan, 2, 9],
        [4, 5, 3, 8, 0],
    ]
)
print(matrix_B)

[[ 1.  0.  0.  3.  1.]
 [ 3.  6. nan  2.  9.]
 [ 4.  5.  3.  8.  0.]]


In [33]:
# NaN entries are ignored: the mean is taken over the 14 remaining values.
np.nanmean(matrix_B)

3.2142857142857144

In [34]:
# The regular np.mean() does not skip missing values: a single NaN makes the result nan.
np.mean(matrix_B)

nan

In [35]:
# 0.7 quantile of the flattened data, computed while ignoring the NaN entry.
np.nanquantile(matrix_B, 0.7)

4.1

In [36]:
# Variance of the non-NaN values (ddof = 0 by default, as for np.var()).
np.nanvar(matrix_B)

7.882653061224489