In [6]:
%load_ext lab_black

In [7]:
def num_gaussian_outliers(data, n):
    """
    Description: Count the outliers in the data using the empirical rule of the gaussian distribution, i.e. values lying more than n standard deviations away from the mean.

    Parameters:
    data(pd.Series): data for which the number of outliers is to be calculated
    n(float): n times standard deviation to set threshold for outliers

    returns(tuple): tuple containing 3 values - (low, high, total number of outliers in the data)
    """
    # Compute the statistics once; they are the same for both thresholds.
    # NOTE: for a pandas Series, std() defaults to the sample standard deviation (ddof=1).
    center = data.mean()
    spread = n * data.std()
    lower_bound = center - spread
    upper_bound = center + spread

    # Comparisons are strict, so values exactly on a threshold are not counted.
    low = len(data[data < lower_bound])
    high = len(data[data > upper_bound])
    return low, high, low + high

In [8]:
def num_whisker_outliers(data, m_factor=1.5):
    """
    Description: Count the outliers in the data using the box-whisker method, i.e. values lying below (Q1 - m_factor*IQR) or above (Q3 + m_factor*IQR).

    Parameters:
    data(pd.Series): data for which the number of outliers is to be calculated
    m_factor(float): multiple of the inter-quartile range used as the whisker length (default 1.5)

    Return(tuple): tuple containing 3 values - (low, high, total number of outliers in the data)
    """
    # Evaluate each quartile once and reuse it for both the IQR and the whiskers.
    first_quartile = data.quantile(0.25)
    third_quartile = data.quantile(0.75)
    iqr = third_quartile - first_quartile

    # Whiskers are anchored at the quartiles (not the median).
    lower_whisker = first_quartile - (m_factor * iqr)
    upper_whisker = third_quartile + (m_factor * iqr)

    # Comparisons are strict, so values exactly on a whisker are not counted.
    low = len(data[data < lower_whisker])
    high = len(data[data > upper_whisker])
    return low, high, low + high

In [9]:
def num_outliers(data, gaussian=True, n=3):
    """
    Description: Dispatch to one of the two outlier-counting methods and return its result.

    Parameters:
    data(pd.Series): data for which the number of outliers is to be calculated
    gaussian(bool): if truthy use num_gaussian_outliers, otherwise use num_whisker_outliers
    n(float): n times standard deviation to set threshold for outliers; only used by the gaussian method

    returns(tuple): the tuple returned by the selected method - (low, high, total number of outliers in the data)
    """
    if not gaussian:
        # The box-whisker method takes no threshold multiplier, so n is ignored.
        return num_whisker_outliers(data)
    return num_gaussian_outliers(data, n)