In [1]:
from typing import List, Union

In [2]:
def arithmetic_mean(data: List[float]) -> float:
    """
    Compute the arithmetic mean of a data set

    Args
    data: List[float]
        Values to average

    Returns
    float
        Sum of the values divided by the number of values
    """

    total = sum(data)
    count = len(data)
    # An empty data set makes count zero, so the division raises ZeroDivisionError
    return total / count

In [3]:
def weighted_average(data: List[float], weights: List[float]) -> float:
    """
    Compute the weighted average of a data set

    Args
    data: List[float]
        Values to average
    weights: List[float]
        Weight applied to each value, paired by position

    Notes:
        Every weight must be strictly positive
        Data set and weight set must have the same size

    Returns
    float
        Sum of value * weight divided by the sum of the weights

    Raises
    ValueError
        If the two lists differ in size or any weight is zero or negative
    """

    if len(data) != len(weights):
        raise ValueError("Data and weights arrays must have the same size")

    for weight in weights:
        if weight <= 0:
            raise ValueError("Every weight must be positive")

    # Accumulate the numerator pair by pair, starting from 0 like sum() does
    numerator = 0
    for value, weight in zip(data, weights):
        numerator += value * weight

    return numerator / sum(weights)

In [4]:
def median(data: List[float]) -> Union[int, float]:
    """
    Compute the median of a data set

    Args
    data: List[float]
        Values to inspect (the input list is not modified)

    Returns
    int, float
        Middle value of the sorted data when the size is odd,
        mean of the two middle values when the size is even
    """

    ordered = sorted(data)
    middle, is_odd = divmod(len(ordered), 2)

    if is_odd:
        return ordered[middle]

    # Even size: average the two values around the centre
    return (ordered[middle - 1] + ordered[middle]) / 2

In [5]:
def quartiles(data: List[float]) -> dict:
    """
    Compute the quartiles of a data set

    Args
    data: List[float]
        Values to inspect (the input list is not modified)

    Returns
    dict: {'Q1': ..., 'Q2': ..., 'Q3': ...}
        Q1 and Q3 are linearly interpolated at rank (n + 1) * p of the
        sorted data; Q2 is the median
    """

    ordered = sorted(data)
    size = len(ordered)

    def quartile_position(p: float) -> float:
        # 1-based fractional rank of the requested quantile
        rank = (size + 1) * p
        lower = int(rank)

        # Ranks that fall outside the data are clamped to the extremes
        if lower < 1:
            return ordered[0]
        if lower >= size:
            return ordered[-1]

        # Interpolate between the two neighbouring order statistics
        fraction = rank - lower
        below = ordered[lower - 1]
        return below + fraction * (ordered[lower] - below)

    first = quartile_position(.25)
    second = median(data)
    third = quartile_position(.75)

    return {'Q1': first, 'Q2': second, 'Q3': third}

In [6]:
from collections import Counter

In [7]:
def mode(
        data: List[float],
        all_modes: bool = False
    ) -> Union[List[Union[int, float]], int, float, None]:

    """
    Calculate data's mode

    Args
    data: List[float]
        Set of data
    all_modes: bool (False)
        If true, returns a list with all modes, if there's more than one
        If false, returns the mode (most frequent value); on a tie, the
        value that appears first in data is returned

    Returns
    List[int, float] or int or float or None
        Most frequent values (Data's mode)
        For empty data: an empty list when all_modes is true, None otherwise
    """

    count = Counter(data)

    # max() raises on an empty sequence, so handle empty data explicitly
    if not count:
        return [] if all_modes else None

    max_frequency = max(count.values())
    # Counter keeps first-occurrence order, so modes are listed in that order
    modes = [value for value, freq in count.items() if freq == max_frequency]

    return modes if all_modes else modes[0]

In [8]:
def frequency(
        data: List[float]
    ) -> dict:

    """
    Calculate data's frequencies

    Args
    data: List[float]
        Set of data

    Returns
    dict: {value_i: count_i, ...}
        Number of occurrences of each distinct value, with the keys
        in ascending order of value
    """

    counts = Counter(data)
    # Build the dict from the sorted keys so iteration order matches the
    # documented "sorted" contract (dicts preserve insertion order)
    return {value: counts[value] for value in sorted(counts)}

In [9]:
def percentile(
    data: List[float],
    percent: float
) -> Union[int, float]:

    """
    Calculate data's percentile

    Args
    data: List[float]
        Set of data
    percent: float
        Fraction in the closed interval [0, 1] (e.g. 0.9 for the 90th percentile)

    Returns
    Union[int, float]
        Element at index int(n * percent) of the sorted data, clamped to the
        last element so that percent == 1 returns the maximum

    Raises
    ValueError
        If percent is outside [0, 1]
    IndexError
        If data is empty
    """

    # A negative fraction would silently index from the end of the list
    if not 0 <= percent <= 1:
        raise ValueError("percent must be between 0 and 1")

    sorted_data = sorted(data)
    n = len(sorted_data)

    # int(n * percent) equals n when percent == 1, which is out of range
    idx = min(int(n * percent), n - 1)

    return sorted_data[idx]

In [10]:
def mean_absolute_deviation(
    data: List[float],
    inference: bool = False
) -> float:

    """
    Calculate data's mean absolute deviation

    Args
    data: List[float]
        Set of data
    inference: bool (False)
        If True, divide by (n - 1) (version used for inference)
        If False, divide by n (traditional version)

    Returns
    float
        Data's mean absolute deviation
    """

    # Compute the mean once; recomputing it per element is quadratic
    mean = arithmetic_mean(data)
    total_deviation = sum(abs(x - mean) for x in data)

    divisor = len(data) - 1 if inference else len(data)
    return total_deviation / divisor

In [None]:
def variance(
    data: List[float],
    inference: bool = False,
) -> float:

    """
    Calculate data's variance

    Args
    data: List[float]
        Set of data
    inference: bool (False)
        If True, divide by (n - 1) (version used for inference)
        If False, divide by n (traditional version)

    Returns
    float
        Data's variance
    """

    # Compute the mean once; recomputing it per element is quadratic
    mean = arithmetic_mean(data)
    squared_deviations = sum((x - mean) ** 2 for x in data)

    divisor = len(data) - 1 if inference else len(data)
    return squared_deviations / divisor

In [None]:
import numpy as np

In [None]:
def standard_deviation(data: List[float], inference: bool = False) -> float:
    """
    Compute the standard deviation of a data set

    Args
    data: List[float]
        Values to inspect
    inference: bool (False)
        Forwarded to variance() to select which version is computed

    Returns
    float
        Square root of the data's variance
    """

    spread = variance(data, inference)
    return np.sqrt(spread)

In [None]:
def variation_coefficient(data: List[float], inference: bool = False) -> float:
    """
    Compute the variation coefficient of a data set

    Args
    data: List[float]
        Values to inspect
    inference: bool (False)
        Forwarded to standard_deviation() to select which version is computed

    Returns
    float
        Standard deviation divided by the arithmetic mean
    """

    deviation = standard_deviation(data, inference)
    return deviation / arithmetic_mean(data)

In [None]:
def Tchebichev_theorem(
    data: List[float],
    k: Union[int, float],
    inference: bool = False
) -> bool:

    """
    Check Tchebichev's theorem on a data set: the proportion of values
    within k standard deviations of the arithmetic mean must be at least
    1 - 1/k^2.

    Args
    data: List[float]
        Set of data
    k: Union[int, float]
        Standard deviation multiple considered to contain the data
    inference: bool (False)
        If True, consider the calculation version used for inference
        If False, consider the traditional calculation version

    Returns
    bool
        True when the observed proportion of values inside
        [mean - k * std_dev, mean + k * std_dev] is at least 1 - 1/k^2

    Raises
    ValueError
        If k is not positive
    """

    # k == 0 would divide by zero below and a negative k inverts the limits
    if k <= 0:
        raise ValueError("k must be positive")

    mean = arithmetic_mean(data)
    std_dev = standard_deviation(data, inference)

    k_std_dev = k * std_dev

    inf_limit = mean - k_std_dev
    sup_limit = mean + k_std_dev

    # Minimum proportion guaranteed by the theorem
    limit_range = 1 - 1 / (k ** 2)

    inside = sum(1 for x in data if inf_limit <= x <= sup_limit)
    proportion = inside / len(data)

    # bool() keeps the return a plain Python bool even with NumPy scalars
    return bool(proportion >= limit_range)