# Descriptive Statistics

## Description

Descriptive statistics are brief coefficients that summarize a given data set, which may represent either an entire population or a sample of it. They are broken down into measures of central tendency and measures of variability (spread).

## Import

In [0]:
from collections import Counter
from typing import List

## Data

In [0]:
# Integer sequences used by the single-variable examples below.
a = [*range(1, 11)]         # 1..10, ascending
b = [*range(11, 0, -1)]     # 11..1, descending
c = [*a, 1]                 # `a` plus a second 1  -> one most-frequent value
d = [*c, 2]                 # `c` plus a second 2  -> two most-frequent values
e = [*range(2, 21, 2)]      # even numbers 2..20

# Paired samples used by the correlation examples below.
age_x = [43, 21, 25, 42, 57, 59]
glucose_level_y = [99, 65, 79, 75, 87, 81]
math_score = [56, 29, 45, 93, 67, 38, 85, 77, 56, 71]
time_taken_to_run_100m_secs = [
    11.3, 12.9, 11.9, 10.2, 11.1, 12.5, 10.8, 10.5, 12.0, 10.9,
]

# Echo every data set, one per line, in definition order.
print(
    a, b, c, d, e,
    age_x, glucose_level_y,
    math_score, time_taken_to_run_100m_secs,
    sep="\n",
)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2]
[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
[43, 21, 25, 42, 57, 59]
[99, 65, 79, 75, 87, 81]
[56, 29, 45, 93, 67, 38, 85, 77, 56, 71]
[11.3, 12.9, 11.9, 10.2, 11.1, 12.5, 10.8, 10.5, 12.0, 10.9]


## Count

In [0]:
def calc_count(x: List[float]) -> int:
    """Returns how many data points a data set contains.

    Args:
        x: The data set to measure.

    Returns:
        The length of ``x``; 0 for an empty data set.
    """
    size = len(x)
    return size

## Output

In [0]:
# Number of data points in the ascending and the descending data set.
print(calc_count(a), calc_count(b), sep="\n")

10
11


## Minimum

In [0]:
def calc_minimum(x: List[float]) -> float:
    """Returns the lowest data point in a data set.

    Args:
        x: The data set to search; must contain at least one value.

    Returns:
        The smallest element of ``x``, returned unchanged (so an int stays an
        int and a float stays a float).

    Raises:
        ValueError: If ``x`` is empty (raised by the built-in ``min``).
    """
    return min(x)

## Output

In [0]:
# Lowest value of the ascending and the descending data set.
print(calc_minimum(a), calc_minimum(b), sep="\n")

1
1


## Maximum

In [0]:
def calc_maximum(x: List[float]) -> float:
    """Returns the highest data point in a data set.

    Args:
        x: The data set to search; must contain at least one value.

    Returns:
        The largest element of ``x``, returned unchanged (so an int stays an
        int and a float stays a float).

    Raises:
        ValueError: If ``x`` is empty (raised by the built-in ``max``).
    """
    return max(x)

## Output

In [0]:
# Highest value of the ascending and the descending data set.
print(calc_maximum(a), calc_maximum(b), sep="\n")

10
11


## Range

In [0]:
def calc_range(x: List[float]) -> float:
    """Returns the spread of a data set: highest data point minus lowest.

    Args:
        x: The data set to measure.

    Returns:
        ``calc_maximum(x) - calc_minimum(x)``.
    """
    highest = calc_maximum(x)
    lowest = calc_minimum(x)
    return highest - lowest

## Output

In [0]:
# Spread (max - min) of the ascending and the descending data set.
print(calc_range(a), calc_range(b), sep="\n")

9
10


## Mean

In [0]:
def calc_mean(x: List[float]) -> float:
    """Returns the arithmetic mean of a data set.

    The mean is the sum of the data points divided by how many there are.

    Args:
        x: The data set to average.

    Returns:
        ``sum(x)`` divided by ``calc_count(x)``.

    Raises:
        ZeroDivisionError: If ``x`` is empty, because the count is then 0.
    """
    total = sum(x)
    n_points = calc_count(x)
    return total / n_points

## Output

In [0]:
# Arithmetic mean of the ascending and the descending data set.
print(calc_mean(a), calc_mean(b), sep="\n")

5.5
6.0


## Median

In [0]:
def calc_median(x: List[float]) -> float:
    """Returns the middle data point of a data set.

    The data are ordered first. With an odd number of points the middle one
    is returned unchanged; with an even number the two middle points are
    averaged.

    Args:
        x: The data set; it is not modified.

    Returns:
        The median of ``x``.

    Raises:
        ValueError: If ``x`` is empty.
    """
    n = len(x)
    if n == 0:
        raise ValueError("Median requires at least one element")
    ordered = sorted(x)  # sort a copy once and reuse it for every lookup
    mid = n // 2
    if n % 2 == 1:
        return ordered[mid]
    # Even count: average the two values that straddle the middle.
    return (ordered[mid - 1] + ordered[mid]) / 2

## Output

In [0]:
# Median of an even-sized (a) and an odd-sized (b) data set.
print(calc_median(a), calc_median(b), sep="\n")

5.5
6


## Quantile

In [0]:
def calc_quantile(x: List[float], y: float) -> float:
    """Returns the value found at a given quantile of a data set.

    The data are ordered and the element at position ``int(y * n)`` is
    returned, where ``n`` is the number of data points. No interpolation is
    performed.

    Args:
        x: The data set; it is not modified.
        y: The quantile as a fraction between 0 and 1 inclusive (for example
            0.25 for the 25th percentile).

    Returns:
        The data point at quantile ``y``; ``y == 1`` yields the highest
        data point.

    Raises:
        ValueError: If ``x`` is empty or ``y`` lies outside ``[0, 1]``.
    """
    n = len(x)
    if n == 0:
        raise ValueError("Quantile requires at least one element")
    if not 0 <= y <= 1:
        # A negative product would otherwise index from the end of the list
        # and silently return an unrelated value.
        raise ValueError("y must be between 0 and 1 inclusive")
    # Clamp so that y == 1 selects the last element instead of running
    # one position past the end.
    index = min(int(y * n), n - 1)
    return sorted(x)[index]

## Output

In [0]:
# First quartile, median position and third quartile of `a`.
print(*(calc_quantile(a, q) for q in (0.25, 0.50, 0.75)), sep="\n")

3
6
8


## Interquartile Range

In [0]:
def calc_interquantile_range(x: List[float]) -> float:
    """Returns the interquartile range of a data set.

    This is the value at the 75th percentile minus the value at the 25th
    percentile, both obtained from ``calc_quantile``.

    Args:
        x: The data set to measure.

    Returns:
        ``calc_quantile(x, 0.75) - calc_quantile(x, 0.25)``.
    """
    upper_quartile = calc_quantile(x, 0.75)
    lower_quartile = calc_quantile(x, 0.25)
    return upper_quartile - lower_quartile

## Output

In [0]:
# Distance between the 75th and the 25th percentile of `a`.
print(calc_interquantile_range(x=a))

5


## Mode

In [0]:
def calc_mode(x: List[float]) -> List[float]:
    """Returns the value(s) that appear most frequently in a data set.

    Args:
        x: The data set; its values must be hashable.

    Returns:
        Every value whose number of occurrences equals the highest one, in
        order of first appearance in ``x``. An empty data set has no mode,
        so an empty list is returned.
    """
    frequencies = Counter(x)
    if not frequencies:
        return []
    highest_frequency = max(frequencies.values())
    # Counter keeps first-insertion order, so ties come out in the order the
    # values were first seen.
    return [
        value
        for value, occurrences in frequencies.items()
        if occurrences == highest_frequency
    ]

## Output

In [0]:
# `c` has one most-frequent value; `d` has two tied ones.
print(calc_mode(c), calc_mode(d), sep="\n")

[1]
[1, 2]


## Variance

In [0]:
def calc_variance(x: List[float]) -> float:
    """Returns the average of the squared differences from the mean.

    This is the population form of the variance: the sum of squared
    deviations is divided by the number of data points.

    Args:
        x: The data set; must contain at least two values.

    Returns:
        The variance of ``x``.

    Raises:
        AssertionError: If ``x`` has fewer than two elements.
    """
    n = len(x)
    if n < 2:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Variance requires at least two elements")
    mean = sum(x) / n  # computed once instead of once per data point
    return sum((i - mean) ** 2 for i in x) / n

## Output

In [0]:
# Variance (divide-by-n form) of the ascending and the descending data set.
print(calc_variance(a), calc_variance(b), sep="\n")

8.25
10.0


## Bessel's Correction Variance

In [0]:
def calc_bessel_variance(x: List[float]) -> float:
    """Returns the variance with Bessel's correction applied.

    The sum of squared differences from the mean is divided by ``n - 1``
    instead of ``n``, which corrects the bias when a sample is used to
    estimate the population variance.

    Args:
        x: The data set; must contain at least two values.

    Returns:
        The bias-corrected (sample) variance of ``x``.

    Raises:
        AssertionError: If ``x`` has fewer than two elements.
    """
    n = len(x)
    if n < 2:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Variance requires at least two elements")
    mean = sum(x) / n  # computed once instead of once per data point
    return sum((i - mean) ** 2 for i in x) / (n - 1)

## Output

In [0]:
# Variance (divide-by-(n - 1) form) of the ascending and descending data set.
print(calc_bessel_variance(a), calc_bessel_variance(b), sep="\n")

9.166666666666666
11.0


## Standard Deviation

In [0]:
def calc_standard_deviation(x: List[float]) -> float:
    """Returns the dispersion of a data set relative to its mean.

    It is calculated as the square root of the variance, where the variance
    divides the sum of squared deviations by the number of data points.

    Args:
        x: The data set; must contain at least two values.

    Returns:
        The standard deviation of ``x``.

    Raises:
        AssertionError: If ``x`` has fewer than two elements.
    """
    n = len(x)
    if n < 2:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            "Standard Deviation requires at least two elements"
        )
    mean = sum(x) / n  # computed once instead of once per data point
    variance = sum((i - mean) ** 2 for i in x) / n
    return variance ** 0.5

## Output

In [0]:
# Standard deviation (divide-by-n form) of the two integer data sets.
print(calc_standard_deviation(a), calc_standard_deviation(b), sep="\n")

2.8722813232690143
3.1622776601683795


## Bessel's Correction Standard Deviation

In [0]:
def calc_bessel_standard_deviation(x: List[float]) -> float:
    """Returns the standard deviation with Bessel's correction applied.

    It is calculated as the square root of the variance, where the variance
    divides the sum of squared deviations by ``n - 1`` instead of ``n`` to
    correct the bias when estimating from a sample.

    Args:
        x: The data set; must contain at least two values.

    Returns:
        The bias-corrected (sample) standard deviation of ``x``.

    Raises:
        AssertionError: If ``x`` has fewer than two elements.
    """
    n = len(x)
    if n < 2:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(
            "Standard Deviation requires at least two elements"
        )
    mean = sum(x) / n  # computed once instead of once per data point
    variance = sum((i - mean) ** 2 for i in x) / (n - 1)
    return variance ** 0.5

## Output

In [0]:
# Standard deviation (divide-by-(n - 1) form) of the two integer data sets.
print(
    calc_bessel_standard_deviation(a),
    calc_bessel_standard_deviation(b),
    sep="\n",
)

3.0276503540974917
3.3166247903554


## Covariance

In [0]:
def calc_covariance(x: List[float], y: List[float]) -> float:
    """Returns a measurement of how changes in one variable are associated
    with changes in a second variable.

    The products of the paired deviations from each mean are summed and
    divided by ``n - 1`` (the sample form, matching the Bessel-corrected
    variance).

    Args:
        x: The first data set.
        y: The second data set, paired element by element with ``x``.

    Returns:
        The sample covariance of ``x`` and ``y``.

    Raises:
        AssertionError: If ``x`` and ``y`` differ in length, or contain
            fewer than two elements (the ``n - 1`` divisor would be zero or
            negative).
    """
    n = len(x)
    # Raised explicitly rather than via ``assert`` so the checks are not
    # stripped when Python runs with -O; the exception type is unchanged.
    if n != len(y):
        raise AssertionError(
            "x and y must contain the same number of elements"
        )
    if n < 2:
        raise AssertionError("Covariance requires at least two elements")
    # Both means are computed once instead of once per data point.
    mean_x = sum(x) / n
    mean_y = sum(y) / n
    return sum((i - mean_x) * (j - mean_y) for i, j in zip(x, y)) / (n - 1)

## Output

In [0]:
# Covariance between 1..10 and the even numbers 2..20.
print(calc_covariance(x=a, y=e))

18.333333333333332


## Correlation

In [0]:
def calc_correlation(x: List[float], y: List[float]) -> float:
    """Returns a measurement of the strength of the relationship between the
    relative movements of two data sets.

    The covariance of ``x`` and ``y`` is divided by the Bessel-corrected
    standard deviation of each data set.

    Args:
        x: The first data set.
        y: The second data set, paired element by element with ``x``.

    Returns:
        The correlation of ``x`` and ``y``, or ``0.0`` when either standard
        deviation is not positive (the ratio is then undefined).
    """
    # Each standard deviation is computed once and reused. ``y`` is only
    # examined when ``x`` has a positive spread, as before.
    deviation_x = calc_bessel_standard_deviation(x)
    if not deviation_x > 0:
        return 0.0
    deviation_y = calc_bessel_standard_deviation(y)
    if not deviation_y > 0:
        return 0.0
    return calc_covariance(x, y) / deviation_x / deviation_y

## Output

In [0]:
# Correlation between age and glucose level.
print(calc_correlation(x=age_x, y=glucose_level_y))

0.5298089018901744


In [0]:
# Correlation between math score and 100 m sprint time.
print(calc_correlation(x=math_score, y=time_taken_to_run_100m_secs))

-0.960157660824872
