In [42]:
from typing import List
from collections import Counter
import math

In [2]:
def mean(xs: List[float]) -> float:
    '''return mean of a list'''
    return sum(xs) / len(xs)

print(mean([1, 2, 3, 4, 5]))

3.0


In [7]:
def _median_odd(xs: List[float]) -> float:
    return sorted(xs)[len(xs) // 2]

def _median_even(xs: List[float]) -> float:
    xs = sorted(xs)
    return (xs[len(xs) // 2 - 1] + xs[len(xs) // 2]) / 2

def median(xs: List[float]) -> float:
    '''find the middle-most value of xs'''
    return _median_even(xs) if len(xs) % 2 == 0 else _median_odd(xs)

print(median([1, 2, 3, 4, 5]))
print(median([1, 2, 3, 4, 5, 6]))

3
3.5


In [17]:
def quantile(xs: List[float], p: float) -> float:
    '''return p-th percentile value of xs'''
    assert p >= 0 and p < 1, 'p has to be between 0 and 1 (exclusive)'
    
    return sorted(xs)[int(p * len(xs))]

print(quantile([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 0.25))

3


In [26]:
def mode(xs: List[float]) -> List[float]:
    '''return list of the most common values xs'''
    
    counts = Counter(xs)
    max_count = counts.most_common(1)[0][1]
    return [x for x, y in counts.items() if y == max_count]

print(mode([1, 1, 2, 2, 3, 4, 5, 6, 6]))

[1, 2, 6]


In [30]:
def data_range(xs: List[float]) -> float:
    '''return difference between the smallest and the largest element in xs'''
    return max(xs) - min(xs)

print(data_range([1, 2, 3, 4, 5]))

4


In [41]:
Vector = List[float]

def subtract(v: Vector, w: Vector) -> Vector:
    assert len(v) == len(w), 'vectors must be the same length'
    return [x - y for x, y in zip(v, w)]

def sum_of_squares(v: Vector) -> float:
    return sum([x ** 2 for x in v])

def variance(xs: List[float]) -> float:
    '''
    compute variance of xs
    variance is computed:
    - subtracting mean from of the xs from xs component-wise
    - get sum of squares of the difference
    - divide by n where n is length of the xs
    '''
    
    mean_subtracted = subtract(xs, [mean(xs)] * len(xs))
    return sum_of_squares(mean_subtracted) / (len(xs))
    
print(variance([1, 2, 3, 4, 5, 6, 100]))

1142.7755102040817


In [46]:
def standard_deviation(xs: List[float]) -> float:
    '''return standard deviation -> squared root of variance'''
    return math.sqrt(variance(xs))

print(standard_deviation([1, 2, 3, 4, 5, 6, 100]))

33.804962804358794


In [47]:
def interquartile_range(xs: List[float]) -> float:
    '''return difference between 0.75-ile and 0.25-ile'''
    return quantile(xs, 0.75) - quantile(xs, 0.25)

In [68]:
def dot(v: Vector, w: Vector) -> float:
    assert len(v) == len(w), 'vectors must be the same length'
    return sum([x * y for x, y in zip(v, w)])

def covariance(xs: List[float], ys: List[float]) -> float:
    '''
    covariance is defined as a dot product of two vectors that has their mean 
    subtracted from them, divided by the length
    '''
    
    xs_mean_subtracted = subtract(xs, [mean(xs)] * len(xs))
    ys_mean_subtracted = subtract(ys, [mean(ys)] * len(ys))
    
    return dot(xs, ys) / len(xs)

x = [1, 2, 3, 4, 5]
y = [100, 110, 111, 120, 150]
print(covariance(x, y))

376.6
a


In [79]:
def correlation(xs: List[float], ys: List[float]) -> float:
    '''measures how much xs and ys vary in tandem about their means'''
    stdev_x = standard_deviation(xs)
    stdev_y = standard_deviation(ys)
    
    if stdev_x > 0 and stdev_y > 0: 
        return covariance(xs, ys) / stdev_x / stdev_y
    else:
        return 0