STATISTICS

 


Troubleshooting variance calculations

In [16]:
import math

In [17]:
def mean(x):
    # Arithmetic average: the total of the values divided by how many there are.
    # An empty x ends in ZeroDivisionError because its length is 0.
    total = sum(x)
    count = len(x)
    return total / count

In [18]:
def de_mean(x):
    # Subtract the mean of x from every element and return the shifted
    # values as a new list; x itself is left untouched.
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted

In [19]:
def dot(v, w):
    # Dot product: multiply the elements pairwise, then add up the products.
    # zip() stops at the shorter input, so surplus trailing elements of the
    # longer one are ignored.
    products = [a * b for a, b in zip(v, w)]
    return sum(products)

In [20]:
def sum_of_squares(v):
    # v_1 * v_1 + ... + v_n * v_n, computed as v dotted with itself.
    squared_total = dot(v, v)
    return squared_total

In [21]:
def variance(x):
    # Sample variance: the sum of squared deviations from the mean, divided
    # by (n - 1). The result is in squared units of x, so it is awkward to
    # interpret directly.
    count = len(x)
    centered = de_mean(x)
    squared_total = sum_of_squares(centered)
    return squared_total / (count - 1)

In [22]:
def standard_deviation(x):
    # Square root of the variance, which brings the measure of spread back
    # into the same units as the values in x.
    spread_squared = variance(x)
    return math.sqrt(spread_squared)

In [51]:
def quantile(x, p):
    # Return the value sitting a fraction p of the way through sorted x.
    #
    # x: non-empty iterable of mutually comparable values.
    # p: fraction between 0 and 1 inclusive (0.25 is the lower quartile,
    #    0.75 the upper quartile); it is NOT a 0-100 percentage.
    # Raises ValueError when p lies outside [0, 1], and IndexError when x
    # is empty.
    if not 0 <= p <= 1:
        # A negative p would otherwise index from the end of the list and
        # silently return a wrong value.
        raise ValueError("p must be between 0 and 1 inclusive")
    ordered = sorted(x)
    # int() truncates toward zero; p == 1 would land one past the end, so
    # cap the position at the last valid index.
    p_index = min(int(p * len(ordered)), len(ordered) - 1)
    return ordered[p_index]

In [44]:
def interquartile_range(x):
    # Width of the middle of the data: the quantile at 0.75 minus the
    # quantile at 0.25. Values beyond those two points do not enter the
    # result, which keeps outliers from distorting it.
    upper = quantile(x, 0.75)
    lower = quantile(x, 0.25)
    return upper - lower

In [59]:
def covariance(x, y):
    # Sample covariance: dot product of the mean-centered x and y, divided
    # by (n - 1) where n is len(x). Its magnitude scales with the inputs,
    # so values from different data sets are hard to compare.
    count = len(x)
    x_centered = de_mean(x)
    y_centered = de_mean(y)
    paired_total = dot(x_centered, y_centered)
    return paired_total / (count - 1)

In [66]:
def correlation(x, y):
    # Covariance divided by both standard deviations. The result is unitless
    # and lies between -1 and 1, which makes it comparable across data sets.
    # Returns 0 when either input has no positive spread.
    spread_x = standard_deviation(x)
    spread_y = standard_deviation(y)
    if not (spread_x > 0 and spread_y > 0):
        return 0
    return covariance(x, y) / spread_x / spread_y

In [67]:
# Sample inputs: the integers 1 through 10, and the same values scaled by 10.
data = list(range(1, 11))
more_data = [10 * value for value in data]

In [68]:
# Average of the integers 1 through 10.
mean(data)

5.5

In [69]:
# Sample variance of data; the divisor is n - 1, which is 9 here.
variance(data)

9.166666666666666

In [70]:
# Square root of variance(data).
standard_deviation(data)

3.0276503540974917

In [71]:
# quantile(data, 0.75) selects the value 8 and quantile(data, 0.25) selects 3.
interquartile_range(data)

5

In [72]:
# more_data is data scaled by 10, so this comes out as 10 times variance(data).
covariance(data, more_data)

91.66666666666667

In [73]:
# more_data is an exact multiple of data, so the result is 1 up to
# floating-point rounding.
correlation(data, more_data)

1.0000000000000002

Correlations can be misleading when other confounding variables are ignored.  Correlations look at relationships between two variables with all else being equal.  Binning into relevant subgroups can also help here.