In [1]:
import numpy as np
from collections import Counter
from statistics import mode

In [2]:
folks = [14, 12, 11, 10, 8, 6, 8]

In [33]:
# Calculate mean.
sum(folks)/len(folks)

9.857142857142858

In [34]:
# Calculate median.
sorted(folks)
folks[3]

10

In [35]:
# Find the mode; if all value are equal, there is no mode.
ctr = dict(Counter(folks))
max(ctr, key=ctr.get)

8

In [36]:
# Variance of our data set, per the formula: 
# Square of the sum of each value minus the mean all divided by sample size minus one.
variance = np.array(sum(map(lambda x: ((x - (sum(folks)/len(folks))) ** 2), folks))) / len(folks)
print(variance)

6.408163265306122


In [37]:
# Standard deviation = the square root of variance.
std = np.sqrt(variance)
print(std)

2.531435020952764


In [38]:
# Standard error is the standard deviation divided by the square root of the sample size.
np.sqrt(std) / np.sqrt(len(folks))

0.6013597712034148

In [39]:
# Verification
def drill(_list):
    mn = np.mean(_list)
    med = np.median(_list)
    std = np.std(_list)
    stderr = np.sqrt(np.std(_list)) / np.sqrt(len(_list))

    try:
        md = mode(_list)
        print("Mean: {}\nMedian: {}\nMode: {}\nStandard Deviation: {}\nStandard Error: {}".format(mn, med, md, std, stderr))
    except:
        print("Mode not found!")
        print("Mean: {}\nMedian: {}\nStandard Deviation: {}\nStandard Error: {}".format(mn, med, std, stderr))
        
    
drill(folks)

Mean: 9.857142857142858
Median: 10.0
Mode: 8
Standard Deviation: 2.531435020952764
Standard Error: 0.6013597712034148


## Notes

For this small dataset, I would choose the mean - because the dataset contains no clear outliers - and I would choose the standard deviation to express variation - because it is simple to understand in relation to the ages.

In [40]:
# Cindy has a birthday
folks2 = sorted(folks)
folks2[0] = 7
drill(folks2)

Mean: 10.0
Median: 10.0
Mode: 8
Standard Deviation: 2.32992949004287
Standard Error: 0.5769289991031169


## Notes

With Cindy's birthday updated in the list, we can see all values are affected fairly minimally - save the median and mode, which see no change.

In [41]:
folks3 = sorted(folks2)
folks3[1] = 1
drill(sorted(folks3))

Mode not found!
Mean: 9.0
Median: 10.0
Standard Deviation: 3.927922024247863
Standard Error: 0.7490872564325832


## Notes

With Cousin Oliver replaced by baby Jessica, we see that some of the values have changed considerably - notably the measures of variation. With this new dataset, I might select the median for central tendency as it excludes the outlier. I would keep using the standard deviation as an indicator of spread because it represents the set's variation well.

In [42]:
fanbois = np.array([20, 23, 17, 5])
relevance = np.array([.30, .30, .30, .10])
print(sum(fanbois * relevance), 'percent of adult Americans are *probably* Brady Bunch fans.')

18.5 percent of adult Americans are *probably* Brady Bunch fans.


## Notes

Without any true scientific backing, I came to this conclusion by giving weights to each of the polling sources.

TV Guide, Entertainment Weekly, and Pop Culture Today ring the tune of a viewer base who can relate to the show. At least the relevance seems more logical than the readership of SciPhi Phanatic - which is why I gave magazine poll a much lower weight.