# Lecture 26: Anotha' ANOVA Solutions
***

We'll need NumPy, Matplotlib, pandas, scipy.stats, and statsmodels for this notebook, so let's load them.

In [11]:
import numpy as np 
from scipy import stats
import statsmodels.api as sm 
import pandas as pd
import matplotlib.pylab as plt 
%matplotlib inline

### Suppose we have data from an experiment

A number of otters were dressed in costumes, and elementary school children were asked to bid on them, using a token system, to take them home as pets. The costume types were octopus, robot, and panda. The question is whether the mean bid actually differs between the costume types, or whether the three costumes are statistically indistinguishable.

In [12]:
# Read the costume-bidding data into a DataFrame
df = pd.read_csv("data/pro.csv")

# Preview up to 11 rows -- the table rendered below has only 10, so this
# displays all of it
df.head(n=11)

Unnamed: 0,octopuses,robots,pandas
0,2.0,5,7.0
1,4.0,5,4.0
2,7.0,3,6.0
3,1.0,4,8.0
4,2.0,4,6.0
5,1.0,7,6.0
6,5.0,2,2.0
7,,2,9.0
8,,6,
9,,3,


In [13]:
# One-way ANOVA by hand: each listed column of df is one group
groups = ["octopuses", "robots", "pandas"]

# Drop the missing entries from every column once, and reuse the cleaned
# samples in both passes below
samples = [df[name].dropna() for name in groups]

# First pass: within-group sum of squares (SSW), together with the running
# total and count of all observations that the grand mean is built from
SSW = 0
grandsum = 0
grandn = 0
for obs in samples:
    deviations = obs - np.mean(obs)
    SSW = SSW + np.sum(deviations**2)
    grandn = grandn + len(obs)
    grandsum = grandsum + np.sum(obs)

# grand mean = sum of every observation / number of observations
grandmean = grandsum/grandn

# Second pass: between-group sum of squares (SSB), i.e. each group's squared
# distance from the grand mean, weighted by the group's size
SSB = 0
for obs in samples:
    offset = np.mean(obs) - grandmean
    SSB = SSB + len(obs)*offset**2

# degrees of freedom: (number of groups - 1) between groups,
# (number of observations - number of groups) within groups
k = len(groups)
SSBdf = k - 1
SSWdf = grandn - k

# F statistic = between-group mean square / within-group mean square
MSB = SSB/SSBdf
MSW = SSW/SSWdf
Fstat = MSB/MSW

In [20]:
# Let's compute our pvalue: the upper-tail probability of seeing a value at
# least as large as Fstat under an F distribution with (SSBdf, SSWdf)
# degrees of freedom.
# Use the survival function instead of 1 - cdf: the two are mathematically
# equal, but the subtraction loses precision once the cdf is close to 1
# (i.e. exactly when the p-value is small).
pval = stats.f.sf(Fstat, SSBdf, SSWdf)

print('F (by hand)\t{:.4f}\npval (by hand)\t{:.4f}'.format(Fstat,pval))

F (by hand)	3.8887
pval (by hand)	0.0351


In [16]:
# Cross-check the hand computation with scipy's one-way ANOVA, passing each
# costume column (missing entries dropped) as a separate sample
F, p = stats.f_oneway(
    *[df[column].dropna() for column in ('octopuses', 'robots', 'pandas')]
)
print('F (scipy) \t{:.4f}\npval (scipy)\t{:.4f}'.format(F,p))

F (scipy) 	3.8887
pval (scipy)	0.0351
