In [1]:
# Import standard packages
import numpy as np
import scipy.stats as stats
import pandas as pd


In [28]:
# Load the data set: column 0 holds the measured value, column 1 the group
# label (1, 2 or 3).
# The path is a raw string so the Windows backslashes are taken literally
# instead of being parsed as (invalid) escape sequences such as "\S" or "\d".
data = np.loadtxt(r'F:\SZKOLENIE\data\910.txt')

# Sort the values into groups, according to the label in column 1
group1 = data[data[:,1]==1,0]
group2 = data[data[:,1]==2,0]
group3 = data[data[:,1]==3,0]

In [30]:
# Dimensions of the loaded array
np.shape(data)

(22, 2)

In [5]:
# Levene's test: are the variances of the three groups equal?
W, p = stats.levene(group1, group2, group3)
if p < 0.05:
    levene_warning = 'Warning: the p-value of the Levene test is <0.05: p={0}'.format(p)
    print(levene_warning)



In [8]:
# One-way ANOVA across the three groups
samples = (group1, group2, group3)
F_statistic, pVal = stats.f_oneway(*samples)

In [9]:
# Report the F statistic together with its p-value
print('Data form Altman 910:')
anova_summary = (F_statistic, pVal)
print(anova_summary)

# Flag the result when it falls below the 5% significance level
is_significant = pVal < 0.05
if is_significant:
    print('One of the groups is significantly different.')

Data form Altman 910:
(3.7113359882669763, 0.043589334959178244)
One of the groups is significantly different.


In [14]:
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Alternative implementation with pandas & statsmodels: put the array into a
# labelled DataFrame, fit a linear model on the formula, then tabulate the ANOVA.
df = pd.DataFrame(data=data, columns=['value', 'treatment'])
fit_formula = 'value ~ C(treatment)'
model = ols(fit_formula, df).fit()
anovaResults = anova_lm(model)
print(anovaResults)

                df        sum_sq      mean_sq         F    PR(>F)
C(treatment)   2.0  15515.766414  7757.883207  3.711336  0.043589
Residual      19.0  39716.097222  2090.320906       NaN       NaN


  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


# anova_byHand

In [31]:
# Load the data; the raw string keeps the backslashes of the Windows path
# literal instead of treating "\S", "\d" and "\9" as (invalid) escape sequences.
data = np.loadtxt(r'F:\SZKOLENIE\data\910.txt')

In [32]:
# Convert the array to pandas format and group the rows by their group value
column_names = ['values', 'group']
df = pd.DataFrame(data=data, columns=column_names)
groups = df.groupby(by='group')

In [33]:
# Total sum of squares: squared deviations of every value from the overall mean
grand_mean = df['values'].mean()
deviations = df['values'] - grand_mean
ss_total = np.sum(deviations**2)

In [34]:
# Calculate ss_treatments (between groups) and ss_error (within groups)
overall_mean = df['values'].mean()    # loop-invariant, so computed only once
(ss_treatments, ss_error) = (0, 0)
for _, group in groups:
    group_mean = group['values'].mean()
    ss_error += sum((group['values'] - group_mean)**2)
    ss_treatments += len(group) * (group_mean - overall_mean)**2

# Degrees of freedom: (number of groups - 1) and (observations - number of groups)
df_groups = len(groups)-1
df_residuals = len(data)-len(groups)
F = (ss_treatments/df_groups) / (ss_error/df_residuals)

# The frozen F-distribution gets its own name: binding it to "df" would
# overwrite the DataFrame, so re-running this cell (or any cell that indexes
# df['values']) would fail.
f_dist = stats.f(df_groups,df_residuals)
p = f_dist.sf(F)    # survival function = P(X > F)

print(('ANOVA-Results: F = {0}, and p<{1}'.format(F, p)))

ANOVA-Results: F = 3.7113359882669754, and p<0.043589334959178244


#  anova_statsmodels

In [20]:
# Load the Galton data; the raw string keeps the backslashes of the Windows
# path literal instead of treating "\S", "\d" and "\g" as (invalid) escapes.
data = pd.read_csv(r'F:\SZKOLENIE\data\galton.csv')

In [23]:
# Fit the model given by the formula first, then tabulate its ANOVA
height_model = ols('height ~ 1 + sex', data).fit()
anova_results = anova_lm(height_model)
print('\nANOVA with "statsmodels" ------------------------------')
print(anova_results)



ANOVA with "statsmodels" ------------------------------
             df       sum_sq      mean_sq           F         PR(>F)
sex         1.0  5874.573234  5874.573234  933.184603  5.175573e-141
Residual  896.0  5640.489138     6.295189         NaN            NaN


  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


# Shows the equivalence of t-test and f-test, for comparing two groups

In [26]:
# Get the data (the raw string keeps the backslashes of the Windows path
# literal instead of treating "\S", "\d" and "\g" as invalid escape sequences)
data = pd.read_csv(r'F:\SZKOLENIE\data\galton.csv')

# First, calculate the F- and the T-values for the same two samples, ...
F_statistic, pVal = stats.f_oneway(data['father'], data['mother'])
t_val, pVal_t = stats.ttest_ind(data['father'], data['mother'])

# ... and show that t**2 = F
print('\nT^2 == F: ------------------------------------------')
print(('From the t-test we get t^2={0:5.3f}, and from the F-test F={1:5.3f}'.format(t_val**2, F_statistic)))


T^2 == F: ------------------------------------------
From the t-test we get t^2=2083.481, and from the F-test F=2083.481
