# One Way ANOVAs in Python

A One-Way ANOVA ("analysis of variance") compares the means of two or more independent groups in order to determine whether there is statistical evidence that the associated population means are significantly different.

Here we will test four measurements (sepal length, sepal width, petal length, and petal width) of three iris species to determine whether the species come from the same population.

In [1]:
# Import packages
import pandas as pd
import seaborn as sns
from scipy.stats import f_oneway

In [5]:
# Load seaborn's 'iris' example dataset into a DataFrame.
# NOTE(review): per the seaborn docs, load_dataset fetches from seaborn's online
# example-data repository (cached locally), so the first run needs network access.
df = sns.load_dataset('iris')

In [54]:
# View data: leaving `df` as the cell's last expression renders the DataFrame
# (pandas shows the first and last rows and elides the middle).
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [56]:
# Collect the distinct species labels, in order of first appearance
flower_types = df['species'].unique().tolist()
flower_types

['setosa', 'versicolor', 'virginica']

In [57]:
# Start with a single column: pull the sepal-length values for each species
# (one boolean-mask + column selection per species via .loc).
setosa = df.loc[df['species'] == 'setosa', 'sepal_length']
versicolor = df.loc[df['species'] == 'versicolor', 'sepal_length']
virginica = df.loc[df['species'] == 'virginica', 'sepal_length']

In [26]:
# Run a one-way ANOVA on sepal length across the three species.
# Null hypothesis: all three species share the same population mean.
f_oneway(setosa, versicolor, virginica)

F_onewayResult(statistic=119.26450218450468, pvalue=1.6696691907693826e-31)

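`f_oneway` returns the F test statistic and its p-value. Here the p-value is far below the conventional 0.05 threshold, so we reject the null hypothesis that the three species share the same mean sepal length.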

In [60]:
# List all column names: the four measurements plus the `species` label
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [62]:
# Every column except the `species` label is a measurement to test
cols_to_run = df.drop(columns='species').columns
cols_to_run

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')

In [63]:
# Run a one-way ANOVA on each measurement column, comparing the species.
#
# The samples are built by grouping on `species` instead of filtering on
# hard-coded species names. This avoids reassigning the notebook-level
# `setosa` / `versicolor` / `virginica` variables (which hold the sepal-length
# samples used by the earlier single-column test), so re-running that earlier
# cell after this one still tests sepal length. It also works unchanged for
# any number of groups.
species_groups = df.groupby('species')
for col in cols_to_run:
    print(col)
    # One Series of `col` values per species, passed as separate samples
    samples = [values for _, values in species_groups[col]]
    print(f_oneway(*samples))
    print('\n')

sepal_length
F_onewayResult(statistic=119.26450218450468, pvalue=1.6696691907693826e-31)


sepal_width
F_onewayResult(statistic=49.160040089612075, pvalue=4.492017133309115e-17)


petal_length
F_onewayResult(statistic=1180.161182252981, pvalue=2.8567766109615584e-91)


petal_width
F_onewayResult(statistic=960.007146801809, pvalue=4.169445839443116e-85)


