# In [None]:
# Import libraries
import numpy as np
import pandas as pd
import codecademylib3
from scipy.stats import binom_test
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import chi2_contingency
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dog dataset from the CSV file
dogs = pd.read_csv('dog_data.csv')

# Rows for whippets, terriers, and pitbulls (used in the weight comparison)
is_wtp = dogs['breed'].isin(['whippet', 'terrier', 'pitbull'])
dogs_wtp = dogs.loc[is_wtp]

# Rows for poodles and shihtzus (used in the colour comparison)
is_ps = dogs['breed'].isin(['poodle', 'shihtzu'])
dogs_ps = dogs.loc[is_ps]

# Quick look at the first few rows of the full dataset
print(dogs.head())

# Whippet rescue analysis: keep only the is_rescue values of whippets
whippet_rescue = dogs.loc[dogs['breed'] == 'whippet', 'is_rescue']

# Count the whippets flagged as rescues (is_rescue == 1) and all whippets
num_whippet_rescues = (whippet_rescue == 1).sum()
num_whippets = whippet_rescue.size
print("No of rescued whippets:", num_whippet_rescues)
print("No of whippets:", num_whippets)

# Two-sided binomial test of the observed rescue count against 8%
#   Null:        8% of whippets are rescues
#   Alternative: more or less than 8% of whippets are rescues
p_value = binom_test(num_whippet_rescues, n=num_whippets, p=0.08)
print("P-value:", p_value)

# Mid-sized dog weights: one weight Series per breed
wt_whippets = dogs.loc[dogs['breed'] == 'whippet', 'weight']
wt_terriers = dogs.loc[dogs['breed'] == 'terrier', 'weight']
wt_pitbulls = dogs.loc[dogs['breed'] == 'pitbull', 'weight']

# Side-by-side boxplots of weight for the three breeds
sns.boxplot(x=dogs_wtp['breed'], y=dogs_wtp['weight'])
plt.show()

# One-way ANOVA across the three breeds' weights
#   Null:        whippets, terriers, and pitbulls all weigh the same amount
#                on average
#   Alternative: they do not all weigh the same amount on average (at least
#                one pair of breeds has differing average weights)
anova_result = f_oneway(wt_whippets, wt_terriers, wt_pitbulls)
fstat, pval = anova_result
print("P-value:", pval)

# Tukey range test at the 0.05 significance level to see which pairs differ
tukey_results = pairwise_tukeyhsd(
    endog=dogs_wtp['weight'],
    groups=dogs_wtp['breed'],
    alpha=0.05,
)
print(tukey_results)

# Poodle and Shihtzu Colors:
# Contingency table of counts: one row per breed, one column per color
Xtab = pd.crosstab(dogs_ps.breed, dogs_ps.color)
print(Xtab)

# Performing a chi-square test:
# The chi-square test takes "no association" (independence) as its null
# hypothesis, so a small p-value is evidence FOR an association.
#
# Null: There is not an association between breed (poodle vs. shihtzu) and color.
#
# Alternative: There is an association between breed (poodle vs. shihtzu) and color.

# NOTE: this rebinds `pval`, replacing the ANOVA p-value computed earlier.
chi2, pval, dof, expected = chi2_contingency(Xtab)
print('P-value:', pval)