[Reference](https://medium.com/@abdullahorzan/data-backed-decision-making-a-b-testing-techniques-with-python-2599e80b29ac)

# Two-Sample Independent T-Test

In [1]:
import pandas as pd
import seaborn as sns
import statsmodels.stats.api as sms
from scipy.stats import shapiro, levene, ttest_ind, mannwhitneyu

######################################################
# Two-Sample Independent T-Test
######################################################

# 1. Formulate Hypotheses
# 2. Check Assumptions
#   - 1. Normality Assumption
#   - 2. Variance Homogeneity Assumption
# 3. Apply Hypothesis
#   - 1. If assumptions are satisfied: the parametric test
#   - 2. If assumptions are not satisfied: non-parametric test (Mann-Whitney U Test)
# 4. Examine results based on the p-value
# Important:
# - If normality is not satisfied, we can directly apply the Mann-Whitney U Test. If variance homogeneity is not satisfied, we add an equal_var parameter.
# - It can be beneficial to perform outlier detection and correction before conducting the normality assessment


############################
# Experiment: Is there a statistically significant difference in the mean bills between smokers and non-smokers?"
############################


df = sns.load_dataset("tips")
df.head()

df.groupby("smoker").agg({"total_bill": "mean"})

############################
# 1. Formulate the hypothesis
############################

# H0: M1 = M2
# H1: M1 != M2

############################
# 2. Assumption Checking
############################

# Normality Assumption
# Variance Homogeneity Assumption

############################
# Normality Assumption
############################

# H0: The assumption of the normal distribution is satisfied
# H1:.. is not satisfied.


test_stat, pvalue = shapiro(df.loc[df["smoker"] == "Yes", "total_bill"])
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

# if p-value < 0.05 H0 REJECT
# if p-value > 0.05 H0 FAIL TO REJECT


test_stat, pvalue = shapiro(df.loc[df["smoker"] == "No", "total_bill"])
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))


############################
# Variance Homogeneity Assumption
############################

# H0: The variances are homogeneous
# H1: The variances are not homogeneous

test_stat, pvalue = levene(df.loc[df["smoker"] == "Yes", "total_bill"],
                           df.loc[df["smoker"] == "No", "total_bill"])
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

# if p-value < 0.05 H0 REJECT
# if p-value > 0.05 H0 FAIL TO REJECT

############################
# 3 ve 4. Application of the Hypothesis
############################

# 1. If the assumptions are satisfied, the two-sample independent t-test is applied (parametric test)
# 2. If the assumptions are not satisfied, the Mann-Whitney U test is used (non-parametric test)

############################
# 1.1 If the assumptions are met, the two-sample independent t-test is conducted (parametric test)
############################

test_stat, pvalue = ttest_ind(df.loc[df["smoker"] == "Yes", "total_bill"],
                              df.loc[df["smoker"] == "No", "total_bill"],
                              equal_var=True)

print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

# if p-value < 0.05 H0 REJECT
# if p-value > 0.05 H0 FAIL TO REJECT

############################
# 1.2 If the assumptions are not met, the Mann-Whitney U test is conducted (non-parametric test)
############################

test_stat, pvalue = mannwhitneyu(df.loc[df["smoker"] == "Yes", "total_bill"],
                                 df.loc[df["smoker"] == "No", "total_bill"])

print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 0.9367, p-value = 0.0002
Test Stat = 0.9045, p-value = 0.0000
Test Stat = 4.0537, p-value = 0.0452
Test Stat = 1.3384, p-value = 0.1820
Test Stat = 7531.5000, p-value = 0.3413


# Two Independent Proportions Test

In [2]:
import seaborn as sns
import numpy as np
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

######################################################
# Two Independent Proportions Test
######################################################

# When applying this test, we input the total number of successes and the total number of observations
# as separate numpy arrays in the function, respectively.

############################
# Example
############################

total_num_of_success = np.array([300, 250])
total_num_of_observations = np.array([1000, 1100])

proportions_ztest(count=total_num_of_success, nobs=total_num_of_observations)


############################
# Application: Is there a statistically significant difference in the survival rates between males and females?
############################

# H0: p1 = p2
# There is no statistically significant difference in the survival rates between males and females

# H1: p1 != p2
# .. there is a statistically significant difference

df = sns.load_dataset("titanic")
df.head()

df.loc[df["sex"] == "female", "survived"].mean()

df.loc[df["sex"] == "male", "survived"].mean()

# The sum of the survivors gives us the total number of successes
female_succ_count = df.loc[df["sex"] == "female", "survived"].sum()
male_succ_count = df.loc[df["sex"] == "male", "survived"].sum()

# To obtain the total number of observations, we apply filtering and
# then retrieve the row count, which provides us with the total number of observations that meet the condition within the variable
test_stat, pvalue = proportions_ztest(count=[female_succ_count, male_succ_count],
                                      nobs=[df.loc[df["sex"] == "female", "survived"].shape[0],
                                            df.loc[df["sex"] == "male", "survived"].shape[0]])

print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

Test Stat = 16.2188, p-value = 0.0000


# ANOVA (Analysis of Variance)

In [3]:
import seaborn as sns
import pandas as pd
from scipy.stats import f_oneway, kruskal, shapiro, levene, mannwhitneyu

######################################################
# ANOVA (Analysis of Variance)
######################################################


df = sns.load_dataset("tips")
df.head()

df.groupby("day")["total_bill"].mean()

############################
# 1. Formulate the hypothesis
############################

# HO: m1 = m2 = m3 = m4
# There is no significant difference among the group means

# H1: .. there is a difference

############################
# 2. Checking assumptions
############################

# Normality assumption
# Variance homogeneity assumption

# If the assumption is satisfied, we apply the One-Way ANOVA test
# If the assumption is not satisfied, we use the Kruskal-Wallis test

# H0: The assumption of the normal distribution is satisfied

for group in list(df["day"].unique()):
  #We select the first index element, which is the p-value, as the Shapiro test function returns both the test statistic and the p-value
    pvalue = shapiro(df.loc[df["day"] == group, "total_bill"])[1]
    print(group, 'p-value: %.4f' % pvalue)


# H0: The assumption of homogeneity of variances is satisfied

test_stat, pvalue = levene(df.loc[df["day"] == "Sun", "total_bill"],
                           df.loc[df["day"] == "Sat", "total_bill"],
                           df.loc[df["day"] == "Thur", "total_bill"],
                           df.loc[df["day"] == "Fri", "total_bill"])
print('Test Stat = %.4f, p-value = %.4f' % (test_stat, pvalue))

############################
# 3. Hypothesis testing and interpretation of the p-value
############################

# HO: There is no statistically significant difference among the group means

# Parametric ANOVA test:
f_oneway(df.loc[df["day"] == "Thur", "total_bill"],
         df.loc[df["day"] == "Fri", "total_bill"],
         df.loc[df["day"] == "Sat", "total_bill"],
         df.loc[df["day"] == "Sun", "total_bill"])

# Non-parametric ANOVA test:
kruskal(df.loc[df["day"] == "Thur", "total_bill"],
        df.loc[df["day"] == "Fri", "total_bill"],
        df.loc[df["day"] == "Sat", "total_bill"],
        df.loc[df["day"] == "Sun", "total_bill"])

Sun p-value: 0.0036
Sat p-value: 0.0000
Thur p-value: 0.0000
Fri p-value: 0.0409
Test Stat = 0.6654, p-value = 0.5741


KruskalResult(statistic=10.403076391437086, pvalue=0.01543300820104127)

In [4]:
from statsmodels.stats.multicomp import MultiComparison

comparison = MultiComparison(df['total_bill'], df['day'])
tukey = comparison.tukeyhsd(0.05)

print(tukey.summary())

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
   Fri    Sat   3.2898 0.4541 -2.4799  9.0595  False
   Fri    Sun   4.2584 0.2371 -1.5856 10.1025  False
   Fri   Thur   0.5312 0.9957 -5.4434  6.5057  False
   Sat    Sun   0.9686 0.8968 -2.6088   4.546  False
   Sat   Thur  -2.7586 0.2374 -6.5455  1.0282  False
   Sun   Thur  -3.7273 0.0668 -7.6264  0.1719  False
----------------------------------------------------
