In [172]:
# The Student’s t-test 
# will comment on whether it is likely to observe two samples 
# given that the samples were drawn from the same population.

# Assumptions:
# - both samples were drawn from a Gaussian distribution. 
# - The test also assumes that the samples have the same variance, 
# - and the same size, 

    # although there are corrections to the test if these assumptions do not hold. 
    # For example, see Welch’s t-test.

# Types:
# - Independent Samples: when two samples are unrelated.
# - Dependent Samples: repeated measures on the same population.

# It's from Chapter 9 “t Tests” in “Statistics in Plain English“, Third Edition, 2010.

# The test works by:
    
    # checking the means from two samples to see if hey are significantly different from each other. 
    # It does this by calculating...

    # The standard error 
    # in the difference between means, which can be interpreted 
    # to see how likely the difference is, if the two samples have the same mean (the null hypothesis).

    # The t statistic 
    # can be interpreted by comparing it to critical values from the t-distribution. 
    
    # The critical value  
    # is calculated using the degrees of freedom and a significance level with the percent point function (PPF).

        # If abs(t-statistic) <= critical value: Accept null hypothesis that the means are equal.
        # If abs(t-statistic) > critical value: Reject the null hypothesis that the means are equal.

        # We can also retrieve the cumulative probability of observing 
        # the absolute value of the t-statistic 
        # using the cumulative distribution function (CDF) of the t-distribution 
        # in order to calculate a p-value. 

        # The p-value can then be compared to a chosen significance level (alpha) such as 0.05 
        # to determine if the null hypothesis can be rejected:

            # If p > alpha: Accept null hypothesis that the means are equal.
            # If p <= alpha: Reject null hypothesis that the means are equal.

In [297]:
from __future__ import print_function, division

import numpy
import scipy.stats
import pandas as pd
import matplotlib.pyplot as pyplot

from math import sqrt
from numpy.random import randn
from numpy import mean
from scipy.stats import sem
from scipy.stats import t

%matplotlib inline

In [298]:
# Dataset [Mathematic exam results with wide range of data about student]
mat = pd.read_csv("student-mat.csv", delimiter=";")
mat.head(2)

# New feature Gmax: average all 3 exams scores:
mat["Gmax"] = round((mat["G1"] + mat["G2"] + mat["G3"]) / 3, 2)

# Remove all data but four key features:
mat = mat[['sex', "Mjob", "Fjob", "Gmax"]]

# DATASETS: by mother's job (at_home vs in_health and controll group as other_jobs and all_students)
mother_at_home = mat[ mat['Mjob'] == "at_home" ]
mother_health = mat[ mat['Mjob'] == "health" ]
mother_others = mat[(mat.Mjob != "health") & (mat.Mjob != "at_home")]
all_students = mat

# Ready datasets:
mj_sets = [mother_at_home, mother_health, mother_others, all_students]

# Initial info:
for each in mj_sets:
    samples = len(each)
    mean = each.Gmax.mean()
    print("n: {:<5}  mean: {}".format(samples, mean))

n: 59     mean: 9.762542372881354
n: 34     mean: 12.23470588235294
n: 302    mean: 10.683079470198676
n: 395    mean: 10.679139240506329


In [303]:
# Pick groups and category:
group1 = mother_at_home
group2 = mother_health
category = "Gmax"

# Initial parameters:
alpha = 0.05

def t_test(group1, group2, category, alpha):

    g1, g2 = list(group1[category]), list(group2[category])
    n1, n2 = len(g1), len(g2)
    
    # Standard error difference
    std1 = np.std(g1, ddof=1)
    std2 = np.std(g2, ddof=1)
    standard_error1 = std1/sqrt(n1) # sem(g1)
    standard_error2 = std2/sqrt(n2) # sem(g2)
    standard_error_difference = sqrt(standard_error1**2.0 + standard_error2**2.0)
    
    # t-test statistic
    mean1 = np.mean(g1)
    mean2 = np.mean(g2)
    t_statistic = (mean1 - mean2) / standard_error_difference
    
    # p-value, critical value, degrees of freedom
    degrees_of_freedom = n1 + n2 - 2
    critical_value = t.ppf(1.0 - alpha, degrees_of_freedom) # ppf is percent point function for a given significance level
    p_value = (1 - t.cdf(abs(t_statistic), degrees_of_freedom)) * 2
    
    # Interpretation
    if abs(t_statistic) <= critical_value:
        print('t interpretation: Accept null hypothesis that the means are equal.')
    else:
        print('t interpretation: Reject the null hypothesis that the means are equal.')
    if p_value > alpha:
        print('p interpretation: Accept null hypothesis that the means are equal.')
    else:
        print('p interpretation: Reject the null hypothesis that the means are equal.')

    # Results table
    result_table_data = [('Mean', [mean1, mean2, None,  alpha, standard_error_difference, 
                                   degrees_of_freedom, critical_value, t_statistic, p_value]),
         ('Sample size', [n1, n2, None, None, None, None, None, None, None]),
         ('Standard deviation', [std1, std2, None, None, None, None, None, None, None]),
         ('Standard error', [standard_error1, standard_error2, None, None, None, None, None, None, None])]
    result_table = pd.DataFrame.from_items(result_table_data)
    result_table.index = ['Group 1', 'Group 2', "", "Alpha", 'Standard error difference', 'Degrees of freedom', 
                 'Critical value', 't', 'p-value']
    result_table = result_table.replace(np.nan, '', regex=True)

    return result_table, p_value, t_statistic, degrees_of_freedom, critical_value

result_table, p_value, t_statistic, dof, cv = t_test(group1, group2, category, alpha)
result_table

2.47216350947
t interpretation: Reject the null hypothesis that the means are equal.
p interpretation: Reject the null hypothesis that the means are equal.


Unnamed: 0,Mean,Sample size,Standard deviation,Standard error
Group 1,9.76254,59.0,3.62715,0.472215
Group 2,12.2347,34.0,3.27074,0.560927
,,,,
Alpha,0.05,,,
Standard error difference,0.73323,,,
Degrees of freedom,91.0,,,
Critical value,1.66177,,,
t,-3.37161,,,
p-value,0.00109852,,,


In [305]:
# Pick groups and category:
group1 = mother_at_home[:30]
group2 = mother_health[:30]
category = "Gmax"

# Initial parameters:
alpha = 0.05

def td_test(group1, group2, category, alpha):
  
    g1, g2 = list(group1[category]), list(group2[category])
    n, n2 = len(g1), len(g2) # later assert n1 = n2
    
    # Equal sample sizes assumption
    if n is not n2:
        print("Sample sizes are not equal: (g1:{}), (g2:{}) !".format(n, n2))
        return pd.DataFrame([n, n2], ["Group 1","Group 2"], columns =["Sample size"]), None, None, None, None
    
    # Standard error of mean difference
    sqrt_diff1 = sum([(g1[i]-g2[i])**2 for i in range(n)])
    sqrt_diff2 = sum([g1[i]-g2[i] for i in range(n)])
    std_of_mean_diff = sqrt((sqrt_diff1 - (sqrt_diff2**2 / n)) / (n - 1))
    standard_error_of_mean_diff = std_of_mean_diff / sqrt(n)

    # t-test statistic
    mean1, mean2 = np.mean(g1), np.mean(g2)
    t_statistic = (mean1 - mean2) / standard_error_of_mean_diff
    
    t2 = abs(np.mean(g1) - np.mean(g2))
    print(t2)

    # p-value
    degrees_of_freedom = n - 1
    critical_value = t.ppf(1.0 - alpha, degrees_of_freedom)
    p_value = (1.0 - t.cdf(abs(t_statistic), degrees_of_freedom)) * 2.0
    
    # Interpretation
    if abs(t_statistic) <= critical_value:
        print('t interpretation: Accept null hypothesis that the means are equal.')
    else:
        print('t interpretation: Reject the null hypothesis that the means are equal.')
    if p_value > alpha:
        print('p interpretation: Accept null hypothesis that the means are equal.')
    else:
        print('p interpretation: Reject the null hypothesis that the means are equal.')

    # Results table
    result_table_data = [('Mean', [mean1, mean2, None,  alpha, std_of_mean_diff, standard_error_of_mean_diff, 
                                   degrees_of_freedom, critical_value, t_statistic, p_value]),
         ('Sample size', [n, n2, None, None, None, None, None, None, None, None]),
         ('Squared difference', [sqrt_diff1, sqrt_diff2, None, None, None, None, None, None, None, None])]
    result_table = pd.DataFrame.from_items(result_table_data)
    result_table.index = ['Group 1', 'Group 2', "", "Alpha", 'Standard deviation of mean diff', 
                          'Standard error of mean diff', 'Degrees of freedom', 'Critical value', 't', 'p-value']
    result_table = result_table.replace(np.nan, '', regex=True)
        
    return result_table, p_value, t_statistic, degrees_of_freedom, critical_value

result_table, p_value, t_statistic, dof, cv = td_test(group1, group2, category, alpha)
result_table

3.38866666667
t interpretation: Reject the null hypothesis that the means are equal.
p interpretation: Reject the null hypothesis that the means are equal.


Unnamed: 0,Mean,Sample size,Squared difference
Group 1,8.49967,30.0,1200.84
Group 2,11.8883,30.0,-101.66
,,,
Alpha,0.05,,
Standard deviation of mean diff,5.43408,,
Standard error of mean diff,0.992122,,
Degrees of freedom,29.0,,
Critical value,1.69913,,
t,-3.41557,,
p-value,0.00190169,,


In [311]:
# Extensions
# This section lists some ideas for extending the tutorial that you may wish to explore.

# Apply each test to your own contrived sample problem.
# Update the independent test and add the correction for samples with different variances and sample sizes.
# Perform a code review of one of the tests implemented in the SciPy library and summarize the differences in the implementation details.
# If you explore any of these extensions, I’d love to know.

# scipy.stats.ttest_ind API
# scipy.stats.ttest_rel API
# scipy.stats.sem API
# scipy.stats.t API

# !~~~!~!~!~!~!~!~!~!~!
# https://machinelearningmastery.com/statistical-hypothesis-tests-in-python-cheat-sheet/
# https://machinelearningmastery.com/how-to-calculate-the-5-number-summary-for-your-data-in-python/