# Performing t tests in python

## Basic inferential statistical tests comparing two means. 
### One-sample, independent groups, paired samples t tests

## One-sample T-test

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import math 

In [2]:
# Creating some dummy data to use as an example. This is data for mean voting age for the population and for a sample
# from Yorkshire. This has been randomly generated so the means for the sample and the population differ.
# Two different distributions have been generated here, one for the popn and one for the sample from yorks. 

np.random.seed(6)

population_ages1 = stats.poisson.rvs(loc = 18, mu = 35, size = 150000)
population_ages2 = stats.poisson.rvs(loc = 18, mu = 10, size = 100000)
popn_ages = np.concatenate((population_ages1, population_ages2))

yorkshire_ages1 = stats.poisson.rvs(loc = 18, mu = 30, size = 30)
yorkshire_ages2 = stats.poisson.rvs(loc = 18, mu = 10, size = 20)
yorks_ages = np.concatenate((yorkshire_ages1, yorkshire_ages2))

print(popn_ages.mean())
print(yorks_ages.mean())

43.000112
39.26


In [3]:
# To conduct an inferential statistical test at the 95% confidence level to see if the mean age for yorks voters differs
# significantly from the mean age of voters in the popn we can use the scipy stats.ttest_1samp method:

stats.ttest_1samp(a = yorks_ages, popmean = popn_ages.mean()) # Note the arguments are the sample data and the popn mean
# for comparison. 
# The results give us significant result at the p = 0.013 level. 

Ttest_1sampResult(statistic=-2.5742714883655027, pvalue=0.013118685425061678)

In [4]:
# So, the t value lies outside the 95% confidence intervals for the t-distribution based on our population. 
# We can check this by getting quantiles for the top and bottom 2.5 tails of the distribution and comparing our
# t value against this. 

stats.t.ppf(q = 0.025, df = 49) # Note the arguments are our quantiles (the bottom 2.5%) and the degrees of freedom. 

-2.0095752344892093

In [5]:
# Similarly, checking the 95% CI for the top 2.5%
stats.t.ppf(q = 0.975, df = 49) # Note the arguments are our quantiles (the top 2.5%) and the degrees of freedom. 

2.009575234489209

In [6]:
# An alternative way to get the p-value is using our t value as the argument and the stats.t.cdf method:
stats.t.cdf(x = -2.5742, df = 49) * 2   # Note here we are multiplying the cdf by 2 as we did a two-tailed test. 

# As can be seen here and above the probability of getting this difference in means by chance if there is no
# difference between our sample and the popn is about 1.3%. Highly unlikely. 

0.013121066545690117

In [7]:
# If we were to construct a 95% confidence interval for our sample it would not capture the population mean:

sigma = yorks_ages.std()/math.sqrt(50)  # Sample std/ sample size

stats.t.interval(0.95, df = 49, loc = yorks_ages.mean(), scale = sigma) 

# The arguments in the interval method above are (confidence interval, df, sample mean, std dev estimate)
# The output gives us our 95% CIs. 

(36.369669080722176, 42.15033091927782)

In [8]:
# If we wanted a stricter significance level criterion we can adjust the alpha level. 
# For example, if we used a 99% threshold for sig, our results would no longer be significant
# and the 99% CIs would capture the popn mean (of 43.00):

stats.t.interval(alpha = 0.99, df = 49, loc = yorks_ages.mean(), scale = sigma) 

(35.40547994092107, 43.11452005907893)

## Two-sample T-test

### Independent Groups T-Test

In [9]:
# Creating a second set of dummy sample data relating to voter ages in the county of Hertfordshire
np.random.seed(12)
herts_ages1 = stats.poisson.rvs(loc = 18, mu = 33, size = 30)
herts_ages2 = stats.poisson.rvs(loc = 18, mu = 13, size = 20)
herts_ages = np.concatenate((herts_ages1, herts_ages2))

print(herts_ages.mean())

42.8


In [10]:
# Running an independent groups t-test using ttest_ind from scipy:

stats.ttest_ind(a = yorks_ages, b = herts_ages, equal_var = False) 
# Equal var arguments allows us to specify whether the samples meet the assumption of equal variances. 
# In this case we don't get a significant difference between two samples (p = 0.09).

Ttest_indResult(statistic=-1.7083870793286842, pvalue=0.09073104343957748)

### Paired samples (repeated measures) T-Test

In [21]:
# Creating a dummy data frame showing weight loss measurements from the same participants at two different times,
# before and after some weight loss treatment (as a simulated example).

np.random.seed(11)

before = stats.norm.rvs(scale = 30, loc = 250, size = 100)
after = before + stats.norm.rvs(scale = 5, loc = -1.25, size = 100)

weight_df = pd.DataFrame({"Weight Before": before, "Weight After": after, "Weight Change": after - before})

weight_df.describe()

Unnamed: 0,Weight Before,Weight After,Weight Change
count,100.0,100.0,100.0
mean,250.345546,249.115171,-1.230375
std,28.132539,28.422183,4.783696
min,170.400443,165.91393,-11.495286
25%,230.421042,229.148236,-4.046211
50%,250.830805,251.134089,-1.413463
75%,270.637145,268.927258,1.738673
max,314.700233,316.720357,9.759282


In [22]:
# We use the scipy.stats method ttest_rel to run a paired-samples test. 

stats.ttest_rel(a = before, b = after)

Ttest_relResult(statistic=2.5720175998568284, pvalue=0.011596444318439857)

## Running an IG T-Test on an imported/ existing dataset

In [11]:
# Importing the weight height data for an IG t test: 

w_h_data = pd.read_csv("weight height IG t test.csv")

w_h_data.head()

Unnamed: 0,Participant_ID,Weight,Height,Age,Sex
0,1,81.5,182.3,32,0
1,2,88.8,182.6,30,0
2,3,45.5,167.3,20,1
3,4,85.4,180.3,19,0
4,5,72.8,170.5,26,1


In [12]:
# Examining data types shows sex is already dummy coded and an integer:
w_h_data.dtypes

Participant_ID      int64
Weight            float64
Height            float64
Age                 int64
Sex                 int64
dtype: object

In [13]:
# IG test comparing weight with sex as grouping variable:
res = stats.ttest_ind(w_h_data[w_h_data.Sex == 0].Weight, w_h_data[w_h_data.Sex == 1].Weight, equal_var = False)

In [14]:
res

Ttest_indResult(statistic=5.354258568077058, pvalue=5.664350507458935e-06)

In [15]:
# Constructing 95% CIs for the t test:

sigma = w_h_data[w_h_data.Sex == 0].Weight.std()/math.sqrt(50)  # Sample std/ sample size

stats.t.interval(0.95, df = 49, loc = w_h_data[w_h_data.Sex == 0].Weight.mean(), scale = sigma) 

# The arguments in the interval method above are (confidence interval, df, sample mean, std dev estimate)
# The output gives us our 95% CIs. 

(73.63474735153451, 80.20161628482916)

In [16]:
# The sample mean for males and females was:

print("Male Weight mean: ", w_h_data[w_h_data.Sex == 0].Weight.mean())
print("Female Weight mean: ", w_h_data[w_h_data.Sex == 1].Weight.mean())

Male Weight mean:  76.91818181818184
Female Weight mean:  61.64999999999999


In [17]:
# A second IG t test with height as the DV difference between sexes:

res_2 = stats.ttest_ind(w_h_data[w_h_data.Sex == 0].Height, w_h_data[w_h_data.Sex == 1].Height, equal_var = False)

In [18]:
res_2

# Both produced significant results. 

Ttest_indResult(statistic=5.616972413592523, pvalue=1.5053462128673466e-06)

In [19]:
# Constructing 95% CIs for the t test:

sigma = w_h_data[w_h_data.Sex == 0].Height.std()/math.sqrt(50)  # Sample std/ sample size

stats.t.interval(0.95, df = 49, loc = w_h_data[w_h_data.Sex == 0].Height.mean(), scale = sigma) 

# The arguments in the interval method above are (confidence interval, df, sample mean, std dev estimate)
# The output gives us our 95% CIs. 

(173.71027038543272, 177.96245688729456)

In [20]:
print("Male Height mean: ", w_h_data[w_h_data.Sex == 0].Height.mean())
print("Female Height mean: ", w_h_data[w_h_data.Sex == 1].Height.mean())

Male Height mean:  175.83636363636364
Female Height mean:  164.64642857142863


## Running a paired-samples T-Test using an imported/ existing dataset 

In [23]:
test_score = pd.read_csv("test score progress.csv")

In [24]:
test_score.head()

Unnamed: 0,Participant_ID,Test1_Score,Test2_Score
0,1,45,55
1,2,34,46
2,3,52,65
3,4,67,74
4,5,40,50


In [25]:
# Running a RM t test using the above data. 

stats.ttest_rel(a = test_score.Test1_Score, b = test_score.Test2_Score)

Ttest_relResult(statistic=-23.728962983778086, pvalue=1.6043341681559642e-28)

In [26]:
# The test produced a significant result. Printing the group means for comparison:

print("Test 1 mean: ", test_score.Test1_Score.mean())
print("Test 2 mean: ", test_score.Test2_Score.mean())

Test 1 mean:  41.36
Test 2 mean:  52.72


## In this notebook I have provided code examples of how you run one-sample, independent groups and paired-samples t-tests using both simulated data and data imported from real datasets. 

### The examples used robust tests that did not assume equal variances by setting the equal_var argument to False. 