# Confidence interval for the difference of two means (dependent samples)
## Weight loss example (lbs)

In [1]:
import pandas as pd
import numpy as np

In [6]:
# Load the tab-separated weight-loss measurements and display the full frame.
df = pd.read_table('weight_loss.csv')
df

Unnamed: 0,Subject,Weight before (lbs),Weight after (lbs),Difference
0,1,228.58,204.74,-23.83
1,2,244.01,223.95,-20.06
2,3,262.46,232.94,-29.52
3,4,224.32,212.04,-12.28
4,5,202.14,191.74,-10.41
5,6,246.98,233.47,-13.51
6,7,195.86,177.6,-18.25
7,8,231.88,213.85,-18.03
8,9,243.32,218.85,-24.47
9,10,266.74,236.86,-29.87


In [7]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Subject              10 non-null     int64  
 1   Weight before (lbs)  10 non-null     float64
 2   Weight after (lbs)   10 non-null     float64
 3   Difference           10 non-null     float64
dtypes: float64(3), int64(1)
memory usage: 452.0 bytes


### Calculate the mean, standard deviation and standard error of the differences

In [9]:
# Values of the 'Difference' column as a NumPy array for the calculations below.
dif_sample = df['Difference'].to_numpy()

In [10]:
# Sample mean of the differences, rounded to 2 decimals.
dif_mean = round(np.sum(dif_sample) / len(dif_sample), 2)

In [12]:
# Sample variance: squared deviations from the (rounded) mean, divided by n - 1.
dif_sample_variance = round(
    np.sum((dif_sample - dif_mean) ** 2) / (len(dif_sample) - 1),
    2,
)

In [14]:
# Sample standard deviation: square root of the (rounded) sample variance.
dif_sample_std = round(dif_sample_variance ** (1 / 2), 2)

In [43]:
# Standard error of the mean difference: s / sqrt(n), rounded to 2 decimals.
dif_sample_standard_error = round(dif_sample_std / len(dif_sample) ** (1 / 2), 2)

2.17

In [80]:
# Report the summary statistics computed in the cells above
# (each value was already rounded to 2 decimals when it was computed).
print('difference sample mean = ', dif_mean)
print('difference sample variance = ', dif_sample_variance)
print('difference sample standard deviation = ', dif_sample_std)
print('difference sample standard error = ', dif_sample_standard_error)

difference sample mean =  -20.02
difference sample variance =  47.04
difference sample standard deviation =  6.86
difference sample standard error =  2.17


### Determine the appropriate statistic to use
- The population variance is unknown.
- We have a small sample.
- We assume that the population is normally distributed.
- Therefore the appropriate statistic to use is the t-statistic.

In [31]:
# Load the tab-separated table of critical t-values and preview its first rows.
t_table = pd.read_table('t_table.csv')
t_table.head(12)

Unnamed: 0,d.f. / α,0.1,0.05,0.025,0.01,0.005
0,1,3.078,6.314,12.706,31.821,63.657
1,2,1.886,2.92,4.303,6.965,9.925
2,3,1.638,2.353,3.182,4.541,5.841
3,4,1.533,2.132,2.776,3.747,4.604
4,5,1.476,2.015,2.571,3.365,4.032
5,6,1.44,1.943,2.447,3.143,3.707
6,7,1.415,1.895,2.365,2.998,3.499
7,8,1.397,1.86,2.306,2.896,3.355
8,9,1.383,1.833,2.262,2.821,3.25
9,10,1.372,1.812,2.228,2.764,3.169


In [82]:
def t_statistic(alpha, length, table=None):
    """Look up the critical t-value for a two-sided interval.

    Parameters
    ----------
    alpha : float
        Total significance level (e.g. 0.05 for a 95% interval). It is halved
        and the result, formatted as a string, is used as the column label.
    length : int
        Sample size; the degrees of freedom are ``length - 1``.
    table : pandas.DataFrame, optional
        Table to read from. Defaults to the notebook-level ``t_table``.
        The lookup is positional and assumes the first row holds d.f. = 1 with
        one further degree of freedom per row -- verify against the CSV.

    Returns
    -------
    float
        The table entry for ``length - 1`` degrees of freedom and ``alpha / 2``.

    Raises
    ------
    ValueError
        If ``length`` is smaller than 2 (no degrees of freedom).
    KeyError
        If the table has no column labelled with ``alpha / 2``.
    IndexError
        If the table has no row for the requested degrees of freedom.
    """
    # Without this guard a negative row position would silently wrap around to
    # the end of the table and return a wrong critical value.
    if length < 2:
        raise ValueError(
            f'sample size must be at least 2 to have degrees of freedom, got {length}'
        )
    if table is None:
        table = t_table
    row = length - 2  # d.f. = length - 1, and d.f. 1 sits at position 0
    column = table.columns.get_loc(f'{alpha / 2}')
    return float(table.iloc[row, column])

### Calculate the 95% confidence interval
- $\alpha/2$ is 0.025
- the degrees of freedom (n-1) is 9
- the t-statistic is 2.262

In [83]:
# Critical t-value for the 95% interval. The sample size comes from the data
# rather than a hard-coded 10, so it stays in step with dif_sample.
t_stat_95 = t_statistic(0.05, len(dif_sample))
# Print the variable that was just assigned; `t_stat` is not defined in this
# notebook and raised a NameError on a fresh kernel.
print(t_stat_95)

2.262


In [84]:
# 95% interval bounds: mean -/+ t * standard error, rounded to 2 decimals.
confidence_interval_low = round(dif_mean - t_stat_95 * dif_sample_standard_error, 2)
confidence_interval_up = round(dif_mean + t_stat_95 * dif_sample_standard_error, 2)

### Repeat for the 90% and 99% confidence levels

In [85]:
# Critical t-value for the 90% interval (alpha = 0.1). The sample size comes
# from the data rather than a hard-coded 10.
t_stat_90 = t_statistic(0.1, len(dif_sample))
print(t_stat_90)

1.833


In [86]:
# 90% interval bounds: mean -/+ t * standard error, rounded to 2 decimals.
confidence_interval_90_low = round(dif_mean - t_stat_90 * dif_sample_standard_error, 2)
confidence_interval_90_up = round(dif_mean + t_stat_90 * dif_sample_standard_error, 2)

In [87]:
# Critical t-value for the 99% interval (alpha = 0.01). The sample size comes
# from the data rather than a hard-coded 10.
t_stat_99 = t_statistic(0.01, len(dif_sample))
print(t_stat_99)

3.25


In [88]:
# 99% interval bounds: mean -/+ t * standard error, rounded to 2 decimals.
confidence_interval_99_low = round(dif_mean - t_stat_99 * dif_sample_standard_error, 2)
confidence_interval_99_up = round(dif_mean + t_stat_99 * dif_sample_standard_error, 2)

In [90]:
# Report each interval with its width. Every width is rounded to 2 decimals
# (the 95% line previously was not), so float-subtraction artefacts cannot
# leak into the printed output.
print(
    f'confidence interval for 90% is [{confidence_interval_90_low}, {confidence_interval_90_up}] '
    f'{abs(round((confidence_interval_90_up-confidence_interval_90_low),2))} width'
)
print(
    f'confidence interval for 95% is [{confidence_interval_low}, {confidence_interval_up}] '
    f'{abs(round((confidence_interval_up-confidence_interval_low),2))} width'
)
print(
    f'confidence interval for 99% is [{confidence_interval_99_low}, {confidence_interval_99_up}] '
    f'{abs(round((confidence_interval_99_up-confidence_interval_99_low),2))} width'
)

confidence interval for 90% is [-24.0, -16.04] 7.96 width
confidence interval for 95% is [-24.93, -15.11] 9.82 width
confidence interval for 99% is [-27.07, -12.97] 14.1 width
