# Mean confident interval

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from statsmodels.stats.weightstats import _tconfint_generic

In [2]:
df = pd.read_csv("water.txt", sep='\t')

In [3]:
df

Unnamed: 0,location,town,mortality,hardness
0,South,Bath,1247,105
1,North,Birkenhead,1668,17
2,South,Birmingham,1466,5
3,North,Blackburn,1800,14
4,North,Blackpool,1609,18
...,...,...,...,...
56,South,Walsall,1527,60
57,South,West Bromwich,1627,53
58,South,West Ham,1486,122
59,South,Wolverhampton,1485,81


In [4]:
def calculate_conf_int(data, alpha = 0.05):
    mean = data.mean()
    std = data.std(ddof=1) / np.sqrt(data.shape[0])
    return _tconfint_generic(mean, std, data.shape[0] - 1, alpha, 'two-sided')

## Calculate 95% confident interval for mortality

In [5]:
mortality_int = calculate_conf_int(df.mortality)

print(f"Confident interval for mortality: {mortality_int}")
print(f"Lower bound: {np.round(mortality_int[0], 4)}")

Confident interval for mortality: (1476.0833413552848, 1572.2117406119285)
Lower bound: 1476.0833


## Calculate 95% confident interval for mortality in south

In [6]:
mortality_south = df[df.location == "South"].mortality
mortality_south_int = calculate_conf_int(mortality_south)

print(f"Confident interval for mortality in south: {mortality_south_int}")
print(f"Upper bound: {np.round(mortality_south_int[1], 4)}")

Confident interval for mortality in south: (1320.1517462936238, 1433.463638321761)
Upper bound: 1433.4636


## Calculate 95% confident interval for mortality in north

In [7]:
mortality_north = df[df.location == "North"].mortality
mortality_north_int = calculate_conf_int(mortality_north)

print(f"Confident interval for mortality in north: {mortality_north_int}")

Confident interval for mortality in north: (1586.5605251961385, 1680.6394748038613)


## Calculate 95% confident interval for hardness and compare

In [8]:
hardness_south = df[df.location == "South"].hardness
hardness_south_int = calculate_conf_int(hardness_south)

print(f"Confident interval for hardness in south: {hardness_south_int}")

Confident interval for hardness in south: (53.467198692036106, 86.07126284642544)


In [9]:
hardness_north = df[df.location == "North"].hardness
hardness_north_int = calculate_conf_int(hardness_north)

print(f"Confident interval for hardness in south: {hardness_north_int}")

Confident interval for hardness in south: (21.42248728572426, 39.37751271427574)


## Find sample size for 95% confident interval with width 0.2 of normal distribution($\sigma = 1$)

In [10]:
sample_size = np.ceil((norm.ppf(1-0.05/2) / 0.1)**2)
print(f"Sampe size of normal distribution: {sample_size}")

Sampe size of normal distribution: 385.0
