In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, norm, t

# t    -> Student's t-distribution
# norm -> normal distribution

# z_{alpha/2} (two-sided critical value) -> z_norm.ppf(1 - alpha/2)
# The standard normal is normally used for this: z_norm = norm(loc=0, scale=1)

In [None]:
# Sample of 80 measurements analysed by the cells below, laid out ten per row.
# (What is being measured, and in which unit, is not stated in this notebook.)
can = np.array([
    101.8, 101.5, 102.6, 101, 101.8, 96.8, 102.4, 100, 98.8, 98.1,
    98.8, 98, 99.4, 95.5, 100.1, 100.5, 97.4, 100.2, 101.4, 98.7,
    101.4, 99.4, 101.7, 99, 99.7, 98.9, 99.5, 100, 99.7, 100.9,
    99.7, 99, 98.8, 99.7, 100.9, 99.9, 97.5, 101.5, 98.2, 99.2,
    98.6, 101.4, 102.1, 102.9, 100.8, 99.4, 103.7, 100.3, 100.2, 101.1,
    101.8, 100, 101.2, 100.5, 101.2, 101.6, 99.9, 100.5, 100.4, 98.1,
    100.1, 101.6, 99.3, 96.1, 100, 99.7, 99.7, 99.4, 101.5, 100.9,
    101.2, 99.9, 99.1, 100.7, 100.8, 100.8, 101.4, 100.3, 98.4, 97.2,
])

In [None]:
# Point estimate of the mean of `can` and a 95% confidence interval for it.
# The interval uses the standard-normal critical value together with the
# sample standard deviation: mean +/- z_{0.975} * s / sqrt(n).

n = len(can)
mean_can = can.mean()
std_can = can.std(ddof=1)  # sample standard deviation (n - 1 in the denominator)

# loc is the mean and scale is the standard deviation of the frozen normal.
can_norm = norm(loc=0, scale=1)

# Half-width of the (1 - alpha) interval with alpha = 0.05; computed once and
# applied symmetrically around the sample mean.
z_crit = can_norm.ppf(0.975)
half_width = z_crit * std_can / np.sqrt(n)
ll = mean_can - half_width
ul = mean_can + half_width

print("point estimator of mean is %.2f" % mean_can)
print("95% confidence intervals is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

point estimator of mean is 100.04
95% confidence intervals is 99.7067 and 100.3733


In [None]:
# Confidence interval for a single proportion (binomial setting, B(n, p)).
# n = number of trials, x = number of occurrences of interest,
# p_hat = x / n is the sample proportion.

n = 250
x = 70
p_hat = x / n
z_norm = norm(loc=0, scale=1)

# 90% two-sided interval -> critical value at the 0.95 quantile.
std_err = np.sqrt(p_hat * (1 - p_hat) / n)
margin = z_norm.ppf(0.95) * std_err
ll = p_hat - margin
ul = p_hat + margin

print("The propotion is :", p_hat)
print("90% confidence intervals of it is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

The propotion is : 0.28
90% confidence intervals of it is 0.2333 and 0.3267


In [None]:
# Point estimate of the variance of `can` and a 95% confidence interval for it.
# For a (1 - alpha) interval the bounds are
#   (n - 1) * s^2 / chi2_{1 - alpha/2, n-1}   and   (n - 1) * s^2 / chi2_{alpha/2, n-1}.
# `chi2` comes from the notebook's import cell rather than being imported here,
# so that all dependencies are declared in one place.

n = len(can)
s2 = np.var(can, ddof=1)  # sample variance (n - 1 in the denominator)
chi_dist = chi2(n - 1)    # chi-square distribution with n - 1 degrees of freedom

# The upper quantile goes in the lower bound's denominator and vice versa.
ll = (n - 1) * s2 / chi_dist.ppf(0.975)
ul = (n - 1) * s2 / chi_dist.ppf(0.025)

print("The variance estimator is :", s2)
print("95% confidence intervals of it is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

The variance estimator is : 2.3130632911392417
95% confidence intervals of it is 1.7325 and 3.2452


In [None]:
# Confidence interval for the difference of two means, using a pooled
# variance and the t-distribution with n1 + n2 - 2 degrees of freedom.

A_group = can[:10]    # first ten observations
B_group = can[-10:]   # last ten observations

n1 = len(A_group)
n2 = len(B_group)

# Pooled variance: the two sample variances weighted by their degrees of freedom.
var_a = np.var(A_group, ddof=1)
var_b = np.var(B_group, ddof=1)
s2_pool = ((n1 - 1) * var_a + (n2 - 1) * var_b) / (n1 + n2 - 2)
print("pooled estimator of the variance is", np.round(s2_pool, 4))

a_mean = np.mean(A_group)
b_mean = np.mean(B_group)
t_can = t(n1 + n2 - 2)

# Note: the critical value here comes from t_can, not from the usual z_{alpha/2}.
mean_diff = a_mean - b_mean
margin = t_can.ppf(0.975) * np.sqrt(s2_pool) * np.sqrt(1/n1 + 1/n2)
ll = mean_diff - margin
ul = mean_diff + margin

print("The Difference of two means :", np.round(mean_diff, 4))
print("95% of CI of the difference of two means is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

pooled estimator of the variance is 2.8729
The Difference of two means : 0.5
95% of CI of the difference of two means is -1.0925 and 2.0925


In [None]:
# Confidence interval (CI) for the difference of two proportions.

n1 = n2 = 100
p1_hat = 62/100
p2_hat = 29/100
z_norm = norm(loc=0, scale=1)

# Standard error of p1_hat - p2_hat: the two binomial variance terms added.
prop_diff = p1_hat - p2_hat
std_err = np.sqrt(p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2)
margin = z_norm.ppf(0.975) * std_err
ll = prop_diff - margin
ul = prop_diff + margin

# First line: the point estimate of the difference; second line: its 95% interval.
print("The difference of two proportions :", np.round(prop_diff, 4))
print("95% of CI of the differnce of two proportions is {} and {}".format(np.round(ll, 4), np.round(ul, 4)))

The difference of two proportions : 0.33
95% of CI of the differnce of two proportions is 0.1998 and 0.4602
