# 추측통계의 기본


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as ani
from IPython.display import HTML

%precision 3
%matplotlib inline


In [2]:
# Load the score table and keep its "score" column as a standalone NumPy array.
df = pd.read_csv("../data/ch4_scores400.csv")
scores = df["score"].to_numpy(copy=True)

# A loaded die: face k has probability k / 21 (21 = 1 + 2 + ... + 6).
dice = list(range(1, 7))
prob = [face / 21 for face in dice]


## 그림 4.3 시행 횟수를 변화시킬 때 눈의 히스토그램


In [3]:
# Number of animation frames, and the trial count shown in each frame:
# `num_frame` log-spaced values from 10**2 to 10**5, truncated to integers.
num_frame = 50
space = np.logspace(2, 5, num_frame).astype(int)

# Seed the global generator, as the other sampling cells in this notebook do,
# so the simulated rolls (and therefore the rendered animation) are
# reproducible from run to run.
np.random.seed(0)

# 100,000 rolls of the loaded die, drawn once up front; each animation frame
# then only needs a prefix of this array.
sample = np.random.choice(dice, size=100000, p=prob)


def animate(nframe):
    """Redraw the dice histogram for animation frame ``nframe``.

    Plots the relative frequency of each face among the first
    ``space[nframe]`` entries of ``sample`` and overlays gray horizontal
    segments at the values in ``prob`` for comparison.
    """
    rolls_shown = space[nframe]
    faces = np.arange(1, 7)

    # Start every frame from an empty axes so earlier bars do not pile up.
    ax.clear()
    # Six unit-wide bins over [1, 7] give exactly one bar per face.
    ax.hist(sample[:rolls_shown], bins=6, range=(1, 7), density=True, rwidth=0.8)
    # One segment per face, spanning that face's bin at height prob[k].
    ax.hlines(prob, faces, faces + 1, colors="gray")

    # Ticks sit at the bin centers but are labelled with the face values.
    ax.set_xticks(faces + 0.5)
    ax.set_xticklabels(faces)
    ax.set_ylim(0, 0.3)
    ax.set_xlabel("dice", fontsize=12)
    ax.set_ylabel("relative frequency", fontsize=12)
    ax.set_title(f"number trial:{rolls_shown}")


# Single-axes figure that `animate` draws into.
fig, ax = plt.subplots(figsize=(10, 6))

# Render every frame into a self-contained JavaScript/HTML player.
anim = ani.FuncAnimation(fig=fig, func=animate, frames=num_frame)
js_anim = HTML(data=anim.to_jshtml())

# Close the figure so only the HTML player is displayed, not a static plot too.
plt.close(fig)

js_anim


## 그림 4.5 무작위추출로 얻은 표본 데이터의 히스토그램


In [4]:
# Frame count and the sample size shown per frame: log-spaced from 10 to 10**4.
num_frame = 50
space = np.logspace(1, 4, num_frame).astype(int)

# Fixed seed so the 10,000 draws from `scores` are reproducible.
np.random.seed(0)
sample = np.random.choice(scores, size=10000)

# Single-axes figure that `animate` draws into.
fig, ax = plt.subplots(figsize=(10, 6))


def animate(nframe):
    """Redraw the score histogram for animation frame ``nframe``.

    Shows the relative-frequency histogram of the first ``space[nframe]``
    entries of ``sample``, using 100 unit-wide bins over [0, 100] and an
    x-axis restricted to [20, 100].
    """
    shown = space[nframe]

    # Start every frame from an empty axes so earlier bars do not pile up.
    ax.clear()
    ax.hist(sample[:shown], density=True, bins=100, range=(0, 100))

    ax.set_title(f"sample size {shown}")
    ax.set_xlim(20, 100)
    ax.set_xlabel("score", fontsize=12)
    ax.set_ylabel("relative frequency", fontsize=12)


# Render every frame into a self-contained JavaScript/HTML player.
anim = ani.FuncAnimation(fig=fig, func=animate, frames=num_frame)
js_anim = HTML(data=anim.to_jshtml())

# Close the current figure so only the HTML player is displayed.
plt.close()

js_anim


## 그림 4.6 표본평균의 분포


In [5]:
# Frame count and the number of sample means shown per frame
# (log-spaced from 10 to 10**4).
num_frame = 50
space = np.logspace(1, 4, num_frame).astype(int)

# Fixed seed, then 10,000 repetitions of "draw 20 scores and take their mean".
# The draws stay one-per-iteration so the random stream is consumed in the
# same order on every run.
np.random.seed(0)
sample_means = [
    np.mean(np.random.choice(scores, size=20)) for _ in range(10000)
]

# Single-axes figure that `animate` draws into.
fig, ax = plt.subplots(figsize=(10, 6))


def animate(nframe):
    """Redraw the sample-mean histogram for animation frame ``nframe``.

    Shows the relative-frequency histogram of the first ``space[nframe]``
    entries of ``sample_means`` together with a gray vertical line at the
    mean of ``scores``. The y-axis is rescaled each frame to 10% above the
    tallest bar.
    """
    shown = space[nframe]

    # Start every frame from an empty axes so earlier bars do not pile up.
    ax.clear()
    # Keep only the bar heights; they are needed to size the y-axis below.
    heights = ax.hist(
        sample_means[:shown], density=True, bins=100, range=(0, 100)
    )[0]
    ax.vlines(np.mean(scores), 0, 1, colors="gray")

    ax.set_xlim(50, 90)
    ax.set_ylim(0, heights.max() * 1.1)
    ax.set_xlabel("score", fontsize=12)
    ax.set_ylabel("relative frequency", fontsize=12)
    ax.set_title(f"number trial {shown}")


# Render every frame into a self-contained JavaScript/HTML player.
anim = ani.FuncAnimation(fig=fig, func=animate, frames=num_frame)
js_anim = HTML(data=anim.to_jshtml())

# Close the current figure so only the HTML player is displayed.
plt.close()

js_anim
