In [153]:
import pandas as pd
import numpy as np
from scipy import stats
from bokeh.plotting import figure, show
from bokeh.layouts import row, gridplot
from bokeh.io import output_notebook
# Render Bokeh figures inline in the notebook output cells.
output_notebook()

# Every cell below draws from NumPy's legacy global generator (np.random.normal).
# Seeding it once here makes a "Restart Kernel -> Run All" reproduce the same
# samples, p-values and figures; change SEED to look at a different realisation.
SEED = 42
np.random.seed(SEED)

In [141]:
# Null-hypothesis simulation: both groups are drawn from N(3, 1) with 1000
# observations each, so any "significant" p-value is a false positive.
n_sims = 1000000
sample_size = 1000
# Simulations are processed in row-batches so each random matrix stays at about
# 16 MB (2000 x 1000 float64) instead of materialising every draw at once.
batch_rows = 2000

student_p, welch_p = [], []
for first_row in range(0, n_sims, batch_rows):
    rows = min(batch_rows, n_sims - first_row)
    # One simulated experiment per row; axis=1 runs the t-test row-wise, which
    # replaces two scipy calls per simulation with two calls per batch.
    group_1 = np.random.normal(loc=3, scale=1, size=(rows, sample_size))
    group_2 = np.random.normal(loc=3, scale=1, size=(rows, sample_size))
    # Index 1 of the result is the p-value (index 0 is the t statistic).
    student_p.extend(stats.ttest_ind(group_1, group_2, axis=1)[1].tolist())
    welch_p.extend(
        stats.ttest_ind(group_1, group_2, axis=1, equal_var=False)[1].tolist())

In [142]:
# Side length (pixels) shared by every square figure in the notebook.
plot_dim = 480

# Build one histogram figure per test from the same recipe instead of two
# copy-pasted stanzas.
null_figures = []
for title, p_values in [("Student P-value Distribution", student_p),
                        ("Welch P-value Distribution", welch_p)]:
    fig = figure(title=title, tools="save",
                 background_fill_color="#E8DDCB", plot_width=plot_dim, plot_height=plot_dim)
    # Pin the 20 bins to [0, 1] so each bin is exactly 0.05 wide and the first
    # bar is the count of p < 0.05; without `range` the edges follow the
    # observed min/max of the data.
    hist, edges = np.histogram(p_values, density=False, bins=20, range=(0, 1))
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="#036564", line_color="#033649")
    fig.xaxis.axis_label = "p-value"
    fig.yaxis.axis_label = "Number of simulations"
    null_figures.append(fig)

p1, p2 = null_figures
show(row(p1, p2))

In [117]:
# Collect one example pair of samples for each outcome of Student's t-test:
# sig_comp     -> [rand_1, rand_2] from the first draw with p < 0.05
# non_sig_comp -> [rand_1, rand_2] from the first draw with p >= 0.05
sig_comp, non_sig_comp = [], []
# Keep drawing until BOTH examples exist. Stopping as soon as a significant
# pair appears would leave non_sig_comp empty whenever the very first draw is
# significant, and the plotting cell would then fail on non_sig_comp[0].
while not (sig_comp and non_sig_comp):
    rand_1 = np.random.normal(loc=3, scale=1, size=1000)
    rand_2 = np.random.normal(loc=3, scale=1, size=1000)
    p = stats.ttest_ind(rand_1, rand_2)[1]
    if p < 0.05:
        # Only the first significant pair is kept.
        if not sig_comp:
            sig_comp.extend([rand_1, rand_2])
    elif not non_sig_comp:
        non_sig_comp.extend([rand_1, rand_2])

In [120]:
# Overlay the two samples of each example comparison as density histograms
# (first sample in red, second in blue).
comparison_figures = []
for title, (sample_a, sample_b) in [("Significant Comparison", sig_comp),
                                    ("Non-Significant Comparison", non_sig_comp)]:
    fig = figure(title=title, tools="save",
                 background_fill_color="#E8DDCB", plot_width=plot_dim, plot_height=plot_dim)
    # Both histograms must share one set of bin edges. Computing them
    # separately and reusing a single `edges` name draws the first sample's
    # bars against the second sample's bins.
    lo = min(sample_a.min(), sample_b.min())
    hi = max(sample_a.max(), sample_b.max())
    hist_a, edges = np.histogram(sample_a, density=True, bins=25, range=(lo, hi))
    hist_b, _ = np.histogram(sample_b, density=True, bins=25, range=(lo, hi))
    fig.quad(top=hist_a, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="red", line_color="#033649", fill_alpha=0.5)
    fig.quad(top=hist_b, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="blue", line_color="#033649", fill_alpha=0.5)
    comparison_figures.append(fig)

p3, p4 = comparison_figures
show(row(p3, p4))

In [150]:
def run_test(s1, n1, s2, n2, n_sims=10000, loc=3):
    """Simulate Student and Welch t-test p-values under a true null hypothesis.

    Both groups share the mean `loc`, so every rejection is a false positive;
    only the standard deviations and sample sizes differ between groups.

    Parameters
    ----------
    s1, s2 : float
        Standard deviation of group 1 / group 2.
    n1, n2 : int
        Sample size of group 1 / group 2.
    n_sims : int, optional
        Number of simulated experiments (default 10000).
    loc : float, optional
        Common mean of both groups (default 3).

    Returns
    -------
    (list of float, list of float)
        Student p-values and Welch p-values, one entry per simulation,
        computed on the same simulated samples.
    """
    # One simulated experiment per row; axis=1 runs the test row-wise so the
    # whole simulation needs two scipy calls instead of two per experiment.
    rand_1 = np.random.normal(loc=loc, scale=s1, size=(n_sims, n1))
    rand_2 = np.random.normal(loc=loc, scale=s2, size=(n_sims, n2))
    # Index 1 of the result is the p-value (index 0 is the t statistic).
    student = stats.ttest_ind(rand_1, rand_2, axis=1)[1]
    welch = stats.ttest_ind(rand_1, rand_2, axis=1, equal_var=False)[1]
    return student.tolist(), welch.tolist()


# "low": the smaller group (n=50) has the larger spread (sd=2.3).
student_p_low, welch_p_low = run_test(s1=2.3, n1=50, s2=1.7, n2=100)

# "high": the larger group (n=100) has the larger spread (sd=2.3).
student_p_high, welch_p_high = run_test(s1=2.3, n1=100, s2=1.7, n2=50)

In [156]:
# Compare the Student (red) and Welch (blue) p-value distributions for the two
# unequal-variance scenarios; n is the sample size and sd the standard
# deviation of each group.
unequal_var_figures = []
for title, student_values, welch_values in [
        ("n=50, sd=2.3 vs n=100, sd=1.7", student_p_low, welch_p_low),
        ("n=100, sd=2.3 vs n=50, sd=1.7", student_p_high, welch_p_high)]:
    fig = figure(title=title, tools="save",
                 background_fill_color="#E8DDCB", plot_width=plot_dim, plot_height=plot_dim)
    # Pin both histograms to the same 20 bins on [0, 1] so the overlaid bars
    # line up and the first bar is exactly the count of p < 0.05.
    student_hist, edges = np.histogram(student_values, density=False, bins=20, range=(0, 1))
    welch_hist, _ = np.histogram(welch_values, density=False, bins=20, range=(0, 1))
    # NOTE(review): `legend=` is kept as the original spelled it; newer Bokeh
    # releases name this keyword `legend_label` - confirm against the
    # installed version before changing it.
    fig.quad(top=student_hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="red", line_color="#033649", fill_alpha=0.5, legend="Student t-test")
    fig.quad(top=welch_hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="blue", line_color="#033649", fill_alpha=0.5, legend="Welch t-test")
    unequal_var_figures.append(fig)

p5, p6 = unequal_var_figures
show(row(p5, p6))

In [161]:
# Null-hypothesis simulation with very small groups: both samples are drawn
# from N(3, 1) with only 5 observations each.
n_sims = 1000000
sample_size = 5

# One simulated experiment per row (two 1,000,000 x 5 float64 matrices, about
# 40 MB each); axis=1 runs the t-test row-wise in a single scipy call per test
# instead of one call per simulation.
rand_1 = np.random.normal(loc=3, scale=1, size=(n_sims, sample_size))
rand_2 = np.random.normal(loc=3, scale=1, size=(n_sims, sample_size))
# Index 1 of the result is the p-value (index 0 is the t statistic).
student_p = stats.ttest_ind(rand_1, rand_2, axis=1)[1].tolist()
welch_p = stats.ttest_ind(rand_1, rand_2, axis=1, equal_var=False)[1].tolist()

In [162]:
# Side length (pixels) of the square figures.
plot_dim = 480

# Build one histogram figure per test from the same recipe instead of two
# copy-pasted stanzas.
small_sample_figures = []
for title, p_values in [("Student P-value Distribution (n=5, sd=1)", student_p),
                        ("Welch P-value Distribution (n=5, sd=1)", welch_p)]:
    fig = figure(title=title, tools="save",
                 background_fill_color="#E8DDCB", plot_width=plot_dim, plot_height=plot_dim)
    # Pin the 20 bins to [0, 1] so each bin is exactly 0.05 wide and the first
    # bar is the count of p < 0.05; without `range` the edges follow the
    # observed min/max of the data.
    hist, edges = np.histogram(p_values, density=False, bins=20, range=(0, 1))
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="#036564", line_color="#033649")
    fig.xaxis.axis_label = "p-value"
    fig.yaxis.axis_label = "Number of simulations"
    small_sample_figures.append(fig)

p1, p2 = small_sample_figures
show(row(p1, p2))