In [None]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import time

# Define Functions

In [None]:
def for_loop(n: int) -> list:
    """Build the list ``[0, 1, ..., n-1]`` with an explicit loop and ``append``.

    This is the "for loop" variant of the benchmark, so the element-by-element
    append is intentional.

    Args:
        n: Number of consecutive integers (starting at 0) to collect.

    Returns:
        A list holding the integers from 0 up to, but not including, ``n``.
    """
    collected = []
    for value in range(n):
        collected.append(value)
    return collected

def list_comprehension(n: int) -> list:
    """Build the list ``[0, 1, ..., n-1]`` with a list comprehension.

    This is the "list comprehension" variant of the benchmark, so the
    comprehension form is intentional.

    Args:
        n: Number of consecutive integers (starting at 0) to collect.

    Returns:
        A list holding the integers from 0 up to, but not including, ``n``.
    """
    return [value for value in range(n)]

# Run each function 100 times to gather performance data

In [None]:
# Wall-clock duration (in seconds) of each of the 100 timed calls to for_loop.
for_loop_times = []

for _ in range(100):
    # only the call itself sits between the two clock readings
    t_begin = time.perf_counter()
    for_loop(100_000)
    t_finish = time.perf_counter()

    elapsed = t_finish - t_begin
    for_loop_times.append(elapsed)


In [None]:
# Wall-clock duration (in seconds) of each of the 100 timed calls to
# list_comprehension.
list_comprehension_times = []

for _ in range(100):
    # only the call itself sits between the two clock readings
    t_begin = time.perf_counter()
    list_comprehension(100_000)
    t_finish = time.perf_counter()

    elapsed = t_finish - t_begin
    list_comprehension_times.append(elapsed)

# Define t-Test and run it

In [None]:
def t_test(sample1: np.ndarray, sample2: np.ndarray) -> float:
    """Compute Student's two-sample t statistic with a pooled variance.

    The statistic is

        t = (mean1 - mean2) / (s_p * sqrt(1/n1 + 1/n2))

    where ``s_p**2 = ((n1-1)*s1**2 + (n2-1)*s2**2) / (n1 + n2 - 2)`` and
    ``s1**2``, ``s2**2`` are the unbiased sample variances.

    Args:
        sample1: First sample (any 1-D sequence of numbers).
        sample2: Second sample (any 1-D sequence of numbers).

    Returns:
        The t statistic; positive when the mean of ``sample1`` is larger.
    """
    # number of observations in each sample
    n1 = len(sample1)
    n2 = len(sample2)

    # sample means
    mean1 = np.mean(sample1)
    mean2 = np.mean(sample2)

    # The pooled-variance formula weights each variance by (n - 1), so it needs
    # the unbiased sample variance (ddof=1). NumPy's default ddof=0 would
    # underestimate the spread and inflate |t|.
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)

    # pooled standard deviation of both samples
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))

    # difference of the means in units of its standard error
    t = (mean1 - mean2) / (pooled_std * np.sqrt(1 / n1 + 1 / n2))

    return t

In [None]:
# t statistic of the two timing samples from the hand-written implementation
t_value = t_test(for_loop_times, list_comprehension_times)
print(f"t = {t_value:.3f}")

# cross-check against SciPy's independent two-sample t-test
scipy_result = stats.ttest_ind(for_loop_times, list_comprehension_times)
print(f"(Check: Scipy says: t = {scipy_result.statistic:.3f})")

# Generate t-distribution from H0 and check alpha value

In [None]:
plt.figure(figsize=(12,7))

# generate random values for mu, std for H0; the concrete values do not matter
# because the samples are rescaled below
random_mu = (np.random.random()-0.5)*2  # random float in [-1, 1)
random_std = (np.random.random())*10  # random float in [0, 10)

# generate two samples from the SAME (mu, std) normal distribution; dividing by
# sqrt(2)*std gives the pointwise difference of the two samples unit variance
a, b = np.random.normal(loc=random_mu, scale=random_std, size=(2, 1_000_000)) / (np.sqrt(2) * random_std)

# pointwise difference of the two samples, computed once and reused below
diff = a - b

# degrees of freedom of the two-sample t-test performed on the timing samples
dof = len(for_loop_times) + len(list_comprehension_times) - 2

# get the threshold value (t value) from the difference of the distributions
# such that 5% of all points lie outside [-alpha_t, alpha_t] (2.5% per tail)
alpha_t = np.abs(np.percentile(diff, 2.5))

# print the threshold and whether the t value of the experiment above lies
# outside these bounds (inside the bounds = result agrees with H0 = no effect)
print(f"alpha value: {alpha_t:.3f}")
print(f"Discard H0: {np.abs(t_value) > alpha_t}")


# plot the pointwise DIFFERENCE of those two samples in blue
plt.hist(diff, bins=200, alpha=0.5, color="blue", density=True, label="difference of two H0 samples")

# for comparison: draw a sample from a t distribution in red
plt.hist(np.random.standard_t(df=dof, size=1_000_000), bins=200, alpha=0.5, color="red", density=True, label=f"t distribution (df={dof})")

# add the threshold value as vertical lines
plt.axvline(-alpha_t, linestyle="dashed", color="black", label="5% threshold (two-sided)")
plt.axvline(alpha_t, linestyle="dashed", color="black")

# make the figure readable on its own
plt.title("Distribution of the test statistic under H0")
plt.xlabel("t")
plt.ylabel("probability density")
plt.legend()
plt.show()