# Zadanie 1 Test Monte Carlo

In [4]:
import numpy as np

In [19]:
# Step 1: the ten observed measurements analysed in this task.
arr = np.array(
    [
        6.20, 4.34, 8.14, 6.24, 3.72,
        3.54, 4.35, 2.67, 7.16, 6.0,
    ]
)
arr

array([6.2 , 4.34, 8.14, 6.24, 3.72, 3.54, 4.35, 2.67, 7.16, 6.  ])

In [20]:
# Sample mean, accumulated element by element with the built-in sum.
mean = sum(arr) / len(arr)

# Standard deviation with the 1/n normalisation (no n-1 correction):
# square each deviation from the mean, add them up, scale by 1/n, take the root.
squared_deviations = [(value - mean) ** 2 for value in arr]
sd = np.sqrt(1 / len(arr) * sum(squared_deviations))

print(f"Mean: {mean}")
print(f"Standard Deviation: {sd}")

Mean: 5.236
Standard Deviation: 1.6734049121476848


In [21]:
# Shared simulation settings used by the resampling cells below.
N_SIMULATIONS = 10000  # number of simulated / resampled data sets per test
BATCH_SIZE = 10  # observations drawn per simulated sample in the Monte Carlo test

# Seed NumPy's global generator so that "Restart & Run All" reproduces the
# same simulated samples, intervals and conclusions on every run.
SEED = 42
np.random.seed(SEED)

In [22]:
def box_muller_generate_normal(mu, sigma, size, rng=None):
    """
    Draw `size` values from a normal distribution using the Box-Muller transform.

    Parameters
    ----------
    mu : float
        Mean of the target distribution.
    sigma : float
        Standard deviation of the target distribution (the standard normal
        draws are scaled by this value).
    size : int
        Number of values to return; must be non-negative.
    rng : optional
        Source of uniform random numbers exposing ``uniform(low, high, size)``,
        e.g. ``np.random.default_rng(seed)``. When omitted, NumPy's global
        ``np.random`` state is used, exactly as before this parameter existed.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of length `size`.

    Raises
    ------
    ValueError
        If `size` is negative.
    """
    if size < 0:
        raise ValueError("size must be non-negative")

    uniform_source = np.random if rng is None else rng

    # Each Box-Muller step turns one pair of uniforms into two normal values,
    # so round the number of pairs up when `size` is odd.
    n_pairs = (size + 1) // 2

    u1 = uniform_source.uniform(0, 1, n_pairs)
    u2 = uniform_source.uniform(0, 1, n_pairs)

    # uniform() samples the half-open interval [0, 1), so 1 - u1 lies in (0, 1]
    # and the logarithm is always finite; no clamping of zeros is needed.
    # u2 is only used as an angle and never reaches a logarithm.
    r = np.sqrt(-2 * np.log(1 - u1))
    theta = 2 * np.pi * u2

    z0 = r * np.cos(theta)
    z1 = r * np.sin(theta)

    # Stack both halves and drop the surplus value produced for odd sizes.
    z = np.concatenate((z0, z1))[:size]

    return mu + sigma * z

In [23]:
# 2. Monte Carlo step: draw N_SIMULATIONS samples of BATCH_SIZE values from the
# normal distribution fitted above and keep the mean of each sample.
simulated_means = [
    sum(box_muller_generate_normal(mean, sd, BATCH_SIZE)) / BATCH_SIZE
    for _ in range(N_SIMULATIONS)
]

print(f"Simulated Means (first 10): {simulated_means[:10]}")

Simulated Means (first 10): [5.612145590045395, 5.049317370572485, 4.973331188576401, 5.3816147405606705, 4.776333577785208, 5.183049380449463, 4.5006280992438885, 4.267167353438516, 5.280988297288914, 5.19222236622935]


In [24]:
# 3. Percentile interval: order the simulated means and read off the values at
# the 2.5% and 97.5% positions.
simulated_means = sorted(simulated_means)
lower_bound, upper_bound = (
    simulated_means[int(quantile * N_SIMULATIONS)] for quantile in (0.025, 0.975)
)

print(f"95% Confidence Interval for the Mean: ({lower_bound}, {upper_bound})")

# Report whether 0 falls inside the interval (bounds included).
if not (lower_bound <= 0 <= upper_bound):
    print("The confidence interval does not contain 0.")
else:
    print("The confidence interval contains 0.")

95% Confidence Interval for the Mean: (4.218339392975173, 6.294948778656084)
The confidence interval does not contain 0.


In [25]:
# How many of the original observations from step 1 lie strictly above the mean.
counter = len(arr[arr > mean])
print(f"Number of observations: {counter}, larger than mean: {mean}")
print(f"Proportion: {counter / len(arr)}")

Number of observations: 5, larger than mean: 5.236
Proportion: 0.5


In [None]:
# We reject the null hypothesis that the expected value is 0,
# because the confidence interval does not contain 0.

# Zadanie 2 Test bootstrap

In [26]:
# Bootstrap of the sample mean: resample the observed data with replacement
# N_SIMULATIONS times and record the mean of every resample.
arr = np.array([6.20, 4.34, 8.14, 6.24, 3.72, 3.54, 4.35, 2.67, 7.16, 6.00])

means = []

for i in range(N_SIMULATIONS):
    # Draw all indices of one resample in a single call. The resample has as
    # many elements as the data itself, so the size follows len(arr) instead
    # of a hard-coded 10.
    resample_idx = np.random.randint(0, len(arr), size=len(arr))
    simulated_sample = arr[resample_idx]

    simulated_mean = sum(simulated_sample) / len(simulated_sample)
    means.append(simulated_mean)


In [27]:
# Percentile interval from the bootstrap means: order them and read off the
# values at the 2.5% and 97.5% positions.
means = sorted(means)
lower_bound, upper_bound = (
    means[int(quantile * N_SIMULATIONS)] for quantile in (0.025, 0.975)
)

print(f"95% Confidence Interval for the Mean: ({lower_bound}, {upper_bound})")

# Report whether 0 falls inside the interval (bounds included).
if not (lower_bound <= 0 <= upper_bound):
    print("The confidence interval does not contain 0.")
else:
    print("The confidence interval contains 0.")

95% Confidence Interval for the Mean: (4.208, 6.247000000000001)
The confidence interval does not contain 0.


In [28]:
# How many observations lie strictly above `mean` (the value computed in Task 1).
counter = len(arr[arr > mean])
print(f"Number of observations: {counter}, larger than mean: {mean}")
print(f"Proportion: {counter / len(arr)}")

Number of observations: 5, larger than mean: 5.236
Proportion: 0.5


In [None]:
# We reject the null hypothesis that the expected value is 0,
# because the confidence interval does not contain 0.

# Zadanie 3 Test permutacyjny dla zmiennych niepowiązanych

In [35]:
# The two unpaired samples compared in this task.
low_intensity = [378, 346, 245, 285, 365, 245, 208, 360, 296, 224, 292]
high_intensity = [
    218, 264, 211, 180, 256, 240, 261, 205, 145,
    195, 187, 210, 378, 204, 232, 237, 310,
]

# Pooled observations: the low-intensity values followed by the high-intensity ones.
# NOTE(review): the name `all` shadows Python's builtin all(); it is kept
# because the resampling cell further down reads this variable.
all = [*low_intensity, *high_intensity]

print(len(all))

28


In [42]:
# Permutation resampling: repeatedly shuffle the pooled observations, split them
# into two groups with the sizes of the original samples, and record both
# group means.
# NOTE(review): a permutation test usually records the difference between the
# two group means and compares it with the observed difference; here both means
# are stored in one list because the next cell reads `means` in that form.
n_low = len(low_intensity)  # size of the first group, taken from the data
means = []

for i in range(N_SIMULATIONS):
    # One shuffle yields a random split without replacement; this replaces
    # copying the pooled array and deleting one element per draw.
    shuffled = np.random.permutation(all)
    simulated_sample = shuffled[:n_low]
    all_copy = shuffled[n_low:]

    firstMean = sum(simulated_sample) / len(simulated_sample)

    secondMean = sum(all_copy) / len(all_copy)

    means.append(firstMean)
    means.append(secondMean)


In [43]:
# Order the recorded group means and read off the values at the 2.5% and 97.5%
# positions of the list.
means = sorted(means)
lower_bound, upper_bound = (
    means[int(quantile * len(means))] for quantile in (0.025, 0.975)
)

print(f"95% Confidence Interval for the Mean: ({lower_bound}, {upper_bound})")

95% Confidence Interval for the Mean: (231.11764705882354, 281.09090909090907)
