In [8]:
import itertools
import numpy as np
import pandas as pd
import math
from scipy.stats import binom, norm

# Problem 8.17: sampling distribution of the mean for samples of size n
# drawn with replacement from the population X.
X = np.array([9, 12, 15])
n = 2

# Enumerate every ordered sample of size n and take the mean of each one.
samples = list(itertools.product(X, repeat=n))
sample_means = list(map(np.mean, samples))

# Relative frequency of each distinct sample mean, ordered by xbar.
df = pd.DataFrame({'xbar': sample_means})
dist = (
    df.value_counts(normalize=True)
    .reset_index(name='p(xbar)')
    .sort_values('xbar')
    .reset_index(drop=True)
)

# Helper columns whose totals give E[xbar] and E[xbar^2].
dist['xbar * p(xbar)'] = dist['xbar'] * dist['p(xbar)']
dist['xbar^2 * p(xbar)'] = dist['xbar'].pow(2) * dist['p(xbar)']

# Var(xbar) = E[xbar^2] - (E[xbar])^2
mu_xbar = dist['xbar * p(xbar)'].sum()
sigma2_xbar = dist['xbar^2 * p(xbar)'].sum() - mu_xbar**2

print("Sampling distribution of the mean (n=2):")
print(dist)
print("\nMean of x̄ =", mu_xbar)
print("Variance of x̄ =", sigma2_xbar)

Sampling distribution of the mean (n=2):
   xbar   p(xbar)  xbar * p(xbar)  xbar^2 * p(xbar)
0   9.0  0.111111        1.000000               9.0
1  10.5  0.222222        2.333333              24.5
2  12.0  0.333333        4.000000              48.0
3  13.5  0.222222        3.000000              40.5
4  15.0  0.111111        1.666667              25.0

Mean of x̄ = 11.999999999999998
Variance of x̄ = 3.0000000000000284


In [7]:
'''
8.21 A population consists of the four numbers 3, 7, 11, and 15. Consider all possible samples of size 2 that can be
drawn with replacement from this population. Find (a) the population mean, (b) the population standard
deviation, (c) the mean of the sampling distribution of means, and (d ) the standard deviation of the sampling
distribution of means. Verify parts (c) and (d ) directly from (a) and (b) by using suitable formulas.
'''

# Problem 8.21 inputs: the population and the size of each sample.
pop821 = np.array([3, 7, 11, 15])
sample_size = 2

# (a), (b) Population mean and standard deviation. The variance is the mean
# squared deviation (divides by N, not N - 1) because pop821 is the whole
# population rather than a sample from it.
mu_pop = np.mean(pop821)
var_pop = np.mean((pop821 - mu_pop)**2)
sd_pop = np.sqrt(var_pop)

# Every ordered sample of `sample_size` values drawn with replacement.
# This cell uses its own `sample_size` rather than a variable left over from
# another cell, so it runs correctly on a fresh kernel.
samples = list(itertools.product(pop821, repeat=sample_size))
sample_means = np.array([np.mean(s) for s in samples])

# Sampling distribution of the mean: each distinct xbar with its count and
# probability, plus helper columns whose totals give E[xbar] and E[xbar^2].
dist = pd.Series(sample_means).value_counts().sort_index().reset_index()
dist.columns = ['xbar', 'count']
dist['p(xbar)'] = dist['count'] / len(samples)
dist['xbar * p(xbar)'] = dist['xbar'] * dist['p(xbar)']
dist['xbar^2 * p(xbar)'] = (dist['xbar']**2) * dist['p(xbar)']

# (c) Mean of the sampling distribution.
mu_xbar = dist['xbar * p(xbar)'].sum()

# (d) Standard deviation of the sampling distribution via E[xbar^2] - mu^2.
var_xbar = dist['xbar^2 * p(xbar)'].sum() - mu_xbar**2
sd_xbar = np.sqrt(var_xbar)

# Formula check: sampling with replacement gives sigma_xbar = sigma / sqrt(n).
sd_xbar_formula = sd_pop / np.sqrt(sample_size)

print("Population:", pop821.tolist())
print(f"(a) Population mean μ = {mu_pop:.4f}")
print(f"(b) Population standard deviation σ = {sd_pop:.4f}\n")

print("Sampling distribution of x̄ (n=2):")
print(dist[['xbar', 'count', 'p(xbar)', 'xbar * p(xbar)', 'xbar^2 * p(xbar)']], "\n")

print(f"(c) Mean of sampling distribution μ_x̄ = {mu_xbar:.4f}")
print(f"(d) Std dev of sampling distribution σ_x̄ = {sd_xbar:.4f}\n")

print("Verification:")
print(f"μ_x̄ = μ: {mu_xbar:.4f} ?= {mu_pop:.4f}")
print(f"σ_x̄ = σ/√n: {sd_xbar:.4f} ?= {sd_xbar_formula:.4f}")

Population: [3, 7, 11, 15]
(a) Population mean μ = 9.0000
(b) Population standard deviation σ = 4.4721

Sampling distribution of x̄ (n=2):
   xbar  count  p(xbar)  xbar * p(xbar)  xbar^2 * p(xbar)
0   3.0      1   0.0625          0.1875            0.5625
1   5.0      2   0.1250          0.6250            3.1250
2   7.0      3   0.1875          1.3125            9.1875
3   9.0      4   0.2500          2.2500           20.2500
4  11.0      3   0.1875          2.0625           22.6875
5  13.0      2   0.1250          1.6250           21.1250
6  15.0      1   0.0625          0.9375           14.0625 

(c) Mean of sampling distribution μ_x̄ = 9.0000
(d) Std dev of sampling distribution σ_x̄ = 3.1623

Verification:
μ_x̄ = μ: 9.0000 ?= 9.0000
σ_x̄ = σ/√n: 3.1623 ?= 3.1623


In [6]:
'''
8.34 Find the probability that of the next 200 children born, (a) less than 40% will be boys, (b) between 43% and
57% will be girls, and (c) more than 54% will be boys. Assume equal probabilities for the births of boys and
girls.
'''

# Problem 8.34: X = number of boys among n834 births, X ~ Binomial(n834, p834).
n834 = 200
p834 = 0.5
mu834 = n834 * p834
sigma834 = math.sqrt(n834 * p834 * (1 - p834))


def _binom_cdf834(k):
    """Exact P(X <= k) for X ~ Binomial(n834, p834)."""
    return binom.cdf(k, n834, p834)


def _normal_cdf834(x):
    """Normal approximation to P(X <= x) using mean mu834 and sd sigma834."""
    return norm.cdf((x - mu834) / sigma834)


# (a) Fewer than 40% boys: X < 80, i.e. X <= 79 (continuity-corrected to 79.5).
p834_a_exact = _binom_cdf834(79)
p834_a_norm = _normal_cdf834(79.5)

# (b) Between 43% and 57% girls. With girls = 200 - X:
#     0.43 <= (200 - X) / 200 <= 0.57  =>  86 <= X <= 114
p834_b_exact = _binom_cdf834(114) - _binom_cdf834(85)
p834_b_norm = _normal_cdf834(114.5) - _normal_cdf834(85.5)

# (c) More than 54% boys: X > 108 (continuity-corrected to 108.5).
p834_c_exact = 1 - _binom_cdf834(108)
p834_c_norm = 1 - _normal_cdf834(108.5)

print("Results (n=200, p=0.5)")
print(f"(a) P(boys < 40%)      exact = {p834_a_exact:.4f},  normal approx = {p834_a_norm:.4f}")
print(f"(b) P(43%-57% girls)   exact = {p834_b_exact:.4f},  normal approx = {p834_b_norm:.4f}")
print(f"(c) P(boys > 54%)      exact = {p834_c_exact:.4f},  normal approx = {p834_c_norm:.4f}")

Results (n=200, p=0.5)
(a) P(boys < 40%)      exact = 0.0018,  normal approx = 0.0019
(b) P(43%-57% girls)   exact = 0.9600,  normal approx = 0.9597
(c) P(boys > 54%)      exact = 0.1146,  normal approx = 0.1147


In [5]:
'''
8.49 The credit hour distribution at Metropolitan Technological College is as follows:

hour_dis = {
	'p(x)': [0.1, 0.2, 0.4, 0.2, 0.1],
	'x': [6, 9, 12, 15, 18]
}

Find mu and sigma^2. Give the 25 (with replacement) possible samples of size 2, their means, and their
probabilities.
'''

# Problem 8.49: discrete distribution of credit hours.
x849 = np.array([6, 9, 12, 15, 18])          # credit hours
p849 = np.array([0.1, 0.2, 0.4, 0.2, 0.1])   # probabilities

# (a) Compute mu and sigma^2 using Var(X) = E[X^2] - (E[X])^2
mu849 = np.sum(x849 * p849)
E_x2 = np.sum((x849**2) * p849)
sigma2_849 = E_x2 - mu849**2

print(f"(a) μ = {mu849:.4f}")
print(f"(a) σ² = {sigma2_849:.4f}\n")

# (b) All 25 samples of size 2 with replacement
samples849 = list(itertools.product(x849, repeat=2))
sample_means849 = [np.mean(s) for s in samples849]

# Probability of each sample = product of the two draw probabilities, since
# draws with replacement are independent. itertools.product visits index
# pairs in the same order for p849 as it did for x849, so the probabilities
# line up with samples849 positionally. This avoids searching for each value
# in the support, which would pick the wrong entry if a value were repeated.
probs849 = [p_first * p_second
            for p_first, p_second in itertools.product(p849, repeat=2)]

# Sanity check: the joint probabilities over all samples must total 1.
assert np.isclose(sum(probs849), 1.0), "sample probabilities do not sum to 1"

# Build dataframe of samples, means, and probabilities
df849 = pd.DataFrame({
    'sample': samples849,
    'mean': sample_means849,
    'P(sample)': probs849
})

# (Optional) group by distinct sample means to get sampling distribution
dist849 = df849.groupby('mean')['P(sample)'].sum().reset_index()

print("25 samples of size 2 with their means and probabilities:")
print(df849)

print("\nSampling distribution of the mean (grouped):")
print(dist849)

(a) μ = 12.0000
(a) σ² = 10.8000

25 samples of size 2 with their means and probabilities:
      sample  mean  P(sample)
0     (6, 6)   6.0       0.01
1     (6, 9)   7.5       0.02
2    (6, 12)   9.0       0.04
3    (6, 15)  10.5       0.02
4    (6, 18)  12.0       0.01
5     (9, 6)   7.5       0.02
6     (9, 9)   9.0       0.04
7    (9, 12)  10.5       0.08
8    (9, 15)  12.0       0.04
9    (9, 18)  13.5       0.02
10   (12, 6)   9.0       0.04
11   (12, 9)  10.5       0.08
12  (12, 12)  12.0       0.16
13  (12, 15)  13.5       0.08
14  (12, 18)  15.0       0.04
15   (15, 6)  10.5       0.02
16   (15, 9)  12.0       0.04
17  (15, 12)  13.5       0.08
18  (15, 15)  15.0       0.04
19  (15, 18)  16.5       0.02
20   (18, 6)  12.0       0.01
21   (18, 9)  13.5       0.02
22  (18, 12)  15.0       0.04
23  (18, 15)  16.5       0.02
24  (18, 18)  18.0       0.01

Sampling distribution of the mean (grouped):
   mean  P(sample)
0   6.0       0.01
1   7.5       0.04
2   9.0       0.12
3  10.5