Skip to content

Commit dbfabac

Browse files
jstacclaude
andcommitted
Improve code quality: naming, PEP 8, line length, grid resolution
- Rename class frequentist → Frequentist (PEP 8) - Rename Bay_stat → bayes (snake_case for instances) - Rename ii → i/post, num → n_obs, num_list → n_obs_list, kk → k, K → head_counts, comp → table, step_num → n_obs, npt → n_thetas, nn → n_ns, nI → n_Is - Replace (sample <= θ) * 1 with .astype(int) for consistency - Shorten docstrings to fit within 80 characters - Break long code lines (plot calls, list comprehensions, titles) - Increase θ grid from 100 to 1000 points for smoother density plots - Use f-strings with comma formatting for plot labels Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent fd69f13 commit dbfabac

1 file changed

Lines changed: 96 additions & 130 deletions

File tree

lectures/prob_meaning.md

Lines changed: 96 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -138,68 +138,44 @@ As usual, a law of large numbers justifies this answer.
138138
Here is one solution:
139139

140140
```{code-cell} ipython3
141-
class frequentist:
141+
class Frequentist:
142142
143143
def __init__(self, θ, n, I):
144-
145-
'''
146-
initialization
147-
-----------------
148-
parameters:
149-
θ : probability that one toss of a coin will be a head with Y = 1
150-
n : number of independent flips in each independent sequence of draws
151-
I : number of independent sequence of draws
152-
153-
'''
154-
155144
self.θ, self.n, self.I = θ, n, I
156145
157146
def binomial(self, k):
158-
159-
'''compute the theoretical probability for specific input k'''
160-
161-
θ, n = self.θ, self.n
147+
'''Compute the theoretical probability.'''
162148
self.k = k
163-
self.P = binom.pmf(k, n, θ)
149+
self.P = binom.pmf(k, self.n, self.θ)
164150
165151
def draw(self):
166-
167-
'''draw n independent flips for I independent sequences'''
168-
152+
'''Draw n independent flips for I sequences.'''
169153
θ, n, I = self.θ, self.n, self.I
170154
sample = np.random.rand(I, n)
171-
Y = (sample <= θ) * 1
172-
self.Y = Y
173-
174-
def compute_fk(self, kk):
155+
self.Y = (sample <= θ).astype(int)
175156
176-
'''compute f_{k}^I for specific input k'''
177-
178-
Y, I = self.Y, self.I
179-
K = np.sum(Y, 1)
180-
f_kI = np.sum(K == kk) / I
181-
self.f_kI = f_kI
182-
self.kk = kk
157+
def compute_fk(self, k):
158+
'''Compute f_k^I for a given k.'''
159+
head_counts = np.sum(self.Y, axis=1)
160+
self.f_kI = np.sum(head_counts == k) / self.I
183161
184162
def compare(self):
185-
186-
'''compute and print the comparison'''
187-
163+
'''Compute and print the comparison.'''
188164
n = self.n
189-
comp = pt.PrettyTable()
190-
comp.field_names = ['k', 'Theoretical', 'Frequentist']
165+
table = pt.PrettyTable()
166+
table.field_names = ['k', 'Theoretical', 'Frequentist']
191167
self.draw()
192168
for i in range(n+1):
193169
self.binomial(i)
194170
self.compute_fk(i)
195-
comp.add_row([i, self.P, self.f_kI])
196-
print(comp)
171+
table.add_row([i, self.P, self.f_kI])
172+
print(table)
197173
```
198174

199175
```{code-cell} ipython3
200176
θ, n, k, I = 0.7, 20, 10, 1_000_000
201177
202-
freq = frequentist(θ, n, I)
178+
freq = Frequentist(θ, n, I)
203179
204180
freq.compare()
205181
```
@@ -222,12 +198,12 @@ $$
222198
We'll vary $\theta$ from $0.01$ to $0.99$ and plot outcomes against $\theta$.
223199

224200
```{code-cell} ipython3
225-
θ_low, θ_high, npt = 0.01, 0.99, 50
226-
thetas = np.linspace(θ_low, θ_high, npt)
201+
θ_low, θ_high, n_thetas = 0.01, 0.99, 50
202+
thetas = np.linspace(θ_low, θ_high, n_thetas)
227203
P = []
228204
f_kI = []
229-
for i in range(npt):
230-
freq = frequentist(thetas[i], n, I)
205+
for i in range(n_thetas):
206+
freq = Frequentist(thetas[i], n, I)
231207
freq.binomial(k)
232208
freq.draw()
233209
freq.compute_fk(k)
@@ -255,12 +231,12 @@ Now we fix $\theta=0.7, k=10, I=1,000,000$ and vary $n$ from $1$ to $100$.
255231
Then we'll plot outcomes.
256232

257233
```{code-cell} ipython3
258-
n_low, n_high, nn = 1, 100, 50
259-
ns = np.linspace(n_low, n_high, nn, dtype='int')
234+
n_low, n_high, n_ns = 1, 100, 50
235+
ns = np.linspace(n_low, n_high, n_ns, dtype='int')
260236
P = []
261237
f_kI = []
262-
for i in range(nn):
263-
freq = frequentist(θ, ns[i], I)
238+
for i in range(n_ns):
239+
freq = Frequentist(θ, ns[i], I)
264240
freq.binomial(k)
265241
freq.draw()
266242
freq.compute_fk(k)
@@ -286,13 +262,13 @@ plt.show()
286262
Now we fix $\theta=0.7, n=20, k=10$ and vary $\log(I)$ from $2$ to $6$.
287263

288264
```{code-cell} ipython3
289-
I_log_low, I_log_high, nI = 2, 6, 200
290-
log_Is = np.linspace(I_log_low, I_log_high, nI)
265+
I_log_low, I_log_high, n_Is = 2, 6, 200
266+
log_Is = np.linspace(I_log_low, I_log_high, n_Is)
291267
Is = np.power(10, log_Is).astype(int)
292268
P = []
293269
f_kI = []
294-
for i in range(nI):
295-
freq = frequentist(θ, n, Is[i])
270+
for i in range(n_Is):
271+
freq = Frequentist(θ, n, Is[i])
296272
freq.binomial(k)
297273
freq.draw()
298274
freq.compute_fk(k)
@@ -430,85 +406,64 @@ Now please pretend that the true value of $\theta = .4$ and that someone who doe
430406
class Bayesian:
431407
432408
def __init__(self, θ=0.4, n=1_000_000, α=0.5, β=0.5):
433-
"""
434-
Parameters:
409+
'''
410+
Parameters
435411
----------
436-
θ : float, ranging from [0,1].
437-
probability that one toss of a coin will be a head with Y = 1
438-
439-
n : int.
440-
number of independent flips in an independent sequence of draws
441-
442-
α&β : int or float.
443-
parameters of the prior distribution on θ
444-
445-
"""
412+
θ : Probability of heads on each flip.
413+
n : Number of flips in the sequence.
414+
α, β : Parameters of the beta prior on θ.
415+
'''
446416
self.θ, self.n, self.α, self.β = θ, n, α, β
447417
self.prior = st.beta(α, β)
448418
449419
def draw(self):
450-
"""
451-
simulate a single sequence of draws of length n, given probability θ
452-
453-
"""
420+
'''Simulate a sequence of n coin flips.'''
454421
array = np.random.rand(self.n)
455422
self.draws = (array < self.θ).astype(int)
456423
457-
def form_single_posterior(self, step_num):
458-
"""
459-
form a posterior distribution after observing the first step_num elements of the draws
460-
461-
Parameters
462-
----------
463-
step_num: int.
464-
number of steps observed to form a posterior distribution
465-
466-
Returns
467-
------
468-
the posterior distribution for sake of plotting in the subsequent steps
424+
def form_single_posterior(self, n_obs):
425+
'''Return the posterior after the first n_obs flips.'''
426+
heads = self.draws[:n_obs].sum()
427+
tails = n_obs - heads
428+
return st.beta(self.α + heads, self.β + tails)
469429
470-
"""
471-
heads_num = self.draws[:step_num].sum()
472-
tails_num = step_num - heads_num
473-
474-
return st.beta(self.α+heads_num, self.β+tails_num)
475-
476-
def form_posterior_series(self,num_obs_list):
477-
"""
478-
form a series of posterior distributions that form after observing different number of draws.
479-
480-
Parameters
481-
----------
482-
num_obs_list: a list of int.
483-
a list of the number of observations used to form a series of posterior distributions.
484-
485-
"""
430+
def form_posterior_series(self, n_obs_list):
431+
'''Form posteriors for each sample size in n_obs_list.'''
486432
self.posterior_list = []
487-
for num in num_obs_list:
488-
self.posterior_list.append(self.form_single_posterior(num))
433+
for n_obs in n_obs_list:
434+
self.posterior_list.append(
435+
self.form_single_posterior(n_obs)
436+
)
489437
```
490438

491439
**d)** Please plot the posterior distribution for $\theta$ as a function of $\theta$ as $n$ grows from $1, 2, \ldots$.
492440

493441
```{code-cell} ipython3
494-
Bay_stat = Bayesian()
495-
Bay_stat.draw()
442+
bayes = Bayesian()
443+
bayes.draw()
496444
497-
num_list = [1, 2, 3, 4, 5, 10, 20, 30, 50, 70, 100, 300, 500, 1000, # this line for finite n
498-
5000, 10_000, 50_000, 100_000, 200_000, 300_000] # this line for approximately infinite n
445+
n_obs_list = [1, 2, 3, 4, 5, 10, 20, 30, 50, 70,
446+
100, 300, 500, 1000,
447+
5000, 10_000, 50_000, 100_000,
448+
200_000, 300_000]
499449
500-
Bay_stat.form_posterior_series(num_list)
450+
bayes.form_posterior_series(n_obs_list)
501451
502-
θ_values = np.linspace(0.01, 1, 100)
452+
θ_values = np.linspace(0.01, 1, 1000)
503453
504454
fig, ax = plt.subplots(figsize=(10, 6))
505455
506-
ax.plot(θ_values, Bay_stat.prior.pdf(θ_values), label='Prior Distribution', color='k', linestyle='--')
456+
ax.plot(θ_values, bayes.prior.pdf(θ_values),
457+
label='Prior Distribution', color='k',
458+
linestyle='--')
507459
508-
for ii, num in enumerate(num_list[:14]):
509-
ax.plot(θ_values, Bay_stat.posterior_list[ii].pdf(θ_values), label='Posterior with n = %d' % num)
460+
for i, n_obs in enumerate(n_obs_list[:14]):
461+
posterior = bayes.posterior_list[i]
462+
ax.plot(θ_values, posterior.pdf(θ_values),
463+
label=f'Posterior with n = {n_obs}')
510464
511-
ax.set_title('P.D.F of Posterior Distributions', fontsize=15)
465+
ax.set_title('P.D.F of Posterior Distributions',
466+
fontsize=15)
512467
ax.set_xlabel(r"$\theta$", fontsize=15)
513468
514469
ax.legend(fontsize=11)
@@ -518,13 +473,13 @@ plt.show()
518473
**e)** For various $n$'s, please describe and compute $.05$ and $.95$ quantiles for posterior probabilities.
519474

520475
```{code-cell} ipython3
521-
lower_bound = [ii.ppf(0.05) for ii in Bay_stat.posterior_list[:14]]
522-
upper_bound = [ii.ppf(0.95) for ii in Bay_stat.posterior_list[:14]]
476+
lower_bound = [post.ppf(0.05) for post in bayes.posterior_list[:14]]
477+
upper_bound = [post.ppf(0.95) for post in bayes.posterior_list[:14]]
523478
524479
interval_df = pd.DataFrame()
525480
interval_df['upper'] = upper_bound
526481
interval_df['lower'] = lower_bound
527-
interval_df.index = num_list[:14]
482+
interval_df.index = n_obs_list[:14]
528483
interval_df = interval_df.T
529484
interval_df
530485
```
@@ -548,14 +503,20 @@ $$
548503
```{code-cell} ipython3
549504
left_value, right_value = 0.45, 0.55
550505
551-
posterior_prob_list=[ii.cdf(right_value)-ii.cdf(left_value) for ii in Bay_stat.posterior_list]
506+
posterior_prob_list = [
507+
post.cdf(right_value) - post.cdf(left_value)
508+
for post in bayes.posterior_list
509+
]
552510
553511
fig, ax = plt.subplots(figsize=(8, 5))
554512
ax.plot(posterior_prob_list)
555-
ax.set_title('Posterior Probabililty that '+ r"$\theta$" +' Ranges from %.2f to %.2f'%(left_value, right_value),
556-
fontsize=13)
513+
ax.set_title(
514+
r'Posterior Probability that $\theta$'
515+
f' Ranges from {left_value:.2f}'
516+
f' to {right_value:.2f}',
517+
fontsize=13)
557518
ax.set_xticks(np.arange(0, len(posterior_prob_list), 3))
558-
ax.set_xticklabels(num_list[::3])
519+
ax.set_xticklabels(n_obs_list[::3])
559520
ax.set_xlabel('Number of Observations', fontsize=11)
560521
561522
plt.show()
@@ -584,10 +545,10 @@ Using the Python class we made above, we can see the evolution of posterior dist
584545
```{code-cell} ipython3
585546
fig, ax = plt.subplots(figsize=(10, 6))
586547
587-
for ii, num in enumerate(num_list[14:]):
588-
ii += 14
589-
ax.plot(θ_values, Bay_stat.posterior_list[ii].pdf(θ_values),
590-
label='Posterior with n=%d thousand' % (num/1000))
548+
for i, n_obs in enumerate(n_obs_list[14:]):
549+
posterior = bayes.posterior_list[i + 14]
550+
ax.plot(θ_values, posterior.pdf(θ_values),
551+
label=f'Posterior with n = {n_obs:,}')
591552
592553
ax.set_title('P.D.F of Posterior Distributions', fontsize=15)
593554
ax.set_xlabel(r"$\theta$", fontsize=15)
@@ -604,21 +565,23 @@ Here the posterior mean converges to $0.4$ while the posterior standard deviat
604565
To show this, we compute the means and variances statistics of the posterior distributions.
605566

606567
```{code-cell} ipython3
607-
mean_list = [ii.mean() for ii in Bay_stat.posterior_list]
608-
std_list = [ii.std() for ii in Bay_stat.posterior_list]
568+
mean_list = [post.mean() for post in bayes.posterior_list]
569+
std_list = [post.std() for post in bayes.posterior_list]
609570
610571
fig, ax = plt.subplots(1, 2, figsize=(14, 5))
611572
612573
ax[0].plot(mean_list)
613-
ax[0].set_title('Mean Values of Posterior Distribution', fontsize=13)
574+
ax[0].set_title('Mean of Posterior Distribution',
575+
fontsize=13)
614576
ax[0].set_xticks(np.arange(0, len(mean_list), 3))
615-
ax[0].set_xticklabels(num_list[::3])
577+
ax[0].set_xticklabels(n_obs_list[::3])
616578
ax[0].set_xlabel('Number of Observations', fontsize=11)
617579
618580
ax[1].plot(std_list)
619-
ax[1].set_title('Standard Deviations of Posterior Distribution', fontsize=13)
581+
ax[1].set_title('Std Dev of Posterior Distribution',
582+
fontsize=13)
620583
ax[1].set_xticks(np.arange(0, len(std_list), 3))
621-
ax[1].set_xticklabels(num_list[::3])
584+
ax[1].set_xticklabels(n_obs_list[::3])
622585
ax[1].set_xlabel('Number of Observations', fontsize=11)
623586
624587
plt.show()
@@ -669,17 +632,20 @@ According to the Law of Large Numbers, for a large number of observations, obser
669632
Consequently, the mean of the posterior distribution converges to $0.4$ and the variance withers to zero.
670633

671634
```{code-cell} ipython3
672-
upper_bound = [ii.ppf(0.95) for ii in Bay_stat.posterior_list]
673-
lower_bound = [ii.ppf(0.05) for ii in Bay_stat.posterior_list]
635+
upper_bound = [post.ppf(0.95) for post in bayes.posterior_list]
636+
lower_bound = [post.ppf(0.05) for post in bayes.posterior_list]
674637
675638
fig, ax = plt.subplots(figsize=(10, 6))
676-
ax.scatter(np.arange(len(upper_bound)), upper_bound, label='95 th Quantile')
677-
ax.scatter(np.arange(len(lower_bound)), lower_bound, label='05 th Quantile')
639+
ax.scatter(np.arange(len(upper_bound)),
640+
upper_bound, label='95th Quantile')
641+
ax.scatter(np.arange(len(lower_bound)),
642+
lower_bound, label='5th Quantile')
678643
679644
ax.set_xticks(np.arange(0, len(upper_bound), 2))
680-
ax.set_xticklabels(num_list[::2])
645+
ax.set_xticklabels(n_obs_list[::2])
681646
ax.set_xlabel('Number of Observations', fontsize=12)
682-
ax.set_title('Bayesian Coverage Intervals of Posterior Distributions', fontsize=15)
647+
ax.set_title('Bayesian Coverage Intervals of '
648+
'Posterior Distributions', fontsize=15)
683649
684650
ax.legend(fontsize=11)
685651
plt.show()

0 commit comments

Comments
 (0)