@@ -138,68 +138,44 @@ As usual, a law of large numbers justifies this answer.
138138Here is one solution:
139139
140140``` {code-cell} ipython3
141- class frequentist :
141+ class Frequentist :
142142
143143 def __init__(self, θ, n, I):
144-
145- '''
146- initialization
147- -----------------
148- parameters:
149- θ : probability that one toss of a coin will be a head with Y = 1
150- n : number of independent flips in each independent sequence of draws
151- I : number of independent sequence of draws
152-
153- '''
154-
155144 self.θ, self.n, self.I = θ, n, I
156145
157146 def binomial(self, k):
158-
159- '''compute the theoretical probability for specific input k'''
160-
161- θ, n = self.θ, self.n
147+ '''Compute the theoretical probability.'''
162148 self.k = k
163- self.P = binom.pmf(k, n, θ)
149+ self.P = binom.pmf(k, self. n, self. θ)
164150
165151 def draw(self):
166-
167- '''draw n independent flips for I independent sequences'''
168-
152+ '''Draw n independent flips for I sequences.'''
169153 θ, n, I = self.θ, self.n, self.I
170154 sample = np.random.rand(I, n)
171- Y = (sample <= θ) * 1
172- self.Y = Y
173-
174- def compute_fk(self, kk):
155+ self.Y = (sample <= θ).astype(int)
175156
176- '''compute f_{k}^I for specific input k'''
177-
178- Y, I = self.Y, self.I
179- K = np.sum(Y, 1)
180- f_kI = np.sum(K == kk) / I
181- self.f_kI = f_kI
182- self.kk = kk
157+ def compute_fk(self, k):
158+ '''Compute f_k^I for a given k.'''
159+ head_counts = np.sum(self.Y, axis=1)
160+ self.f_kI = np.sum(head_counts == k) / self.I
183161
184162 def compare(self):
185-
186- '''compute and print the comparison'''
187-
163+ '''Compute and print the comparison.'''
188164 n = self.n
189- comp = pt.PrettyTable()
190- comp .field_names = ['k', 'Theoretical', 'Frequentist']
165+ table = pt.PrettyTable()
166+ table .field_names = ['k', 'Theoretical', 'Frequentist']
191167 self.draw()
192168 for i in range(n+1):
193169 self.binomial(i)
194170 self.compute_fk(i)
195- comp .add_row([i, self.P, self.f_kI])
196- print(comp )
171+ table .add_row([i, self.P, self.f_kI])
172+ print(table )
197173```
198174
199175``` {code-cell} ipython3
200176θ, n, k, I = 0.7, 20, 10, 1_000_000
201177
202- freq = frequentist (θ, n, I)
178+ freq = Frequentist (θ, n, I)
203179
204180freq.compare()
205181```
222198We'll vary $\theta$ from $0.01$ to $0.99$ and plot outcomes against $\theta$.
223199
224200``` {code-cell} ipython3
225- θ_low, θ_high, npt = 0.01, 0.99, 50
226- thetas = np.linspace(θ_low, θ_high, npt )
201+ θ_low, θ_high, n_thetas = 0.01, 0.99, 50
202+ thetas = np.linspace(θ_low, θ_high, n_thetas )
227203P = []
228204f_kI = []
229- for i in range(npt ):
230- freq = frequentist (thetas[i], n, I)
205+ for i in range(n_thetas ):
206+ freq = Frequentist (thetas[i], n, I)
231207 freq.binomial(k)
232208 freq.draw()
233209 freq.compute_fk(k)
@@ -255,12 +231,12 @@ Now we fix $\theta=0.7, k=10, I=1,000,000$ and vary $n$ from $1$ to $100$.
255231Then we'll plot outcomes.
256232
257233``` {code-cell} ipython3
258- n_low, n_high, nn = 1, 100, 50
259- ns = np.linspace(n_low, n_high, nn , dtype='int')
234+ n_low, n_high, n_ns = 1, 100, 50
235+ ns = np.linspace(n_low, n_high, n_ns , dtype='int')
260236P = []
261237f_kI = []
262- for i in range(nn ):
263- freq = frequentist (θ, ns[i], I)
238+ for i in range(n_ns ):
239+ freq = Frequentist (θ, ns[i], I)
264240 freq.binomial(k)
265241 freq.draw()
266242 freq.compute_fk(k)
@@ -286,13 +262,13 @@ plt.show()
286262Now we fix $\theta=0.7, n=20, k=10$ and vary $\log(I)$ from $2$ to $6$.
287263
288264``` {code-cell} ipython3
289- I_log_low, I_log_high, nI = 2, 6, 200
290- log_Is = np.linspace(I_log_low, I_log_high, nI )
265+ I_log_low, I_log_high, n_Is = 2, 6, 200
266+ log_Is = np.linspace(I_log_low, I_log_high, n_Is )
291267Is = np.power(10, log_Is).astype(int)
292268P = []
293269f_kI = []
294- for i in range(nI ):
295- freq = frequentist (θ, n, Is[i])
270+ for i in range(n_Is ):
271+ freq = Frequentist (θ, n, Is[i])
296272 freq.binomial(k)
297273 freq.draw()
298274 freq.compute_fk(k)
@@ -430,85 +406,64 @@ Now please pretend that the true value of $\theta = .4$ and that someone who doe
430406class Bayesian:
431407
432408 def __init__(self, θ=0.4, n=1_000_000, α=0.5, β=0.5):
433- """
434- Parameters:
409+ '''
410+ Parameters
435411 ----------
436- θ : float, ranging from [0,1].
437- probability that one toss of a coin will be a head with Y = 1
438-
439- n : int.
440- number of independent flips in an independent sequence of draws
441-
442- α&β : int or float.
443- parameters of the prior distribution on θ
444-
445- """
412+ θ : Probability of heads on each flip.
413+ n : Number of flips in the sequence.
414+ α, β : Parameters of the beta prior on θ.
415+ '''
446416 self.θ, self.n, self.α, self.β = θ, n, α, β
447417 self.prior = st.beta(α, β)
448418
449419 def draw(self):
450- """
451- simulate a single sequence of draws of length n, given probability θ
452-
453- """
420+ '''Simulate a sequence of n coin flips.'''
454421 array = np.random.rand(self.n)
455422 self.draws = (array < self.θ).astype(int)
456423
457- def form_single_posterior(self, step_num):
458- """
459- form a posterior distribution after observing the first step_num elements of the draws
460-
461- Parameters
462- ----------
463- step_num: int.
464- number of steps observed to form a posterior distribution
465-
466- Returns
467- ------
468- the posterior distribution for sake of plotting in the subsequent steps
424+ def form_single_posterior(self, n_obs):
425+ '''Return the posterior after the first n_obs flips.'''
426+ heads = self.draws[:n_obs].sum()
427+ tails = n_obs - heads
428+ return st.beta(self.α + heads, self.β + tails)
469429
470- """
471- heads_num = self.draws[:step_num].sum()
472- tails_num = step_num - heads_num
473-
474- return st.beta(self.α+heads_num, self.β+tails_num)
475-
476- def form_posterior_series(self,num_obs_list):
477- """
478- form a series of posterior distributions that form after observing different number of draws.
479-
480- Parameters
481- ----------
482- num_obs_list: a list of int.
483- a list of the number of observations used to form a series of posterior distributions.
484-
485- """
430+ def form_posterior_series(self, n_obs_list):
431+ '''Form posteriors for each sample size in n_obs_list.'''
486432 self.posterior_list = []
487- for num in num_obs_list:
488- self.posterior_list.append(self.form_single_posterior(num))
433+ for n_obs in n_obs_list:
434+ self.posterior_list.append(
435+ self.form_single_posterior(n_obs)
436+ )
489437```
490438
491439** d)** Please plot the posterior distribution for $\theta$ as a function of $\theta$ as $n$ grows from $1, 2, \ldots$.
492440
493441``` {code-cell} ipython3
494- Bay_stat = Bayesian()
495- Bay_stat .draw()
442+ bayes = Bayesian()
443+ bayes .draw()
496444
497- num_list = [1, 2, 3, 4, 5, 10, 20, 30, 50, 70, 100, 300, 500, 1000, # this line for finite n
498- 5000, 10_000, 50_000, 100_000, 200_000, 300_000] # this line for approximately infinite n
445+ n_obs_list = [1, 2, 3, 4, 5, 10, 20, 30, 50, 70,
446+ 100, 300, 500, 1000,
447+ 5000, 10_000, 50_000, 100_000,
448+ 200_000, 300_000]
499449
500- Bay_stat .form_posterior_series(num_list )
450+ bayes .form_posterior_series(n_obs_list )
501451
502- θ_values = np.linspace(0.01, 1, 100 )
452+ θ_values = np.linspace(0.01, 1, 1000 )
503453
504454fig, ax = plt.subplots(figsize=(10, 6))
505455
506- ax.plot(θ_values, Bay_stat.prior.pdf(θ_values), label='Prior Distribution', color='k', linestyle='--')
456+ ax.plot(θ_values, bayes.prior.pdf(θ_values),
457+ label='Prior Distribution', color='k',
458+ linestyle='--')
507459
508- for ii, num in enumerate(num_list[:14]):
509- ax.plot(θ_values, Bay_stat.posterior_list[ii].pdf(θ_values), label='Posterior with n = %d' % num)
460+ for i, n_obs in enumerate(n_obs_list[:14]):
461+ posterior = bayes.posterior_list[i]
462+ ax.plot(θ_values, posterior.pdf(θ_values),
463+ label=f'Posterior with n = {n_obs}')
510464
511- ax.set_title('P.D.F of Posterior Distributions', fontsize=15)
465+ ax.set_title('P.D.F of Posterior Distributions',
466+ fontsize=15)
512467ax.set_xlabel(r"$\theta$", fontsize=15)
513468
514469ax.legend(fontsize=11)
@@ -518,13 +473,13 @@ plt.show()
518473** e)** For various $n$'s, please describe and compute $.05$ and $.95$ quantiles for posterior probabilities.
519474
520475``` {code-cell} ipython3
521- lower_bound = [ii .ppf(0.05) for ii in Bay_stat .posterior_list[:14]]
522- upper_bound = [ii .ppf(0.95) for ii in Bay_stat .posterior_list[:14]]
476+ lower_bound = [post .ppf(0.05) for post in bayes .posterior_list[:14]]
477+ upper_bound = [post .ppf(0.95) for post in bayes .posterior_list[:14]]
523478
524479interval_df = pd.DataFrame()
525480interval_df['upper'] = upper_bound
526481interval_df['lower'] = lower_bound
527- interval_df.index = num_list [:14]
482+ interval_df.index = n_obs_list [:14]
528483interval_df = interval_df.T
529484interval_df
530485```
548503``` {code-cell} ipython3
549504left_value, right_value = 0.45, 0.55
550505
551- posterior_prob_list=[ii.cdf(right_value)-ii.cdf(left_value) for ii in Bay_stat.posterior_list]
506+ posterior_prob_list = [
507+ post.cdf(right_value) - post.cdf(left_value)
508+ for post in bayes.posterior_list
509+ ]
552510
553511fig, ax = plt.subplots(figsize=(8, 5))
554512ax.plot(posterior_prob_list)
555- ax.set_title('Posterior Probabililty that '+ r"$\theta$" +' Ranges from %.2f to %.2f'%(left_value, right_value),
556- fontsize=13)
513+ ax.set_title(
514+ r'Posterior Probability that $\theta$'
515+ f' Ranges from {left_value:.2f}'
516+ f' to {right_value:.2f}',
517+ fontsize=13)
557518ax.set_xticks(np.arange(0, len(posterior_prob_list), 3))
558- ax.set_xticklabels(num_list [::3])
519+ ax.set_xticklabels(n_obs_list [::3])
559520ax.set_xlabel('Number of Observations', fontsize=11)
560521
561522plt.show()
@@ -584,10 +545,10 @@ Using the Python class we made above, we can see the evolution of posterior dist
584545``` {code-cell} ipython3
585546fig, ax = plt.subplots(figsize=(10, 6))
586547
587- for ii, num in enumerate(num_list [14:]):
588- ii += 14
589- ax.plot(θ_values, Bay_stat.posterior_list[ii] .pdf(θ_values),
590- label='Posterior with n=%d thousand' % (num/1000) )
548+ for i, n_obs in enumerate(n_obs_list [14:]):
549+ posterior = bayes.posterior_list[i + 14]
550+ ax.plot(θ_values, posterior .pdf(θ_values),
551+ label=f 'Posterior with n = {n_obs:,}' )
591552
592553ax.set_title('P.D.F of Posterior Distributions', fontsize=15)
593554ax.set_xlabel(r"$\theta$", fontsize=15)
@@ -604,21 +565,23 @@ Here the posterior mean converges to $0.4$ while the posterior standard deviat
604565To show this, we compute the means and variances statistics of the posterior distributions.
605566
606567``` {code-cell} ipython3
607- mean_list = [ii .mean() for ii in Bay_stat .posterior_list]
608- std_list = [ii .std() for ii in Bay_stat .posterior_list]
568+ mean_list = [post .mean() for post in bayes .posterior_list]
569+ std_list = [post .std() for post in bayes .posterior_list]
609570
610571fig, ax = plt.subplots(1, 2, figsize=(14, 5))
611572
612573ax[0].plot(mean_list)
613- ax[0].set_title('Mean Values of Posterior Distribution', fontsize=13)
574+ ax[0].set_title('Mean of Posterior Distribution',
575+ fontsize=13)
614576ax[0].set_xticks(np.arange(0, len(mean_list), 3))
615- ax[0].set_xticklabels(num_list [::3])
577+ ax[0].set_xticklabels(n_obs_list [::3])
616578ax[0].set_xlabel('Number of Observations', fontsize=11)
617579
618580ax[1].plot(std_list)
619- ax[1].set_title('Standard Deviations of Posterior Distribution', fontsize=13)
581+ ax[1].set_title('Std Dev of Posterior Distribution',
582+ fontsize=13)
620583ax[1].set_xticks(np.arange(0, len(std_list), 3))
621- ax[1].set_xticklabels(num_list [::3])
584+ ax[1].set_xticklabels(n_obs_list [::3])
622585ax[1].set_xlabel('Number of Observations', fontsize=11)
623586
624587plt.show()
@@ -669,17 +632,20 @@ According to the Law of Large Numbers, for a large number of observations, obser
669632Consequently, the mean of the posterior distribution converges to $0.4$ and the variance withers to zero.
670633
671634``` {code-cell} ipython3
672- upper_bound = [ii .ppf(0.95) for ii in Bay_stat .posterior_list]
673- lower_bound = [ii .ppf(0.05) for ii in Bay_stat .posterior_list]
635+ upper_bound = [post .ppf(0.95) for post in bayes .posterior_list]
636+ lower_bound = [post .ppf(0.05) for post in bayes .posterior_list]
674637
675638fig, ax = plt.subplots(figsize=(10, 6))
676- ax.scatter(np.arange(len(upper_bound)), upper_bound, label='95 th Quantile')
677- ax.scatter(np.arange(len(lower_bound)), lower_bound, label='05 th Quantile')
639+ ax.scatter(np.arange(len(upper_bound)),
640+ upper_bound, label='95th Quantile')
641+ ax.scatter(np.arange(len(lower_bound)),
642+ lower_bound, label='5th Quantile')
678643
679644ax.set_xticks(np.arange(0, len(upper_bound), 2))
680- ax.set_xticklabels(num_list [::2])
645+ ax.set_xticklabels(n_obs_list [::2])
681646ax.set_xlabel('Number of Observations', fontsize=12)
682- ax.set_title('Bayesian Coverage Intervals of Posterior Distributions', fontsize=15)
647+ ax.set_title('Bayesian Coverage Intervals of '
648+ 'Posterior Distributions', fontsize=15)
683649
684650ax.legend(fontsize=11)
685651plt.show()
0 commit comments