In [None]:
# %% Calculus 2 - Section 12.88
#    Code challenge: pdfs and cdfs

# This code pertains to a calculus course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/pycalc2_x
# The code in this repository is developed to solve the exercises provided along
# the course, and it has been written partially independently and partially
# from the code developed by the course instructor.


In [1]:
import numpy                as np
import sympy                as sym
import matplotlib.pyplot    as plt
import matplotlib.colors    as mcolors
import scipy.integrate      as spi
import math
import mpmath
import plotly.graph_objects as go
import sympy.stats

from scipy.signal                     import find_peaks
from scipy                            import stats
from IPython.display                  import display,Math
from google.colab                     import files
from IPython.display                  import Audio
from scipy.io                         import wavfile
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')

import matplotlib.animation as animation
from matplotlib import rc
rc('animation', html='jshtml')


In [None]:
# %% Exercise 1
#    Build the standard normal pdf and cdf with stats.norm.[pdf,cdf] and plot
#    both, highlighting the upper tail z >= zval.
#    Note: the exercise asks for 501 grid points; the grid below uses 5001,
#    i.e. the finer resolution that Exercise 4 asks to try.

# Grid over z and its (uniform) spacing
x  = np.linspace(-4,4,5001)
dx = x[1]-x[0]

# Lower edge of the highlighted tail
zval = 1

# The pdf is multiplied by dx so that each sample is the probability of one
# grid step; plain sums over pdf then approximate integrals
pdf = dx * stats.norm.pdf(x)
cdf = stats.norm.cdf(x)

# Quantities reused by several plot calls
idx_zval = np.argmin(abs(x-zval))   # grid point closest to zval
tail     = x >= zval                # mask of the shaded region
cdf_zval = stats.norm.cdf(zval)     # exact cdf value at zval

# Figure with golden-ratio aspect: pdf on top, cdf below
phi = (1 + np.sqrt(5)) / 2
_,axs = plt.subplots(2,1,figsize=(5*phi,5))
ax_pdf,ax_cdf = axs

# Top panel: pdf, dotted marker at zval, shaded tail
ax_pdf.plot(x,pdf,color='tab:blue')
ax_pdf.plot([zval,zval],[0,pdf[idx_zval]],'k:',linewidth=0.8)
ax_pdf.fill_between(x[tail],pdf[tail],color='tab:blue',alpha=.15)
ax_pdf.set_xlim(x[[0,-1]])
ax_pdf.set_ylabel('Probability density')
ax_pdf.set_title('Gaussian PDF')
ax_pdf.set_ylim(bottom=0)

# Bottom panel: cdf with dotted guides from both axes to the point (zval, cdf(zval))
ax_cdf.plot(x,cdf,color='tab:green')
ax_cdf.plot([zval,zval],[0,cdf_zval],'k:',linewidth=0.8)
ax_cdf.plot([x[0],zval],[cdf_zval,cdf_zval],'k:',linewidth=0.8)
ax_cdf.set_ylim([-.02,1.02])
ax_cdf.set_xlim(x[[0,-1]])
ax_cdf.set_xlabel('z')
ax_cdf.set_ylabel('Cumulative probability')
ax_cdf.set_title('Gaussian CDF')

plt.tight_layout()

# Save to disk, show inline, then hand the file to the browser (Colab only)
fig_name = 'fig13_codechallenge_88_exercise_1.png'
plt.savefig(fig_name)
plt.show()
files.download(fig_name)


In [None]:
# %% Exercise 2
#    Compute the tail probability P(z >= zval) from the previous exercise with
#    1) numpy sum on pdf, 2) spi.simpson on pdf, 3) index into cdf, 4) value
#    from stats.norm.cdf, 5) sym.stats (i.e., implement FTC)
#    Relies on x, pdf, cdf and zval defined in the Exercise 1 cell.

# Index of the grid point closest to zval (shared by methods 1-3)
idx_zval = np.argmin(abs(x-zval))

# Method 1: numpy sum (pdf already carries the dx factor, so this is a
# Riemann sum over the tail of the grid)
p_1 = np.sum(pdf[idx_zval:])
print(f'Method 1: {p_1:.10f}')

# Method 2: Simpson's rule; dx=1 because the dx factor is already inside pdf
p_2 = spi.simpson(pdf[idx_zval:],dx=1)
print(f'Method 2: {p_2:.10f}')

# Method 3: complement of the sampled cdf at the closest grid point
p_3 = 1 - cdf[idx_zval]
print(f'Method 3: {p_3:.10f}')

# Method 4: complement of stats.norm.cdf evaluated exactly at zval
p_4 = 1 - stats.norm.cdf(zval)
print(f'Method 4: {p_4:.10f}')

# Method 5: sympy, i.e. FTC on the symbolic cdf: F(oo) - F(zval)
xx     = sym.symbols('x')
norm   = sym.stats.Normal('N',0,1)
symcdf = sym.stats.cdf(norm)(xx)

# Parenthesise before .evalf(): the method call binds tighter than the
# subtraction, so without the parentheses only the second term is evaluated
p_5    = (symcdf.subs(xx,sym.oo) - symcdf.subs(xx,zval)).evalf()
print(f'Method 5: {p_5:.10f}')


In [None]:
# %% Exercise 3
#    Repeat exercises 1 and 2 for the finite interval [-2/3, 1/3]; this cell
#    draws the figure, reusing x, pdf and cdf from the Exercise 1 cell

# Interval bounds
zval_low  = -2/3
zval_high = 1/3

# Figure with golden-ratio aspect: pdf on top, cdf below
phi = (1 + np.sqrt(5)) / 2
_,axs = plt.subplots(2,1,figsize=(5*phi,5))

# Top panel: pdf with a dotted marker at each bound (height taken from the
# grid sample closest to the bound)
axs[0].plot(x,pdf,color='tab:blue')
for zbound in (zval_low,zval_high):
    axs[0].plot([zbound,zbound],[0,pdf[np.argmin(abs(x-zbound))]],'k:',linewidth=0.8)

# Mask of grid points inside the interval (also reused by the next cell)
z4fill = np.logical_and(x>=zval_low,x<=zval_high)
axs[0].fill_between(x[z4fill],pdf[z4fill],color='tab:blue',alpha=.15)
axs[0].set_xlim(x[[0,-1]])
axs[0].set_ylabel('Probability density')
axs[0].set_title('Gaussian PDF')
axs[0].set_ylim(bottom=0)

# Bottom panel: cdf with dotted guides from both axes to each bound
axs[1].plot(x,cdf,color='tab:green')
for zbound in (zval_low,zval_high):
    cdf_at_bound = stats.norm.cdf(zbound)
    axs[1].plot([zbound,zbound],[0,cdf_at_bound],'k:',linewidth=0.8)
    axs[1].plot([x[0],zbound],[cdf_at_bound,cdf_at_bound],'k:',linewidth=0.8)
axs[1].set_ylim([-.02,1.02])
axs[1].set_xlim(x[[0,-1]])
axs[1].set_xlabel('z')
axs[1].set_ylabel('Cumulative probability')
axs[1].set_title('Gaussian CDF')

plt.tight_layout()

# Save to disk, show inline, then hand the file to the browser (Colab only)
fig_name = 'fig14_codechallenge_88_exercise_3.png'
plt.savefig(fig_name)
plt.show()
files.download(fig_name)


In [None]:
# %% Exercise 3 (continued)
#    Probability of the interval [zval_low, zval_high] with the same five
#    methods used in Exercise 2.
#    Relies on x, pdf, cdf (Exercise 1 cell) and on zval_low, zval_high, z4fill
#    (previous cell).

# Method 1: numpy sum (pdf already carries the dx factor, so this is a
# Riemann sum over the grid points inside the interval)
p_1 = np.sum(pdf[z4fill])
print(f'Method 1: {p_1:.10f}')

# Method 2: Simpson's rule; dx=1 because the dx factor is already inside pdf
p_2 = spi.simpson(pdf[z4fill],dx=1)
print(f'Method 2: {p_2:.10f}')

# Method 3: difference of the sampled cdf at the grid points closest to the bounds
p_3 = cdf[np.argmin(abs(x-zval_high))] - cdf[np.argmin(abs(x-zval_low))]
print(f'Method 3: {p_3:.10f}')

# Method 4: difference of stats.norm.cdf evaluated exactly at the bounds
p_4 = stats.norm.cdf(zval_high) - stats.norm.cdf(zval_low)
print(f'Method 4: {p_4:.10f}')

# Method 5: sympy, i.e. FTC on the symbolic cdf: F(zval_high) - F(zval_low)
xx     = sym.symbols('x')
norm   = sym.stats.Normal('N',0,1)
symcdf = sym.stats.cdf(norm)(xx)

# Parenthesise before .evalf(): the method call binds tighter than the
# subtraction, so without the parentheses only the second term is evaluated
# and the first one is left as an unevaluated erf expression
p_5    = (symcdf.subs(xx,zval_high) - symcdf.subs(xx,zval_low)).evalf()
print(f'Method 5: {p_5:.10f}')


In [None]:
# %% Exercise 4
#    Try again exercises 1-3 but with a higher x-axis resolution (5001 points),
#    and see what happens to the accuracy of the various methods
