In [None]:
# %% Calculus 2 - Section 12.86
#    Code challenge: pdfs and cdfs

# This code pertains to a calculus course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/pycalc2_x
# The code in this repository was developed to solve the exercises provided
# throughout the course; it was written partially independently and partially
# based on the code developed by the course instructor.


In [2]:
import numpy                as np
import sympy                as sym
import matplotlib.pyplot    as plt
import matplotlib.colors    as mcolors
import scipy.integrate      as spi
import math
import mpmath
import plotly.graph_objects as go
import sympy.stats

from scipy.signal                     import find_peaks
from scipy                            import stats
from IPython.display                  import display,Math
from google.colab                     import files
from IPython.display                  import Audio
from scipy.io                         import wavfile
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')

import matplotlib.animation as animation
from matplotlib import rc
rc('animation', html='jshtml')


In [None]:
# %% Exercise 1
#    Build a logistic pdf with scipy.stats on 31 points spanning [-5,5], then
#    obtain the corresponding cdf in three different ways and compare them:
#    1) analytic cdf from scipy.stats, 2) numpy cumulative sum of the pdf,
#    3) cumulative Simpson's rule from scipy.integrate

# Sampling grid and its (uniform) spacing
x  = np.linspace(-5,5,num=31)
dx = x[1] - x[0]

# Logistic pdf sampled on the grid
pdf = stats.logistic.pdf(x)

# Way 1: closed-form cdf straight from scipy.stats
cdf_sp = stats.logistic.cdf(x)

# Way 2: Riemann-sum approximation of the integral of the pdf (running sum
#        of the samples, scaled by the grid spacing)
cdf_np = dx * np.cumsum(pdf)

# Way 3: cumulative Simpson's rule; initial=0 prepends a zero so that there is
#        one cdf value per grid point
cdf_simp = spi.cumulative_simpson(pdf,x=x,initial=0)

# Figure: pdf on the left panel, the three cdfs overlaid on the right panel
phi = (1 + np.sqrt(5)) / 2
_,axs = plt.subplots(1,2,figsize=(1.5*phi*5,5))
ax_pdf,ax_cdf = axs

ax_pdf.plot(x,pdf)
ax_pdf.set(xlabel='x',ylabel='PDF',xlim=x[[0,-1]],title=f'Logistic PDF (N = {len(x)})')

ax_cdf.plot(x,cdf_sp,label='CDF via sp.stats')
ax_cdf.plot(x,cdf_np,label='CDF via numpy')
ax_cdf.plot(x,cdf_simp,'--',label='CDF via simpson')
ax_cdf.set(xlabel='x',ylabel='CDF',xlim=x[[0,-1]],title=f'Logistic CDF (N = {len(x)})')
ax_cdf.legend()

plt.tight_layout()

# Write the figure to disk, render it inline, then hand it to the browser
fig_name = 'fig10_codechallenge_86_exercise_1.png'
plt.savefig(fig_name)
plt.show()
files.download(fig_name)


In [None]:
# %% Exercise 2
#    Reproduce the image shown in the video; use the parameters shown in the
#    video; use a Gaussian distribution; compute the area up to indicated value
#    both as definite integral of pdf and as index of the cdf

# Resolution
x  = np.linspace(-2,2,501)
dx = x[1] - x[0]

# Pdf parameters
mu    = np.sqrt(2)/2
sigma = 1/np.pi

# Definite integral upper bound (sigma/mu) and index of the grid point closest
# to it
int_val = sigma/mu
int_idx = np.argmin(abs(x-int_val))

# Get the pdf and cdf; the pdf is pre-multiplied by dx, so each sample is the
# probability mass of one grid step (this is also what gets plotted below)
pdf = stats.norm.pdf(x,loc=mu,scale=sigma) * dx
cdf = stats.norm.cdf(x,loc=mu,scale=sigma)

# Areas from pdf and cdf.
# The slice goes up to and INCLUDING int_idx so that both estimates share the
# same upper bound x[int_idx]; since the pdf already carries the dx factor,
# Simpson's rule is applied with unit spacing.
area_from_pdf = spi.simpson(pdf[:int_idx+1],dx=1)
area_from_cdf = cdf[int_idx]

print(f'Area from pdf: {area_from_pdf:.4f}')
print(f'Area from cdf: {area_from_cdf:.4f}')

# Plot
phi = (1 + np.sqrt(5)) / 2
_,axs = plt.subplots(2,1,figsize=(phi*6,6))

# Top panel: pdf with the integrated region shaded up to the bound (inclusive)
axs[0].plot(x,pdf,color='m',linewidth=2)
axs[0].plot([int_val,int_val],[0,pdf[int_idx]],'b--')
axs[0].fill_between(x[:int_idx+1],pdf[:int_idx+1],color='b',alpha=.2)

axs[0].annotate(r'$\frac{2}{\sqrt{2}\pi}$',xy=(int_val,pdf[int_idx]/2),xytext=((int_val+x[0])/2,pdf[int_idx]/2),arrowprops={'facecolor':'k'},verticalalignment='center',fontsize=20)
axs[0].set(xlim=x[[0,-1]],xlabel='x',ylim=[-.00004,np.max(pdf)*1.05],ylabel='Probability density',title=r'Normal pdf $\left(\mu = \sqrt{2}/2, \sigma = \pi^{-1}, N = %g \right)$' %len(x))

# Bottom panel: cdf with guide lines marking the value read off at the bound
axs[1].plot(x,cdf,color='m',linewidth=2)
axs[1].plot([int_val,int_val],[0,cdf[int_idx]],'b--')
axs[1].plot(x[[0,int_idx]],[cdf[int_idx],cdf[int_idx]],'b--')
axs[1].set(xlim=x[[0,-1]],xlabel='x',ylim=[-.015,1.02],ylabel='Cumulative probability',title='Normal cdf')

plt.tight_layout()

plt.savefig('fig11_codechallenge_86_exercise_2.png')
plt.show()
files.download('fig11_codechallenge_86_exercise_2.png')


In [None]:
# %% Exercise 3
#    Evaluate the same area exactly (symbolically) with sympy, using the cdf of
#    a normal random variable

# Symbolic counterparts of the numeric parameters: mean, standard deviation
# and the upper bound sigma/mu
u        = sym.symbols('u')
mu_s     = sym.sqrt(2)/2
sigma_s  = 1/sym.pi
intVal_s = sigma_s/mu_s

# Normal random variable and its cdf as an expression in u
N        = sym.stats.Normal('N',mu_s,sigma_s)
cdf_expr = sym.stats.cdf(N)(u)

# Exact area: cdf evaluated at the upper bound
exact_defint = cdf_expr.subs(u,intVal_s)

# Show the general cdf, a blank line, then the cdf at the bound together with
# its numeric value
cdf_latex = 'c(u) \\;=\\; %s' %sym.latex(cdf_expr)
display(Math(cdf_latex))
print('')

value_latex = 'c\\left(%s\\right) \\;=\\; %s \\;\\approx\\; %g' %(sym.latex(intVal_s),sym.latex(exact_defint),exact_defint.evalf())
display(Math(value_latex))


In [None]:
# %% Exercise 4
#    Explore the accuracy of pdf and cdf approximations for increasing number of
#    points (log scale between 51 and 10,001)
#    NOTE: this cell reuses `mu` and `sigma` from Exercise 2 and `exact_defint`
#    from Exercise 3, so those cells must be run first

# Initialize: log-spaced grid sizes; round before casting so that floating
# point error cannot truncate the endpoints to 50 / 10000
pp = np.round(np.logspace(np.log10(51),np.log10(10001),25)).astype(int)

areaPdf = np.zeros(len(pp))
areaCdf = np.zeros(len(pp))

# Upper bound of the definite integral (same value as in Exercise 2); defined
# here explicitly so the loop does not depend on an undefined name
int_val = sigma/mu

# Compute
for idx,n in enumerate(pp):

    # Grid with n points, and pdf (pre-multiplied by dx) / cdf sampled on it
    x = np.linspace(-2,2,n)
    dx = x[1] - x[0]
    pdf = stats.norm.pdf(x,loc=mu,scale=sigma) * dx
    cdf = stats.norm.cdf(x,loc=mu,scale=sigma)

    # Grid point closest to the upper bound
    intidx = np.argmin(abs(x-int_val))

    # Integrate up to and INCLUDING intidx so both estimates share the same
    # upper bound; unit spacing because the pdf already carries the dx factor
    areaPdf[idx] = spi.simpson(pdf[:intidx+1],dx=1)
    areaCdf[idx] = cdf[intidx]

# Plot
phi = (1 + np.sqrt(5)) / 2
plt.figure(figsize=(phi*5,5))
# Convert the sympy number to a plain float before handing it to matplotlib
plt.axhline(float(exact_defint.evalf()),color='grey',linestyle=':',linewidth=0.8,label='Exact (sympy)')
plt.plot(pp,areaPdf,'s-',label='From pdfs')
plt.plot(pp,areaCdf,'s-',label='From cdfs')

plt.gca().set(xlabel='Number of points',ylabel='Area',xscale='log')
plt.title('Approximations for increasing points')
plt.legend()

plt.savefig('fig12_codechallenge_86_exercise_4.png')
plt.show()
files.download('fig12_codechallenge_86_exercise_4.png')
