In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import warnings
import plotly.express as px
import plotly.graph_objects as go
from scipy.interpolate import interp1d
from scipy.integrate import simps
import numba as nb

In [4]:
def plot_pdf_and_samples_histogram(xs, pdfs, samps, x_range=(-4,4), bins=100):
  """Overlay a pdf curve on a density histogram of samples.

  Parameters
  ----------
  xs, pdfs : array-like
    Abscissae and pdf values drawn as the 'pdf' line trace.
  samps : array-like
    Samples to histogram.
  x_range : tuple of (low, high)
    Interval covered by the histogram; samples outside it are not binned.
  bins : int
    Number of equal-width histogram bins inside ``x_range``.

  Returns
  -------
  plotly.graph_objects.Figure
    Figure holding a 'pdf' scatter trace and a 'samples' bar trace.
  """
  counts, edges = np.histogram(samps, range=x_range, bins=bins)
  # Divide by the total number of samples (not only those inside x_range) so
  # the bar heights estimate the same density as the pdf curve even when some
  # samples fall outside the plotted interval.
  density = counts / (np.size(samps) * np.diff(edges))
  # np.histogram returns one more edge than there are bins; bars must be
  # centred on the bin midpoints, otherwise they are shifted by half a bin.
  centers = 0.5 * (edges[:-1] + edges[1:])

  layout = go.Layout(
    xaxis=dict(autorange=True),
    yaxis=dict(autorange=True),
    xaxis_title='x',
    yaxis_title='density',
  )
  fig = go.Figure(
    data=[
      go.Scatter(x=xs, y=pdfs, name='pdf'),
      go.Bar(x=centers, y=density, name='samples'),
    ],
    layout=layout,
  )
  # No gap between bars so the trace reads as a histogram.
  fig.update_layout(bargap=0)
  return fig

In [6]:
def inverse_cdf(pdf,range=(-25,25), bins=10000001):
  """Build an approximate inverse CDF (quantile function) for a pdf.

  The pdf is evaluated on a uniform grid, accumulated with a running sum,
  rescaled so that its final value is 1 and inverted by linear
  interpolation. Passing uniform(0, 1) numbers to the returned callable
  yields samples distributed according to ``pdf`` (inverse transform
  sampling).

  Parameters
  ----------
  pdf : callable
    Vectorised function mapping an array of x values to non-negative
    densities. It does not have to be normalised: any constant factor
    cancels when the running sum is divided by its last element.
  range : tuple of (low, high)
    Interval on which the pdf is tabulated. (The name shadows the builtin
    inside this function; it is kept so existing keyword calls still work.)
  bins : int
    Number of grid points used for the tabulation.

  Returns
  -------
  scipy.interpolate.interp1d
    Callable mapping probabilities to x values; probabilities outside the
    tabulated CDF values are linearly extrapolated.

  Raises
  ------
  ValueError
    If the tabulated pdf has zero or non-finite total mass, or if all of
    its mass sits on a single grid point (no interval left to invert).
  """
  lower, upper = range
  xs = np.linspace(lower, upper, bins)
  density = np.asarray(pdf(xs), dtype=float)

  # Running sum of the density; dividing by the last value bounds it to
  # [0, 1] and makes a separate normalisation integral unnecessary.
  my_cdf = np.cumsum(density)
  total = my_cdf[-1]
  if not np.isfinite(total) or total <= 0:
    raise ValueError("pdf must have a finite, positive total mass on the given range")
  my_cdf = my_cdf / total

  # Flat runs at either end of the CDF (pdf equal to zero near the grid
  # edges) would hand interp1d repeated abscissae, giving a 0/0 = NaN at
  # probability 0. Keep only the last point of the leading run and the
  # first point of the trailing run.
  first = np.searchsorted(my_cdf, my_cdf[0], side='right') - 1
  last = np.searchsorted(my_cdf, my_cdf[-1], side='left')
  if last <= first:
    raise ValueError("pdf mass is concentrated on a single grid point; cannot invert the CDF")

  # generate the inverse cdf: swap the roles of x and CDF(x)
  func_ppf = interp1d(my_cdf[first:last + 1], xs[first:last + 1], fill_value='extrapolate')
  return func_ppf


In [7]:
def f(x):
  """Unnormalised Laplace-shaped density exp(-|x|), evaluated elementwise."""
  distance_from_origin = np.abs(x)
  return np.exp(-distance_from_origin)

# Build the inverse CDF of f once, then tabulate it on a uniform grid of
# probabilities so the samplers only need a cheap table lookup.
inv_cdf_f = inverse_cdf(f)

newxs = np.linspace(0.0, 1.0, num=10000)
invcdfs = inv_cdf_f(newxs)

In [8]:
def samplerf_single():
  """Draw one sample: a uniform [0, 1) variate mapped through the
  (newxs, invcdfs) inverse-CDF table by linear interpolation."""
  return np.interp(np.random.random_sample(), newxs, invcdfs)

def samplerf_multi(size):
  """Draw ``size`` samples at once: uniform [0, 1) variates mapped through
  the (newxs, invcdfs) inverse-CDF table by linear interpolation."""
  return np.interp(np.random.random_sample(size), newxs, invcdfs)

In [9]:
# Numba nopython-mode versions of both samplers, used in the timing
# comparison against the plain NumPy functions.
jit_samplerf_single, jit_samplerf_multi = (
  nb.njit(sampler) for sampler in (samplerf_single, samplerf_multi)
)

In [10]:
# Baseline: 1000 separate calls to the uncompiled single-sample function.
%timeit [samplerf_single() for i in range(1000)]

4.42 ms ± 36.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
# Same 1000 separate calls, using the Numba-compiled single-sample function.
%timeit [jit_samplerf_single() for i in range(1000)]

592 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
# One vectorised call producing 1000 samples with the uncompiled function.
%timeit samplerf_multi(1000)

101 µs ± 1.39 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [15]:
# One vectorised call producing 1000 samples with the Numba-compiled function.
%timeit jit_samplerf_multi(1000)

113 µs ± 974 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [16]:
# Number of samples to draw
n = 100000
# Draw them in one call with the compiled vectorised sampler
our_samps = jit_samplerf_multi(n)
# Reference density on the plotting grid: exp(-|x|) integrates to 2 over the
# real line, so dividing by 2.0 turns f into a proper pdf.
xs = np.linspace(-4, 4, 100001)
normalization = 2.0
teo_pdf = f(xs) / normalization

In [17]:
# Compare the reference pdf with the histogram of the drawn samples.
plot_pdf_and_samples_histogram(xs, teo_pdf, our_samps)