In [1]:
import numpy as np

import plotly.figure_factory as ff

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from ipywidgets import VBox, HBox

from scipy.stats import norm

In [2]:
# Seed NumPy's legacy global RNG so every random draw in this notebook is
# reproducible after a kernel restart.
RANDOM_SEED = 5
np.random.seed(RANDOM_SEED)

# Let's start by generating some uniform data using our friend NumPy

In [5]:
# 1000 draws from Uniform(0, 1), stored as a (1000, 1) column vector.
unif_1 = np.random.uniform(size=(1000, 1))

# Histogram only (no fitted curve, no rug); the plotting helper takes a list
# of 1-D series, so the column vector is flattened first.
unif_dist = ff.create_distplot(
    [unif_1.ravel()],
    group_labels=[' '],
    bin_size=0.05,
    show_curve=False,
    show_rug=False,
)
unif_dist.update_layout(
    title_text='Uniform Data',
    showlegend=False,
    width=800,
    height=500,
)
unif_dist.show()

# We want to use it to generate data in the same space as traditional/raw data, say heights of people in the office
<img src="Images/unif_to_inv_cdf.jpg"/>

## We can use the Inverse Cumulative Distribution Function.
## Takes in data in the 0 to 1 space
## Output lower bound value of - inf
## Output upper bound value of inf

In [6]:
# Push the uniform draws through the inverse CDF (percent-point function) of
# a normal distribution with mean 160 and standard deviation 7.
height_dist = norm(loc=160, scale=7)
normal_dist_data = height_dist.ppf(unif_1)

In [7]:
# Distribution plot of the transformed data; the rug is hidden and
# show_curve is left at create_distplot's default.
normal_dist = ff.create_distplot(
    [normal_dist_data.ravel()],
    group_labels=[' '],
    bin_size=0.1,
    show_rug=False,
)
normal_dist.update_layout(
    title_text='Uniform to Inverse CDF',
    showlegend=False,
    width=1000,
    height=500,
)
normal_dist.show()

# What if I want to transform raw data into uniform data?
<img src="Images/data_to_uniform.jpg"/>

## We use the Cumulative Distribution Function, which has the following properties:
## Takes in data in the -inf to inf space; numerically, SciPy's output seems to saturate at about -37 (rounds to 0) and about 8 (rounds to 1)
## Output lower bound value of 0
## Output upper bound value of 1
## For the standard normal you will usually see this symbol $\Phi$, called Phi
## $\Phi (- x) = 1 -  \Phi(x)$

In [17]:
# Standard-normal CDF at z = 7 (loc/scale spelled out; these are the defaults).
# Kept as the cell's last expression so the value is displayed.
norm.cdf(7, loc=0, scale=1)

0.9999999999987201

In [8]:
# Map the normal data back to the unit interval with the CDF of the same
# distribution (mean 160, standard deviation 7) that generated it.
height_dist = norm(loc=160, scale=7)
unif_2 = height_dist.cdf(normal_dist_data)

In [9]:
# Histogram only (no fitted curve, no rug) of the CDF-transformed data.
unif_dist_2 = ff.create_distplot(
    [unif_2.ravel()],
    group_labels=[' '],
    bin_size=0.05,
    show_curve=False,
    show_rug=False,
)
unif_dist_2.update_layout(
    title_text='Raw data to uniform',
    showlegend=False,
    width=800,
    height=500,
)
unif_dist_2.show()

# How does the PDF fit into all of this?
<img src="Images/data_to_pdf.jpg"/>

## The Probability Density Function (PDF) is the derivative (or the slope) of the CDF.
## Which means we can obtain the CDF by integrating the PDF. For the normal distribution this integral has no closed-form solution (a nice formula), but it has been calculated for us in all statistical software, as it's key to statistics
## The output is never negative (for the normal distribution it is always bigger than 0)
## The area under the curve adds up to 1


In [10]:
# Evaluation grids for the curve plots:
#   u - probabilities from 0.01 to 0.99 (inputs to the inverse CDF)
#   z - values from -3 to 3 (inputs to the CDF and PDF)
N_GRID_POINTS = 100
u = np.linspace(0.01, 0.99, num=N_GRID_POINTS)
z = np.linspace(-3.0, 3.0, num=N_GRID_POINTS)

In [11]:
def _curve_widget(x, y, name, title):
    """Build a FigureWidget containing a single labelled scatter trace.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the curve to draw.
    name : str
        Name given to the trace.
    title : str
        Figure title.

    Returns
    -------
    go.FigureWidget
        Widget with the trace added and both axes labelled
        "Input" / "Output".
    """
    widget = go.FigureWidget()
    widget.add_scatter(x=x, y=y, name=name)
    widget.update_layout(
        title_text=title,
        xaxis_title="Input",
        yaxis_title="Output",
    )
    return widget


# The inverse CDF is evaluated on the probability grid u; the CDF and PDF
# are evaluated on the z grid. All three use the standard normal.
f = _curve_widget(u, norm.ppf(u), 'Inverse CDF', 'Inverse Cumulative Distribution Function')
g = _curve_widget(z, norm.cdf(z), 'CDF', 'Cumulative Distribution Function')
h = _curve_widget(z, norm.pdf(z), 'PDF', 'Probability Density Function')

# Layout: inverse CDF and CDF side by side, PDF underneath. This must stay
# the last expression in the cell so the notebook displays it.
VBox([HBox([f, g]), h])

VBox(children=(HBox(children=(FigureWidget({
    'data': [{'name': 'Inverse CDF',
              'type': 'scatt…

# A really useful application of the PDF is to take its natural log and use it as the log-likelihood when we are trying to find the parameters of the distribution which generated the data, using an optimiser as in Maximum Likelihood Estimation, or using MCMC techniques.