In [1]:
# imports
import numpy as np
import scipy as sp
import pandas as pd

from plotly.subplots import make_subplots
from plotly import graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff

import matplotlib.pyplot as plt

import plotly.io as pio

# Make "plotly_white" the default template for the figures created below.
pio.templates.default = "plotly_white"

In [2]:
# uniform distribution

def generateUniformDistribution(a=0, b=1, size=1, random_state=0):
  """Draw `size` pseudo-random samples spread uniformly between `a` and `b`.

  Parameters
  ----------
  a, b : float
      Bounds of the target interval.
  size : int
      Number of samples to draw.
  random_state : int
      Seed of the generator; the same seed always yields the same samples.

  Returns
  -------
  numpy.ndarray
      One-dimensional array holding `size` samples.
  """
  # A private generator produces the same stream as `np.random.seed(random_state)`
  # followed by `np.random.rand(size)`, but does not reseed the global NumPy
  # generator as a side effect of every call.
  rng = np.random.RandomState(random_state)

  # u is drawn from [0, 1)
  u = rng.rand(size)

  # Convex combination of the bounds: u = 0 gives b and u -> 1 approaches a,
  # so for a < b the samples fall in (a, b].
  return u * a + (1 - u) * b


def generateIdealUniformDistribution(a=0, b=1, size=1):
  """Return `size` evenly spaced values that start at `a` and stop short of `b`.

  Parameters
  ----------
  a, b : float
      Bounds of the interval to cover.
  size : int
      Number of values to produce.

  Returns
  -------
  list of float
      `a + i * (b - a) / size` for `i = 0 .. size - 1`; empty when `size` is 0.
  """
  # The offset `a` is required for the grid to start at the lower bound;
  # without it the values cover [0, b - a) instead of [a, b).
  return [a + i * (b - a) / size for i in range(size)]

In [3]:
nbDataSamples = 10000

# One row, two panels: seeded pseudo-random sample on the left,
# evenly spaced reference values on the right.
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Histogram(
        x=generateUniformDistribution(size=nbDataSamples),
        histnorm='probability density',
        name="generated",
    ),
    row=1,
    col=1,
)

fig.add_trace(
    go.Histogram(
        x=generateIdealUniformDistribution(size=nbDataSamples),
        histnorm='probability density',
        name="ideal",
    ),
    row=1,
    col=2,
)

fig.update_layout(
    title="[0, 1]-Uniform distribution (nb samples={0})".format(nbDataSamples),
    font={"size": 10},
    height=400,
    width=600,
)

fig.show()

In [4]:
# Bernoulli distribution

def generateBernoulliDistribution(p=0.5, size=1, random_state=0):
  """Turn seeded uniform draws into 0/1 outcomes with success probability `p`.

  Parameters
  ----------
  p : float
      Threshold compared against each uniform draw.
  size : int
      Number of outcomes to produce.
  random_state : int
      Seed forwarded to `generateUniformDistribution`.

  Returns
  -------
  list of int
      1 where the uniform draw is strictly below `p`, 0 otherwise.
  """
  draws = generateUniformDistribution(size=size, random_state=random_state)

  return [1 if draw < p else 0 for draw in draws]


def generateIdealBernoulliDistribution(p=0.5, size=1):
  """Build a deterministic 0/1 list whose share of ones is as close to `p` as possible.

  Parameters
  ----------
  p : float
      Target proportion of ones.
  size : int
      Length of the returned list.

  Returns
  -------
  list of int
      All zeros first, then all ones; the list always has exactly `size` items.
  """
  if size == 1:
    # a single value cannot encode a proportion: return the more likely outcome
    return [0] if p < 0.5 else [1]

  # Round instead of truncating and derive the zeros from the remainder:
  # truncating both counts independently can lose an element to floating-point
  # error (e.g. 0.29 * 100 evaluates to 28.999999999999996).
  nbOnes = int(round(p * size))
  nbOnes = min(max(nbOnes, 0), size)
  nbZeros = size - nbOnes

  return [0] * nbZeros + [1] * nbOnes

In [5]:
# Quick check: the cell displays the ten reference values built for p = 0.7.
generateIdealBernoulliDistribution(p=0.7, size=10)

[0, 0, 0, 1, 1, 1, 1, 1, 1, 1]

In [6]:
p = 0.7
nbDataSamples = 10000

# Side-by-side histograms: seeded Bernoulli sample (left)
# against the deterministic reference list (right).
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Histogram(
        x=generateBernoulliDistribution(p=p, size=nbDataSamples),
        histnorm='probability density',
        name="generated",
    ),
    row=1,
    col=1,
)

fig.add_trace(
    go.Histogram(
        x=generateIdealBernoulliDistribution(p=p, size=nbDataSamples),
        histnorm='probability density',
        name="ideal",
    ),
    row=1,
    col=2,
)

fig.update_layout(
    title="{0}-Bernoulli distribution (nb samples={1})".format(p, nbDataSamples),
    font={"size": 10},
    height=400,
    width=600,
)

fig.show()

In [7]:
# Binomial distribution
def generateBinomialDistribution(n, p=0.5, size=1, random_state=0):
  """Sample binomial counts, each obtained by summing `n` Bernoulli outcomes.

  Parameters
  ----------
  n : int
      Number of Bernoulli trials summed into one sample.
  p : float
      Success probability forwarded to `generateBernoulliDistribution`.
  size : int
      Number of binomial samples to return.
  random_state : int
      Base seed; the i-th sample uses the seed `random_state + i`.

  Returns
  -------
  list
      `size` sums of `n` zero/one outcomes.
  """
  return [
      sum(generateBernoulliDistribution(p=p, size=n, random_state=random_state + offset))
      for offset in range(size)
  ]


def generateIdealBinomialDistribution(n, p=0.5):
  """Return the exact Binomial(n, p) probabilities P(X = k) for k = 0 .. n.

  Parameters
  ----------
  n : int
      Number of trials.
  p : float
      Success probability of a single trial.

  Returns
  -------
  list of float
      `n + 1` probabilities; index k holds C(n, k) * p**k * (1 - p)**(n - k).
  """
  # Import the submodule explicitly: a bare `import scipy as sp` does not
  # guarantee that `sp.special` has been loaded on every SciPy version.
  from scipy.special import binom

  return [binom(n, k) * p ** k * (1 - p) ** (n - k) for k in range(n + 1)]


# Sanity check: ten seeded draws (n=10, p=0.7), then the exact probabilities P(X = k) for k = 0..10.
print(generateBinomialDistribution(n=10, p=0.7, size=10, random_state=10))
print(generateIdealBinomialDistribution(10, p=0.7))

[5, 7, 5, 9, 8, 5, 5, 6, 7, 6]
[5.9049000000000085e-06, 0.00013778100000000018, 0.0014467005000000015, 0.009001692000000007, 0.03675690900000003, 0.10291934520000003, 0.20012094900000005, 0.266827932, 0.23347444049999994, 0.12106082099999996, 0.028247524899999984]


In [8]:
p = 0.1

# Bar chart of the exact binomial probabilities for n = 15 trials.
fig = go.Figure()
fig.add_trace(go.Bar(y=generateIdealBinomialDistribution(n=15, p=p)))

fig.show()

In [9]:
n = 20
p = 0.6
nbDataSamples = 10000

# Left panel: histogram of seeded binomial samples.
# Right panel: exact probabilities, one bar per k = 0 .. n.
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Histogram(
        x=generateBinomialDistribution(n=n, p=p, size=nbDataSamples),
        histnorm='probability density',
        name="generated",
    ),
    row=1,
    col=1,
)

fig.add_trace(
    go.Bar(
        y=generateIdealBinomialDistribution(n=n, p=p),
        name="ideal",
    ),
    row=1,
    col=2,
)

fig.update_layout(
    title="Binomial distribution (p={0}, n={1}, nb samples={2})".format(p, n, nbDataSamples),
    font={"size": 10},
    height=400,
    width=600,
)

fig.show()

In [10]:
# Poisson distribution
def generatePoissonDistribution(poissonParameter=1, size=1, random_state=0, n=50):
  """Draw approximate Poisson samples as Binomial(n, poissonParameter / n) counts.

  Parameters
  ----------
  poissonParameter : float
      Rate (lambda) of the target Poisson distribution.
  size : int
      Number of samples to return.
  random_state : int
      Base seed forwarded to `generateBinomialDistribution`.
  n : int
      Number of Bernoulli trials behind each sample (default 50, the value
      that used to be hard-coded). The binomial only resembles a Poisson
      when `poissonParameter / n` is small, so raise `n` for large rates.

  Returns
  -------
  list
      `size` binomial counts.

  Raises
  ------
  ValueError
      If `n` is not positive or `poissonParameter / n` is not a probability.
  """
  if n <= 0:
    raise ValueError("n must be a positive number of trials, got {0}".format(n))

  # A rate above n would give a success probability greater than 1 and every
  # sample would silently collapse to n.
  if not 0 <= poissonParameter <= n:
    raise ValueError(
        "poissonParameter must lie in [0, n]; got {0} with n={1}".format(poissonParameter, n)
    )

  return generateBinomialDistribution(n, poissonParameter / n, size=size, random_state=random_state)


def generateIdealPoissonDistribution(poissonParameter):
  """Return the Poisson probabilities P(X = k) for k = 0, 1, ... up to a negligible tail.

  Parameters
  ----------
  poissonParameter : float
      Rate (lambda) of the distribution.

  Returns
  -------
  list of float
      Index k holds exp(-lambda) * lambda**k / k!. The list stops at the first
      k above `poissonParameter` whose probability drops below 1e-5.
  """
  threshold = 10 ** -5
  distribution = []

  # P(X = 0); every further term follows from P(k) = P(k - 1) * lambda / k,
  # which avoids factorials and large integer powers altogether.
  fraction = np.exp(-poissonParameter)
  k = 0

  # Only the right tail is cut. For large rates the first probabilities are
  # also below the threshold (exp(-27) is about 2e-12), so stopping at the
  # first small value would return an empty list.
  while not (fraction < threshold and k > poissonParameter):
    distribution.append(fraction)
    k += 1
    fraction = fraction * poissonParameter / k

  return distribution

In [11]:
parameter = 27
nbDataSamples = 10000

# Left panel: histogram of the binomial-based Poisson samples.
# Right panel: reference probabilities, one bar per k.
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Histogram(
        x=generatePoissonDistribution(poissonParameter=parameter, size=nbDataSamples),
        histnorm='probability density',
        name="generated",
    ),
    row=1,
    col=1,
)

fig.add_trace(
    go.Bar(
        y=generateIdealPoissonDistribution(poissonParameter=parameter),
        name="ideal",
    ),
    row=1,
    col=2,
)

fig.update_layout(
    title="Poisson distribution (lambda={0}, nb samples={1})".format(parameter, nbDataSamples),
    font={"size": 10},
    height=400,
    width=600,
)

fig.show()

In [12]:
# Poisson distribution
# animation of Binomial(n, lambda/n) -> poisson(lambda)

# nbSamples and logInfinity are not read in this cell.
nbSamples = 10 ** 2
poissonParma = 2
logInfinity = 3

# One exact Binomial(n, poissonParma / n) distribution per animation frame,
# keyed by str(n) for n = 2 .. 39.
frames = {}

for n in range(2, 40):
  frames[str(n)] = generateIdealBinomialDistribution(n, p=poissonParma / n)


In [13]:
# Animation: the Binomial(n, poissonParma / n) bars approach the Poisson curve as n grows.

# The Poisson reference is identical in every frame, so compute it once.
poissonReference = generateIdealPoissonDistribution(poissonParma)

# Key of the frame displayed before "Play" is pressed; the title below is
# formatted with the same value so that it describes the bars actually shown.
initialFrameKey = "30"

titleTemplate = "Poisson as a limit of Binomial | paramters :({0}, {1} / {0})"

fig = go.Figure(
    data=[
        # `y` must be the flat list of probabilities; wrapping it in another
        # list hands plotly a nested (2-D) value instead of one bar per k
        go.Bar(y=frames[initialFrameKey], name='Binomial distribution'),
        go.Scatter(y=poissonReference, name="Poisson distribution"),
    ],
    layout=go.Layout(
        xaxis=dict(range=[0, 30], autorange=False),
        yaxis=dict(range=[0, 0.6], autorange=False),
        title=titleTemplate.format(initialFrameKey, poissonParma),
        updatemenus=[dict(
            type="buttons",
            buttons=[dict(label="Play",
                          method="animate",
                          args=[None])])]
    ),
    frames=[
        go.Frame(
            data=[go.Bar(y=frames[col]), go.Scatter(y=poissonReference)],
            layout=go.Layout(title=titleTemplate.format(col, poissonParma)),
        )
        for col in frames.keys()
    ]
)

fig.update_layout(
    font=dict(size=10),
    height=400,
    width=600
)

fig.show()

In [14]:
# Normal distribution
def generateNormalDistribution(mu=0, sigma=1, size=1, random_state=0, infinity=30):
  """Draw approximately normal samples from standardised means of uniform draws.

  Each sample averages `infinity` uniform draws on the unit interval,
  standardises that average (central limit theorem), then rescales it to the
  requested mean and standard deviation.

  Parameters
  ----------
  mu : float
      Mean of the returned samples.
  sigma : float
      Standard deviation of the returned samples.
  size : int
      Number of samples to return.
  random_state : int
      Base seed; the i-th sample uses the seed `random_state + 10 * i`.
  infinity : int
      Number of uniform draws averaged into one sample.

  Returns
  -------
  list
      `size` samples.
  """
  meanUniform = 1 / 2
  # standard deviation of the mean of `infinity` independent U(0, 1) variables
  stdMeanUniform = np.sqrt(1 / (12 * infinity))

  normalDistribution = []

  for i in range(size):
    arrUniforms = generateUniformDistribution(size=infinity, random_state=random_state + 10 * i)
    standardValue = (sum(arrUniforms) / infinity - meanUniform) / stdMeanUniform

    # `mu` and `sigma` used to be accepted but ignored; with the defaults
    # (0, 1) this line leaves the standardised value unchanged.
    normalDistribution.append(mu + sigma * standardValue)

  return normalDistribution


def generateIdealNormalDistribution(mu=0, sigma=1):
  """Evaluate the Normal(mu, sigma) density on 100 evenly spaced points of [-5, 5].

  Parameters
  ----------
  mu : float
      Mean of the density.
  sigma : float
      Standard deviation of the density.

  Returns
  -------
  numpy.ndarray
      Density values, one per point of `np.linspace(-5, 5, 100)`.
  """
  grid = np.linspace(-5, 5, 100)

  exponent = -(grid - mu) ** 2 / (2 * sigma ** 2)
  normaliser = np.sqrt(2 * np.pi) * sigma

  return np.exp(exponent) / normaliser

In [15]:
normalDist = generateIdealNormalDistribution()

# Overlay: histogram of the generated samples with the reference density drawn on top.
fig = go.Figure()

fig.add_trace(
    go.Histogram(
        x=generateNormalDistribution(size=10000),
        histnorm='probability density',
        name="generated",
    )
)

fig.add_trace(
    go.Scatter(
        x=np.linspace(-5, 5, 100),
        y=normalDist,
        name="normal",
    )
)

fig.update_layout(
    title="Normal distribution",
    font={"size": 10},
    height=400,
    width=600,
)

fig.show()

In [16]:
mu = 0
sigma = 1
nbDataSamples = 10000

# Left panel: histogram of the generated samples.
# Right panel: reference density evaluated on the [-5, 5] grid.
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Histogram(
        x=generateNormalDistribution(mu=mu, sigma=sigma, size=nbDataSamples),
        histnorm='probability density',
        name="generated",
    ),
    row=1,
    col=1,
)

fig.add_trace(
    go.Bar(
        x=np.linspace(-5, 5, 100),
        y=generateIdealNormalDistribution(mu=mu, sigma=sigma),
        name="ideal",
    ),
    row=1,
    col=2,
)

fig.update_layout(
    title="Normal distribution(mu={0}, sigma={1}, nb samples={2})".format(mu, sigma, nbDataSamples),
    font={"size": 10},
    height=400,
    width=600,
)

fig.show()

In [32]:
# simulation 
# uniform distribution --> (central limit) standard normal distribution
nbSamples = 1000

# One frame per number of averaged uniforms (n = 1 .. 4); only the samples
# strictly inside (-5, 5) are kept.
frames = {}

for n in range(1, 5):
  frames[str(n)] = list(filter(
      lambda t: -5 < t < 5,
      generateNormalDistribution(mu=mu, sigma=sigma, infinity=n, size=nbSamples),
  ))

In [18]:
# Inspect the samples stored under key "1" (the frame built with infinity=1).
frames["1"]

[-1.5391500117082855,
 -0.20888230587545684,
 -1.0803923460685656,
 0.15278609434942597,
 -1.1484130742011414,
 -0.18500414587165953,
 0.5708364298963085,
 0.0008938747953082017,
 -0.35094060557286444,
 -0.6407510382968334,
 1.1162833310064741,
 1.7337442263410585,
 -0.19260086089244477,
 -0.45277859033730783,
 0.8545242417814487,
 -0.3874278078483831,
 -1.2192214912191144,
 0.13695826423737617,
 1.0352753110052784,
 -1.3901844191295747,
 -1.5764501054281177,
 -0.14523380843006337,
 0.0024613760990258105,
 -0.8472108378691345,
 -1.5367796007439534,
 -0.08162068545027301,
 1.3424433729530036,
 0.9367236105153623,
 -0.4289221486752048,
 0.37103593318470895,
 0.7008186644903552,
 -1.437492760814888,
 -0.878003238294704,
 -0.11479866163189287,
 1.3797858796955889,
 -1.1574212841224796,
 0.7523319494144111,
 -0.31288274996889537,
 -0.4739517540735751,
 -0.47536191487281876,
 0.048827223493367225,
 -0.7083347603852586,
 1.7830206482338096,
 0.26126965438988925,
 0.5296892891305268,
 -0.97544

In [33]:
# Animation: histograms of the standardised uniform means next to the standard normal density.

# The reference curve is identical in every frame, so build it once.
normalGrid = np.linspace(-5, 5, 100)
normalCurve = generateIdealNormalDistribution()

fig = go.Figure(
    data=[
        # `x` must be the flat list of samples; wrapping it in another list
        # hands plotly a nested (2-D) value instead of the sample itself
        go.Histogram(x=frames["2"], histnorm='probability density', name='limit'),
        go.Scatter(x=normalGrid, y=normalCurve, name="Normal distribution"),
    ],
    layout=go.Layout(
        yaxis=dict(range=[0, 1], autorange=False),
        title="Central limit theorem",
        updatemenus=[dict(
            type="buttons",
            buttons=[dict(label="Play",
                          method="animate",
                          args=[None])])]
    ),
    frames=[
        go.Frame(
            data=[
                go.Histogram(x=frames[col], histnorm='probability density', name="limit"),
                go.Scatter(x=normalGrid, y=normalCurve, name="Normal distribution"),
            ],
            layout=go.Layout(
                # the original template ended with a stray ")"
                title="Central limit theorem (n={0})".format(col)
            ))
        for col in frames.keys()
    ]
)


fig.update_layout(
    font=dict(size=10),
    height=400,
    width=600
)

fig.show()

In [60]:
# Density-normalised histogram of 100000 generated samples; the bin count is
# computed as 1 + 3.322 * log10(sample count), truncated to an integer.
hist, hist_ticks = np.histogram(
    generateNormalDistribution(size=100000),
    bins=int(1 + 3.322 * np.log10(100000)),
    density=True,
)

In [61]:
# `np.histogram` returns the bin edges, i.e. one more value than there are
# bins. Draw each bar at the centre of its bin so that `x` and `y` have the
# same length and the bars are not shifted towards the left edges.
binCenters = (hist_ticks[:-1] + hist_ticks[1:]) / 2

fig = go.Figure(data=[
    go.Bar(x=binCenters, y=hist)
])

fig.show()