# Week 2: Introduction to Probability and Distributions

## Important notions
 - random variables
 - independence
 - probability notation (define with the help of Venn diagrams)
 - probability distributions
 - notions of continuous and discrete distributions
 - Gaussian
 - Lognormal (particles)
 - Poisson
 - Binomial

In [1]:
from ipywidgets import interact, interactive, fixed, interact_manual, FloatSlider, IntSlider
import ipywidgets as widgets
from ipywidgets.embed import embed_minimal_html
from IPython.display import display

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Default text styling handed to matplotlib's 'font' rc group, so it applies
# to every figure drawn later in this notebook.
font = dict(family='DejaVu Sans', weight='bold', size=48)
plt.rc('font', **font)

import pandas as pd
from scipy.stats import norm


## Gaussian Plot
Run the cell below and move the sliders to see what the parameters $\mu$ and $\sigma$ do to the graph. What are the meanings of the parameters? Hint: you might want to increase the number of samples.

In [2]:
# Slider controls for the two Gaussian parameters. continuous_update=False
# makes the plot redraw when a slider is released instead of on every drag
# event.
mu_slider = FloatSlider(min=-5, max=5.0, step=1, value=0, continuous_update=False)
sigma_slider = FloatSlider(min=0.1, max=2.0, step=0.1, value=1.0, continuous_update=False)


@interact(mu=mu_slider, sigma=sigma_slider)
def interactive_norm(mu, sigma):
    """Plot the Gaussian probability density for the selected parameters.

    Parameters
    ----------
    mu : float
        Passed to ``norm.pdf`` as ``loc`` (the mean of the distribution).
    sigma : float
        Passed to ``norm.pdf`` as ``scale`` (the standard deviation).
    """
    x_limits = (-6, 6)
    # Sample the density over the whole displayed range. Sampling only
    # [-5, 5] while showing [-6, 6] cut the curve off at +/-5, which is
    # right at the peak when mu sits at either end of its slider.
    x = np.linspace(x_limits[0], x_limits[1], 10000)
    density = norm.pdf(x, mu, sigma)

    plt.style.use('ggplot')
    plt.figure(figsize=(8, 5))
    plt.plot(x, density, c='red', lw=3)
    plt.grid(True)
    plt.xlim(x_limits)
    plt.show()
    

interactive(children=(FloatSlider(value=0.0, continuous_update=False, description='mu', max=5.0, min=-5.0, ste…

## Demonstration of convergence of t-dist to Gaussian with number of samples

Run the cells below and play with the distribution parameters. The `df` slider sets the degrees of freedom, which for a sample of $n$ measurements is $n - 1$. For what number of samples can you be sure that you can just use the Gaussian? What if you extend the upper bound of the slider to allow more samples? Does anything crazy happen?

In [3]:
# Set up the graph with sliders
from scipy.stats import t

# Degrees of freedom of the t-distribution (integer steps from 1 to 50).
df_slider = IntSlider(
    min=1,
    max=50,
    step=1,
    value=1,
    continuous_update=False,
)

# Location parameter of the plotted distribution (integer steps from -5 to 5).
xbar_slider = IntSlider(
    min=-5,
    max=5,
    step=1,
    value=0,
    continuous_update=False,
)

# Scale parameter of the plotted distribution (0.2 to 5 in steps of 0.2).
s_slider = FloatSlider(
    min=0.2,
    max=5,
    step=0.2,
    value=1,
    continuous_update=False,
)

In [4]:
@interact(df=df_slider, xbar=xbar_slider, s=s_slider)
def interactive_tdist(df, xbar, s):
    """Overlay a Student's t density on a Gaussian with the same location and scale.

    Parameters
    ----------
    df : int
        Degrees of freedom passed to ``t.pdf``.
    xbar : int
        Location (``loc``) used for both the t and the Gaussian curves.
    s : float
        Scale (``scale``) used for both the t and the Gaussian curves.
    """
    x_limits = (-6, 6)
    # Sample over the whole displayed range so neither curve is cut off
    # at +/-5 inside a [-6, 6] axis.
    x = np.linspace(x_limits[0], x_limits[1], 10000)
    t_density = t.pdf(x, df, xbar, s)
    # The reference Gaussian must share the t-distribution's location and
    # scale; a fixed N(0, 1) only lines up when xbar == 0 and s == 1, so
    # moving those sliders made the comparison meaningless.
    gaussian_density = norm.pdf(x, xbar, s)

    plt.style.use('ggplot')
    plt.figure(figsize=(10, 8))
    plt.plot(x, t_density, c='red', lw=3, label='t-distribution')
    plt.plot(x, gaussian_density, c='k', lw=1, label='Gaussian')
    plt.xlabel('X', fontsize=36)
    plt.ylabel('P(X)', fontsize=36)
    plt.grid(True)
    plt.xlim(x_limits)
    plt.legend(fontsize=16)
    plt.show()

interactive(children=(IntSlider(value=1, continuous_update=False, description='df', max=50, min=1), IntSlider(…