<a href="https://colab.research.google.com/github/fbeilstein/machine_learning/blob/master/gallery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Lecture ?? "Statistics"

In [1]:
#@title #Binomial distribution
from scipy.stats import bernoulli, binom, poisson
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets

# Properties for plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever exists.
_poster_style = next(
    (name for name in ("seaborn-v0_8-poster", "seaborn-poster")
     if name in plt.style.available),
    None,
)
# NOTE: styles later in the list take precedence, so the poster style
# overrides any key of the dict that it also defines.
plt.style.use([{
    "figure.figsize":(12,9), # Figure size
    "xtick.labelsize": "large", # Font size of the X-ticks
    "ytick.labelsize": "large", # Font size of the Y-ticks
    "legend.fontsize": "x-large", # Font size of the legend
    "axes.labelsize": "x-large", # Font size of labels
    "axes.titlesize": "xx-large", # Font size of title
    "axes.spines.top": False,
    "axes.spines.right": False,
}] + ([_poster_style] if _poster_style else []))

# Explicit sliders are used because `interact` silently drops keyword
# arguments that do not match a function parameter, so a bare
# `continuous_update=False` would have no effect.
@widgets.interact(
    n=widgets.IntSlider(min=0, max=30, value=15, continuous_update=False),
    p=widgets.FloatSlider(min=0.0, max=1.0, value=0.5, continuous_update=False),
    samples=widgets.IntSlider(min=1, max=1000, value=100, continuous_update=False),
)
def plot_pmf(n, p, samples=100, histogram=False):
    """Plot the PMF of Bin(n, p), optionally with empirical frequencies.

    n: number of trials.
    p: success probability of a single trial.
    samples: number of random draws used for the empirical bars.
    histogram: when True, overlay the relative frequencies of the draws.
    """
    k = np.arange(0, n + 1)
    P_binom = binom.pmf(k, n, p)
    plt.plot(k, P_binom, '-o', color='r')
    if histogram:
        draws = binom.rvs(size=samples, n=n, p=p)
        # Relative frequency of every integer outcome 0..n. A density
        # histogram over (0, n) with n+1 bins has bin width n/(n+1) and
        # would inflate the bars by (n+1)/n.
        height = np.bincount(draws, minlength=n + 1) / samples
        plt.bar(k, height, color='b')
    plt.title('PMF of Bin(%i, %.2f)' % (n, p))
    plt.xlabel('k')
    plt.ylabel('$B_{%i, %.2f}(k)$' % (n, p))
    plt.show()

interactive(children=(IntSlider(value=15, description='n', max=30), FloatSlider(value=0.5, description='p', ma…

In [2]:
#@title #Poisson distribution

from scipy.stats import bernoulli, binom, poisson
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets

# Properties for plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever exists.
_poster_style = next(
    (name for name in ("seaborn-v0_8-poster", "seaborn-poster")
     if name in plt.style.available),
    None,
)
# NOTE: styles later in the list take precedence, so the poster style
# overrides any key of the dict that it also defines.
plt.style.use([{
    "figure.figsize":(12,9), # Figure size
    "xtick.labelsize": "large", # Font size of the X-ticks
    "ytick.labelsize": "large", # Font size of the Y-ticks
    "legend.fontsize": "x-large", # Font size of the legend
    "axes.labelsize": "x-large", # Font size of labels
    "axes.titlesize": "xx-large", # Font size of title
    "axes.spines.top": False,
    "axes.spines.right": False,
}] + ([_poster_style] if _poster_style else []))

@widgets.interact(n=(0,50),samples=(1,1000),λ=(0.0,30.0))
def f(n, λ, samples=100,histogram=False):
    """Plot the Poisson(λ) PMF on 0..n, optionally with empirical frequencies.

    n: largest number of events shown on the x axis.
    λ: rate (mean) of the distribution.
    samples: number of random draws used for the empirical bars.
    histogram: when True, overlay the relative frequencies of the draws.
    """
    k = np.arange(0, n+1)
    P_poisson = poisson.pmf(k, λ)
    plt.plot(k, P_poisson, '-o', color='r')
    if histogram:
        draws = poisson.rvs(size=samples, mu=λ)
        # Relative frequency of each count among ALL draws. Draws above n are
        # simply not shown, rather than being dropped before normalising,
        # which would inflate the visible bars.
        height = np.bincount(draws, minlength=n + 1)[:n + 1] / samples
        plt.bar(k, height, color='b')
    # λ is a float, so '%i' would silently truncate it in the title.
    plt.title('PMF of Poisson(%.2f)' % λ)
    plt.xlabel('Number of Events')
    plt.ylabel('Probability of Number of Events')
    plt.show()

interactive(children=(IntSlider(value=25, description='n', max=50), FloatSlider(value=15.0, description='λ', m…

In [3]:
#@title #Exponential distribution

import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
import scipy.stats

# Properties for plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever exists.
_poster_style = next(
    (name for name in ("seaborn-v0_8-poster", "seaborn-poster")
     if name in plt.style.available),
    None,
)
# NOTE: styles later in the list take precedence, so the poster style
# overrides any key of the dict that it also defines.
plt.style.use([{
    "figure.figsize":(12,9), # Figure size
    "xtick.labelsize": "large", # Font size of the X-ticks
    "ytick.labelsize": "large", # Font size of the Y-ticks
    "legend.fontsize": "x-large", # Font size of the legend
    "axes.labelsize": "x-large", # Font size of labels
    "axes.titlesize": "xx-large", # Font size of title
    "axes.spines.top": False,
    "axes.spines.right": False,
}] + ([_poster_style] if _poster_style else []))

# Explicit sliders are used because `interact` silently drops keyword
# arguments that do not match a function parameter (the original also
# misspelled `continuous_update`).
@widgets.interact(
    lam=widgets.FloatSlider(min=0.01, max=10.0, step=0.01, value=5.0,
                            continuous_update=False),
    SampleSize=widgets.IntSlider(min=1, max=1000, value=100,
                                 continuous_update=False),
)
def f(lam, SampleSize=100, histogram=False):
  """Plot the Exponential(lam) PDF on [0, 40), optionally with a sample histogram.

  lam: rate parameter of the distribution.
  SampleSize: number of random draws used for the histogram.
  histogram: when True, overlay a density histogram of the draws.
  """
  x_max = 40
  x = np.arange(0, x_max, x_max/1000)
  y = lam*np.exp(-lam*x)
  # Plot the PDF
  plt.plot(x, y, linewidth = 3.0, label = 'PDF')
  if histogram:
    # scipy parameterises the exponential by scale = 1/rate. The first
    # positional argument of rvs() is `loc`, which would only shift a
    # unit-rate sample instead of changing its rate.
    samples = scipy.stats.expon.rvs(scale=1/lam, size=SampleSize)
    plt.hist(samples, bins=30, density=True)
  plt.xlim([0,x_max])
  plt.title("Exponential({})".format(lam))
  plt.xlabel('x')
  plt.ylabel('y')
  plt.legend()
  plt.show()

interactive(children=(FloatSlider(value=5.0, description='lam', max=10.0, min=0.01, step=0.01), IntSlider(valu…

In [4]:
#@title #Normal distribution
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
import scipy.stats

# Properties for plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever exists.
_poster_style = next(
    (name for name in ("seaborn-v0_8-poster", "seaborn-poster")
     if name in plt.style.available),
    None,
)
# NOTE: styles later in the list take precedence, so the poster style
# overrides any key of the dict that it also defines.
plt.style.use([{
    "figure.figsize":(12,9), # Figure size
    "xtick.labelsize": "large", # Font size of the X-ticks
    "ytick.labelsize": "large", # Font size of the Y-ticks
    "legend.fontsize": "x-large", # Font size of the legend
    "axes.labelsize": "x-large", # Font size of labels
    "axes.titlesize": "xx-large", # Font size of title
    "axes.spines.top": False,
    "axes.spines.right": False,
}] + ([_poster_style] if _poster_style else []))

# Plots pdf of a normal distribution
@widgets.interact(mu=(-25.0,25.0), var=(0.3,30.0))
def plot_pdf(mu, var):
    '''
    Plot the PDF of N(mu, var) on [-50, 50].

    mu: mean of the distribution.
    var: variance of the distribution (the standard deviation is its square root).

    Dashed markers show the mean and the points one standard deviation
    either side of it.
    '''
    x = np.linspace(-50, 50, 1001)
    sig = var**0.5
    P_norm = scipy.stats.norm.pdf(x, mu, sig)

    plt.plot(x, P_norm, 'b', linewidth=3.0, label = "PDF")
    ym = 1/(sig*np.sqrt(2*np.pi))  # PDF value at the mean
    y0 = ym*np.exp(-0.5)           # PDF value at mu +/- sig
    plt.plot([mu-sig, mu-sig], [0, y0], 'm--',linewidth = 2.0)
    # Raw strings: '\m', '\p' and '\s' are invalid escape sequences in a
    # normal string literal and trigger a warning on recent Python versions.
    plt.plot([mu+sig, mu+sig], [0, y0], 'm--', linewidth = 2.0, label = r'$\mu\pm\sigma$')
    plt.plot([mu,mu], [0,ym], 'g--', linewidth = 2.0, label = r'$\mu$')

    plt.title('PDF of N({},{})'.format(mu,var))
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    plt.show()

interactive(children=(FloatSlider(value=0.0, description='mu', max=25.0, min=-25.0), FloatSlider(value=15.15, …

In [5]:
#@title Mean of sample and population

###### EXAMPLE ######
#
# Estimating mean value of the population
# 1. See that different samples have different means
# 2. See that estimator has its own distribution
# 3. See how distribution changes with size of samples
#
#####################

import itertools
import statistics
import matplotlib.pyplot as plt

import ipywidgets as widgets
import numpy as np

def generate_histogram(size, values=None):
  """Exact distribution of the mean of `size` draws with replacement.

  size: number of draws per sample (>= 1).
  values: sequence of population values; defaults to the module-level
    `population`.

  Returns (all_means, heights): the distinct achievable sample means and,
  for each, count * size / total, where count is the number of ordered
  samples giving that mean. Drawn as bars of width 1/size, each bar's area
  is the probability of its mean.
  """
  if values is None:
    values = population
  # Number of ways to reach each sum with one draw. Counting (instead of
  # assigning 1 per key) keeps repeated population values correctly weighted.
  all_sums = {}
  for el in values:
    all_sums[el] = all_sums.get(el, 0) + 1
  # Convolve in one more draw at a time.
  for _ in range(size-1):
    tmp = {}
    for el in values:
      for sum_, cnt_ in all_sums.items():
        s = sum_ + el
        tmp[s] = tmp.get(s, 0) + cnt_
    all_sums = tmp
  norm = size / sum(all_sums.values())
  all_means = [s/size for s in all_sums]
  heights = [cnt*norm for cnt in all_sums.values()]
  return all_means, heights


# Finite population whose sample means are studied; read by
# generate_histogram() and plot_histos().
population = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

def plot_histos(size1, size2):
  """Overlay the sample-mean distributions for two sample sizes.

  size1, size2: number of draws per sample for the two bar plots.
  A dashed line marks the mean of `population`.
  """
  plt.figure(figsize=(10,8))
  peaks = []
  for draws in (size1, size2):
    centers, bar_heights = generate_histogram(draws)
    plt.bar(centers, bar_heights, alpha=0.5, width=1/draws)
    peaks.append(np.max(bar_heights))
  # The marker line spans the tallest bar of either plot.
  h = np.max([peaks[1], peaks[0]])
  true_mean = np.mean(population)
  plt.plot([true_mean, true_mean], [0, h], 'm--',linewidth = 2.0)
  plt.xlabel('mean of a sample', fontsize=16)
  plt.ylabel('probability', fontsize=16)
  plt.title('probabilities to get different means', fontsize=20)
  plt.legend(["true mean", "sample 1", "sample 2"], loc=2)
  

# Sliders choosing the two sample sizes to compare (1 to 10 draws each).
slider_sample_1 = widgets.IntSlider(min=1, max=10, value=1, description='sample 1 size')
slider_sample_2 = widgets.IntSlider(min=1, max=10, value=2, description='sample 2 size')

# Bind the sliders to plot_histos so the figure follows their values.
widgets.interact(plot_histos, size1=slider_sample_1, size2=slider_sample_2);

interactive(children=(IntSlider(value=1, description='sample 1 size', max=10, min=1), IntSlider(value=2, descr…

In [6]:
#@title #Sample variance for normal distribution
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np

@widgets.interact(n=(2, 20))
def Normal_Variance(n):
    """Compare biased and unbiased sample variance for N(0, 1) samples.

    n: sample size.

    Draws s = 1,000,000 samples of size n, computes the variance of each
    with ddof=0 ("raw", biased) and ddof=1 (unbiased), and plots both
    histograms together with their means and the population variance 1.
    """
    # Imported here because this cell does not import scipy itself and would
    # otherwise only work after another cell has done so.
    import scipy.stats

    plt.figure(figsize=(12, 8))
    plt.xlim([0, 4])

    s = 1000000  # number of experiments
    X = np.random.normal(0, 1, [n,s])

    V = np.var(X, axis=0, ddof=0)
    v = np.mean(V)
    Vu = np.var(X, axis=0, ddof=1)
    vu = np.mean(Vu)

    counts, _, _ = plt.hist(V, bins=60, density=True, alpha=0.5)
    h = np.max(counts)
    counts, _, _ = plt.hist(Vu, bins=60, density=True, alpha=0.5)
    hu = np.max(counts)
    top = np.max([h, hu])  # common height of the three marker lines
    plt.plot([v,v], [0, top], 'r--', linewidth = 2.0)
    plt.plot([vu,vu], [0, top], 'b-', linewidth = 1.0)

    plt.plot([1,1], [0, top], 'g:', linewidth = 3.0)
    plt.ylabel('frequency', fontsize = 15)
    plt.grid()
    plt.title('histogram of sample variances with sample size n=%d'%n, fontsize = 15)
    plt.xlabel('Sample Variance', fontsize = 15)
    plt.legend(['biased variance', 'unbiased variance', 'population variance', 'biased variances', 'unbiased variances'])

    # Inset showing the population the samples are drawn from.
    ax2 = plt.axes([0.65, 0.2, 0.2, 0.4])
    x = np.linspace(-8, 8, 1001)
    P_norm = scipy.stats.norm.pdf(x, 0, 1)
    ax2.plot(x, P_norm)
    plt.title("Population distribution")

interactive(children=(IntSlider(value=11, description='n', max=20, min=2), Output()), _dom_classes=('widget-in…

In [7]:
#@title #Correcting standard deviation for normal distribution

import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
import scipy
import scipy.stats

def Normal_SD(n, d):
    """Histogram of the sample standard deviation of N(0, 1) samples.

    n: sample size.
    d: delta degrees of freedom; each variance is divided by (n - d).

    Draws s = 1,000,000 samples of size n and plots the distribution of
    sqrt(var) together with its mean and the true value 1. Returns None.
    """
    # With n - d <= 0 the variance divisor is zero or negative, which yields
    # inf/nan values that cannot be put into a histogram.
    if n - d <= 0:
        print('d=%s must be smaller than the sample size n=%d' % (d, n))
        return

    plt.figure(figsize=(12,8))
    plt.xlim([0,3])
    plt.title('histogram of sample standard deviation with sample size n=%d'%n, fontsize = 15)
    # Raw strings: '\h' and '\s' are invalid escape sequences otherwise.
    plt.xlabel(r'$\hat{\sigma}$', fontsize = 15)

    s = 1000000  # number of experiments
    X = np.random.normal(0, 1, [n, s])
    V = np.sqrt(np.var(X, axis=0, ddof=d))
    v = np.mean(V)

    plt.plot([v,v], [0, 2], 'r--', linewidth = 2.0)
    plt.hist(V, bins=60, density=True)

    plt.plot([1,1], [0, 2], 'g:', linewidth = 2.0)
    plt.ylabel('frequency', fontsize = 15)
    plt.grid()

    plt.legend([r"estimated $\sigma$", r"true $\sigma$", r"$\sigma$ histogram" ])

    # Inset showing the population the samples are drawn from.
    ax2 = plt.axes([0.65, 0.3, 0.2, 0.4])
    x = np.linspace(-10, 10, 1001)
    P_norm = scipy.stats.norm.pdf(x, 0, 1)
    ax2.plot(x, P_norm)
    plt.title("Population distribution")
    
    
# n: sample size (2..10); d: value passed to Normal_SD as ddof (-1.0..2.0).
# NOTE(review): n=2 together with d=2.0 makes n - d zero.
widgets.interact(Normal_SD,
    n = widgets.IntSlider(min=2, max=10, description='n=', step=1, value=2),
    d = widgets.FloatSlider(min=-1.0, max=2.0, description='d=', value=0.0));

interactive(children=(IntSlider(value=2, description='n=', max=10, min=2), FloatSlider(value=0.0, description=…

In [8]:
#@title #MLE for Gaussian with fixed variance
from scipy.stats import bernoulli, binom, poisson
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
import scipy

# Properties for plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever exists.
_poster_style = next(
    (name for name in ("seaborn-v0_8-poster", "seaborn-poster")
     if name in plt.style.available),
    None,
)
# NOTE: styles later in the list take precedence, so the poster style
# overrides any key of the dict that it also defines.
plt.style.use([{
    "figure.figsize":(12,9), # Figure size
    "xtick.labelsize": "large", # Font size of the X-ticks
    "ytick.labelsize": "large", # Font size of the Y-ticks
    "legend.fontsize": "x-large", # Font size of the legend
    "axes.labelsize": "x-large", # Font size of labels
    "axes.titlesize": "xx-large", # Font size of title
    "axes.spines.top": False,
    "axes.spines.right": False,
}] + ([_poster_style] if _poster_style else []))


def generate_sample(var, mu, sample_size):
  """Draw `sample_size` values from a normal distribution with mean `mu` and variance `var`."""
  std_dev = np.sqrt(var)
  return scipy.stats.norm.rvs(loc=mu, scale=std_dev, size=sample_size)

def plot_sample(sample):
  """Mark every sample value with a red dot on the x axis (y = 0)."""
  baseline = np.zeros_like(sample)
  plt.plot(sample, baseline, 'o', color='r', alpha=0.8)

def plot_norm_distribution(var, mu, color='b'):
  """Draw the N(mu, var) density on [-50, 50] using the given line format `color`."""
  grid = np.linspace(-50, 50, 1001)
  density = scipy.stats.norm.pdf(grid, mu, np.sqrt(var))
  plt.plot(grid, density, color, linewidth=3.0)

def plot_sampled(var, mu, sample):
  """Show the N(mu, var) density at each sample point.

  Each point gets a dashed vertical line from the axis up to the density
  value, topped with a blue dot.
  """
  density = scipy.stats.norm.pdf(sample, mu, np.sqrt(var))
  for point, value in zip(sample, density):
    plt.plot([point, point], [0, value], 'm--',linewidth = 2.0)
  plt.plot(sample, density, 'o', color='b', alpha=1.0)
  
def get_likelihood(sample, mu, var):
  """Return the log-likelihood of `sample` under N(mu, var).

  sample: array of observations.
  mu: mean of the candidate distribution.
  var: variance of the candidate distribution.
  """
  sig = np.sqrt(var)
  # logpdf stays finite where pdf underflows to 0 and log() would give -inf.
  return np.sum(scipy.stats.norm.logpdf(sample, mu, sig))

def plot_likelihoods(sample, var):
  """Plot get_likelihood(sample, mu, var) for candidate means mu in [-50, 50]."""
  candidates = np.linspace(-50, 50, 1001)
  curve = []
  for candidate in candidates:
    curve.append(get_likelihood(sample, candidate, var))
  plt.plot(candidates, curve, 'b', linewidth=3.0)


# True parameters of the distribution the sample is drawn from.
var = 160.0
mu = 0.0
# Sample size used for the current `sample`; compared with the slider value
# in plot_likelihood_estimator to decide when to redraw the sample.
old_sample_size = 1
sample = np.array([])

# NOTE(review): `value` does not look like a Button trait - confirm it is needed.
button = widgets.Button(value=False, description='Generate sample', icon='check')
slider_mu = widgets.FloatSlider(min=-50.0, max=50.0, value=0.0, description='mu')
slider_sample = widgets.IntSlider(min=1, max=30, value=3, description='sample size')


@button.on_click
def plot_on_click(b):
  """Button handler: draw a new sample and nudge the mu slider.

  The slider value is changed by 1E-10 (downwards when positive, upwards
  otherwise), presumably so that the interactive plot is refreshed.
  """
  global sample
  sample = generate_sample(var, mu, old_sample_size)
  current = slider_mu.value
  slider_mu.value = current - 1E-10 if current > 0.0 else current + 1E-10

def plot_likelihood_estimator(mu_test, sample_size):
  """Draw the sample under a candidate mean and the matching log-likelihood curve.

  mu_test: candidate mean chosen with the slider.
  sample_size: requested sample size; a new sample is drawn when it differs
    from the size of the current one.
  """
  global old_sample_size, sample
  if sample_size != old_sample_size:
    sample = generate_sample(var, mu, sample_size)
    old_sample_size = sample_size

  # Left panel: sample, candidate density (blue) and true density (red).
  plt.subplot(1, 2, 1)
  plot_sample(sample)
  plot_norm_distribution(var, mu_test)
  plot_norm_distribution(var, mu, color='r')
  plot_sampled(var, mu_test, sample)
  plt.title('PDF')

  # Right panel: curve over all candidate means, current candidate marked.
  plt.subplot(1, 2, 2)
  plot_likelihoods(sample, var)
  current_value = get_likelihood(sample, mu_test, var)
  plt.plot(mu_test, current_value, 'o', color='b', alpha=1.0)
  plt.title('Log of likelihood')


# Show the "Generate sample" button, then the sliders driving the plot.
display(button)
widgets.interact(plot_likelihood_estimator, mu_test=slider_mu, sample_size=slider_sample);  

Button(description='Generate sample', icon='check', style=ButtonStyle())

interactive(children=(FloatSlider(value=0.0, description='mu', max=50.0, min=-50.0), IntSlider(value=3, descri…

In [9]:
#@title #MLE for gaussian with fixed mean
from scipy.stats import bernoulli, binom, poisson
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
import scipy

# Properties for plots
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names, so use whichever exists.
_poster_style = next(
    (name for name in ("seaborn-v0_8-poster", "seaborn-poster")
     if name in plt.style.available),
    None,
)
# NOTE: styles later in the list take precedence, so the poster style
# overrides any key of the dict that it also defines.
plt.style.use([{
    "figure.figsize":(12,9), # Figure size
    "xtick.labelsize": "large", # Font size of the X-ticks
    "ytick.labelsize": "large", # Font size of the Y-ticks
    "legend.fontsize": "x-large", # Font size of the legend
    "axes.labelsize": "x-large", # Font size of labels
    "axes.titlesize": "xx-large", # Font size of title
    "axes.spines.top": False,
    "axes.spines.right": False,
}] + ([_poster_style] if _poster_style else []))


def generate_sample(sig, mu, sample_size):
  """Draw `sample_size` values from a normal distribution with mean `mu` and standard deviation `sig`."""
  draws = scipy.stats.norm.rvs(loc=mu, scale=sig, size=sample_size)
  return draws

def plot_sample(sample):
  """Mark every sample value with a red dot on the x axis (y = 0)."""
  baseline = np.zeros_like(sample)
  plt.plot(sample, baseline, 'o', color='r', alpha=0.8)

def plot_norm_distribution(sig, mu, color='b'):
  """Draw the normal density with mean `mu` and standard deviation `sig` on [-50, 50]."""
  grid = np.linspace(-50, 50, 1001)
  plt.plot(grid, scipy.stats.norm.pdf(grid, mu, sig), color, linewidth=3.0)

def plot_sampled(sig, mu, sample):
  """Show the normal density (mean `mu`, std `sig`) at each sample point.

  Each point gets a dashed vertical line from the axis up to the density
  value, topped with a blue dot.
  """
  density = scipy.stats.norm.pdf(sample, mu, sig)
  for point, value in zip(sample, density):
    plt.plot([point, point], [0, value], 'm--',linewidth = 2.0)
  plt.plot(sample, density, 'o', color='b', alpha=1.0)
  
def get_likelihood(sample, mu, sig):
  """Return the likelihood of `sample`: the product of the normal densities (mean `mu`, std `sig`)."""
  densities = scipy.stats.norm.pdf(sample, mu, sig)
  return np.prod(densities)

def plot_likelihoods(sample, mu):
  """Plot get_likelihood(sample, mu, sig) for candidate sig values in [0.1, 100]."""
  candidates = np.linspace(0.1, 100, 1001)
  curve = []
  for candidate in candidates:
    curve.append(get_likelihood(sample, mu, candidate))
  plt.plot(candidates, curve, 'b', linewidth=3.0)


# True parameters of the distribution the sample is drawn from.
sig = 10.0
mu = 0.0
# Sample size used for the current `sample`; compared with the slider value
# in plot_likelihood_estimator to decide when to redraw the sample.
old_sample_size = 1
sample = np.array([])

# NOTE(review): `value` does not look like a Button trait - confirm it is needed.
button = widgets.Button(value=False, description='Generate sample', icon='check')
slider_sig = widgets.FloatSlider(min=0.1, max=30.0, value=0.1, description='sigma')
slider_sample = widgets.IntSlider(min=1, max=30, value=3, description='sample size')


@button.on_click
def plot_on_click(b):
  """Button handler: draw a new sample and nudge the sigma slider.

  The slider value is changed by 1E-10 (downwards when above 10.0, upwards
  otherwise), presumably so that the interactive plot is refreshed.
  """
  global sample
  sample = generate_sample(sig, mu, old_sample_size)
  current = slider_sig.value
  slider_sig.value = current - 1E-10 if current > 10.0 else current + 1E-10

def plot_likelihood_estimator(sig_test, sample_size):
  """Draw the sample under a candidate sigma and the matching likelihood curve.

  sig_test: candidate standard deviation chosen with the slider.
  sample_size: requested sample size; a new sample is drawn when it differs
    from the size of the current one.
  """
  global old_sample_size, sample
  if sample_size != old_sample_size:
    sample = generate_sample(sig, mu, sample_size)
    old_sample_size = sample_size

  # Left panel: sample, candidate density (blue) and true density (red).
  plt.subplot(1, 2, 1)
  plot_sample(sample)
  plot_norm_distribution(sig_test, mu)
  plot_norm_distribution(sig, mu, color='r')
  plot_sampled(sig_test, mu, sample)
  plt.title('PDF')

  # Right panel: curve over all candidate sigmas, current candidate marked.
  plt.subplot(1, 2, 2)
  plot_likelihoods(sample, mu)
  current_value = get_likelihood(sample, mu, sig_test)
  plt.plot(sig_test, current_value, 'o', color='b', alpha=1.0)
  plt.title('Likelihood')


# Show the "Generate sample" button, then the sliders driving the plot.
display(button)
widgets.interact(plot_likelihood_estimator, sig_test=slider_sig, sample_size=slider_sample);  



Button(description='Generate sample', icon='check', style=ButtonStyle())

interactive(children=(FloatSlider(value=0.1, description='sigma', max=30.0, min=0.1), IntSlider(value=3, descr…

In [10]:
#@title #Correlation coefficient

import IPython
from google.colab import output

# HTML/JavaScript payload of the correlation demo: points are dragged from the
# "Feature" button onto a 600x600 field and the correlation coefficient of
# all points inside the field is displayed.
# NOTE(review): correlation() negates the coefficient, presumably because the
# canvas y axis points downwards - confirm.
main_str = '''
  <script type="application/javascript">

    var active_pt = null;
    var all_pts = [];
    var classAbtn = [650, 100];
    var classBbtn = [650, 200];
    var ptR = 9.0;
    var params = null;
    var debug_txt = "";
    var bg_image = new Image();

    function is_close(pt1, pt2) {
      return   (pt1[0] - pt2[0])*(pt1[0] - pt2[0])
            +  (pt1[1] - pt2[1])*(pt1[1] - pt2[1])
            <= ptR*ptR;
    }

    function circ(ctx, pos) {
      ctx.beginPath();
      ctx.arc(pos[0], pos[1], ptR, 0.0, 2.0 * Math.PI, 0);
      ctx.fillStyle = 'rgba(200, 0, 0, 0.65)';
      ctx.fill();
    }

    function in_field(pt) {
      return pt[0] > 0 && pt[0] < 600 && pt[1] > 0 && pt[1] < 600;
    }

    function mean(idx) {
      var sum = 0.0;
      var num = 0;
      if (active_pt && in_field(active_pt)) {
        sum = active_pt[idx];
        num = 1;
      }
      all_pts.forEach(function (item) {
          sum += item[idx];
          num += 1;
        });
      return sum / num;
    }

    function correlation(m_x, m_y) {
      var cov_xy = 0.0;
      var s_x2 = 0.0;
      var s_y2 = 0.0;
      function accumulate(pt) {
        var dx = pt[0] - m_x;
        var dy = pt[1] - m_y;
        cov_xy += dx * dy;
        s_x2 += dx * dx;
        s_y2 += dy * dy;
      }
      if (active_pt && in_field(active_pt)) {
        accumulate(active_pt);
      }
      all_pts.forEach(function (item) {
          accumulate(item);
        });
      return - cov_xy / Math.sqrt(s_x2 * s_y2);
    }

    function draw() {
      var canvas = document.getElementById('canvas');
      if (canvas.getContext) {
        var ctx = canvas.getContext('2d');

        ctx.clearRect(0, 0, canvas.width, canvas.height); // cleanup before start
        if (bg_image.complete && bg_image.naturalWidth > 0) {
          ctx.drawImage(bg_image, 0, 0, 600, 600);
        }
        ctx.strokeRect(0, 0, 600, 600); // field

        ctx.font = '20px serif';
        ctx.fillStyle = 'black';
        ctx.fillText('Feature', 670, 105);
        ctx.fillText('Correlation: ', 650, 200);
        ctx.fillText(debug_txt, 200, 200);
        circ(ctx, classAbtn);
        ctx.strokeRect(625, 75, 150, 50);
        ctx.strokeRect(625, 175, 150, 50);

        var m_x = mean(0);
        var m_y = mean(1);
        ctx.fillStyle = 'rgba(0, 0, 200, 0.3)';
        ctx.fillRect(m_x, m_y, 600-m_x, 600-m_y);
        ctx.fillRect(0, 0, m_x, m_y);
        ctx.fillStyle = 'rgba(200, 0, 0, 0.2)';
        ctx.fillRect(m_x, 0, 600-m_x, m_y);
        ctx.fillRect(0, m_y, m_x, 600-m_y);

        var corr = correlation(m_x, m_y);
        ctx.fillStyle = 'black';
        // undefined (0/0) until the points spread along both axes
        ctx.fillText(isFinite(corr) ? corr.toFixed(5) : 'n/a', 650, 220);

        if (active_pt) {
          circ(ctx, [active_pt[0], active_pt[1]]);
        }
        all_pts.forEach(function (item) {
          circ(ctx, [item[0], item[1]]);
        });

      }
    }

    var timer;
    function init() {
      timer = setInterval(draw, 10);
    }

    function mdown_handle(evt) {
      var x = evt.offsetX;
      var y = evt.offsetY;
      if (is_close([x,y], classAbtn)) {
        active_pt = [x, y];
      }
      var idx = -1;
      all_pts.forEach(function (item, index) {
        if (is_close([x,y], [item[0], item[1]])) {
          idx = index;
        }
      });
      if (idx > -1) {
        active_pt = all_pts[idx];
        all_pts.splice(idx, 1);
      }
    }

    function mmove_handle(evt) {
      if (active_pt) {
        active_pt[0] = evt.offsetX;
        active_pt[1] = evt.offsetY;
      }
    }

    function mup_handle(evt) {
      if (evt.offsetX >= 600) {
        active_pt = null;
      }
      if (active_pt) {
        all_pts.push(active_pt);
        active_pt = null;
      }
    }

  </script>
  <canvas id="canvas" width="800" height="600"
    onmousedown="mdown_handle(event)"
    onmousemove="mmove_handle(event)"
    onmouseup="mup_handle(event)"></canvas>
  <script> init();</script>
'''


display(IPython.display.HTML(main_str))

In [11]:
#@title #Simpson Paradox

import IPython
from google.colab import output

# HTML/JavaScript payload of the Simpson paradox demo: "Alive"/"Dead" points
# are dragged into the group boxes of hospitals A (left) and B (right); the
# per-group and overall survival rates are shown and compared.
main_str = '''
  <script type="application/javascript">

    var active_pt = null;
    var all_pts = [];
    var classAbtn = [650, 100];
    var classBbtn = [650, 200];
    var ptR = 9.0;
    var params = null;
    var debug_txt = "";
    var bg_image = new Image();

    function is_close(pt1, pt2) {
      return   (pt1[0] - pt2[0])*(pt1[0] - pt2[0])
            +  (pt1[1] - pt2[1])*(pt1[1] - pt2[1])
            <= ptR*ptR;
    }

    function circ(ctx, pos, cls) {
      ctx.beginPath();
      ctx.arc(pos[0], pos[1], ptR, 0.0, 2.0 * Math.PI, 0);
      ctx.fillStyle = (cls == 0 ? 'rgba(200, 0, 0, 0.5)' : 'rgba(0, 0, 200, 0.5)');
      ctx.fill();
    }

    function in_rect(pos, rect) {
      if (pos[0] < rect[0] || pos[0] > (rect[0] + rect[2]))
        return false;
      if (pos[1] < rect[1] || pos[1] > (rect[1] + rect[3]))
        return false;
      return true;
    }

    function get_survivals() {
      var g1A = [0, 0];
      var g1B = [0, 0];
      var g2A = [0, 0];
      var g2B = [0, 0];
      // the point being dragged is counted like a placed one
      var pts = active_pt ? all_pts.concat([active_pt]) : all_pts;
      pts.forEach(function (item) {
        if (in_rect(item, [0, 100, 200, 250]))
          g1A[item[2]] += 1;
        if (in_rect(item, [400, 100, 200, 250]))
          g1B[item[2]] += 1;
        if (in_rect(item, [0, 350, 200, 250]))
          g2A[item[2]] += 1;
        if (in_rect(item, [400, 350, 200, 250]))
          g2B[item[2]] += 1;
      });
      return [g1A[0]/(g1A[0]+g1A[1]),
              g1B[0]/(g1B[0]+g1B[1]),
              g2A[0]/(g2A[0]+g2A[1]),
              g2B[0]/(g2B[0]+g2B[1]),
              (g1A[0] + g2A[0])/(g1A[0]+g1A[1]+g2A[0]+g2A[1]),
              (g1B[0] + g2B[0])/(g1B[0]+g1B[1]+g2B[0]+g2B[1])];
    }

    // rate as a percentage; 'n/a' while the group is empty (0/0)
    function pct(rate) {
      return isFinite(rate) ? (rate*100.0).toFixed(1) + '%' : 'n/a';
    }

    // comparison sign between two rates; empty when either is undefined
    function cmp_sign(a, b) {
      if (a > b) return '>';
      if (a < b) return '<';
      if (a == b) return '=';
      return '';
    }

    function draw() {
      var canvas = document.getElementById('canvas');
      if (canvas.getContext) {
        var ctx = canvas.getContext('2d');

        ctx.clearRect(0, 0, canvas.width, canvas.height); // cleanup before start
        ctx.strokeRect(0, 0, 600, 600); // field
        ctx.strokeRect(0, 100, 200, 250);
        ctx.strokeRect(400, 100, 200, 250);
        ctx.strokeRect(0, 350, 200, 250);
        ctx.strokeRect(400, 350, 200, 250);

        ctx.font = '20px serif';
        ctx.fillStyle = 'black';
        ctx.fillText('Group 1', 50, 150);
        ctx.fillText('Group 2', 50, 400);
        ctx.fillText('Group 1', 450, 150);
        ctx.fillText('Group 2', 450, 400);
        ctx.fillText('Alive', 670, 105);
        ctx.fillText('Dead', 670, 206);
        ctx.fillText('Survival', 210, 200);
        ctx.fillText('Survival', 320, 200);
        ctx.fillText('Survival', 210, 450);
        ctx.fillText('Survival', 320, 450);

        var s = get_survivals();
        ctx.fillText(pct(s[0]), 210, 220);
        ctx.fillText(pct(s[2]), 210, 470);
        ctx.fillText(pct(s[1]), 320, 220);
        ctx.fillText(pct(s[3]), 320, 470);
        ctx.fillText('Hospital A, Survival:' + pct(s[4]), 50, 50);
        ctx.fillText('Hospital B, Survival:' + pct(s[5]), 350, 50);

        ctx.font = '40px serif';
        ctx.fillStyle = 'black';
        ctx.fillText(cmp_sign(s[0], s[1]), 290, 220);
        ctx.fillText(cmp_sign(s[2], s[3]), 290, 470);
        ctx.fillText(cmp_sign(s[4], s[5]), 290, 50);

        ctx.fillText(debug_txt, 200, 200);
        circ(ctx, classAbtn, 0);
        circ(ctx, classBbtn, 1);
        ctx.strokeRect(625, 75, 150, 50);
        ctx.strokeRect(625, 175, 150, 50);

        if (active_pt) {
          circ(ctx, [active_pt[0], active_pt[1]], active_pt[2]);
        }
        all_pts.forEach(function (item) {
          circ(ctx, [item[0], item[1]], item[2]);
        });

      }
    }

    var timer;
    function init() {
      timer = setInterval(draw, 10);
    }

    function mdown_handle(evt) {
      var x = evt.offsetX;
      var y = evt.offsetY;
      if (is_close([x,y], classAbtn)) {
        active_pt = [x, y, 0];
      }
      if (is_close([x,y], classBbtn)) {
        active_pt = [x, y, 1];
      }
      var idx = -1;
      all_pts.forEach(function (item, index) {
        if (is_close([x,y], [item[0], item[1]])) {
          idx = index;
        }
      });
      if (idx > -1) {
        active_pt = all_pts[idx];
        all_pts.splice(idx, 1);
      }
    }

    function mmove_handle(evt) {
      if (active_pt) {
        active_pt[0] = evt.offsetX;
        active_pt[1] = evt.offsetY;
      }
    }

    function mup_handle(evt) {
      var x = evt.offsetX;
      var y = evt.offsetY;
      if (!in_rect([x,y], [0, 100, 200, 500]) && !in_rect([x,y], [400, 100, 200, 500])) {
        active_pt = null;
      }
      if (active_pt) {
        all_pts.push(active_pt);
        active_pt = null;
      }
    }

  </script>
  <canvas id="canvas" width="800" height="600"
    onmousedown="mdown_handle(event)"
    onmousemove="mmove_handle(event)"
    onmouseup="mup_handle(event)"></canvas>
  <script> init();</script>
'''


display(IPython.display.HTML(main_str))

#Lecture ?? "Naive Bayes Classification"

In [12]:
#@title #Monty Hall paradox
import IPython
from google.colab import output

# HTML/JavaScript payload of the Monty Hall game: click a door, one other
# non-prize door is opened, then click to stay or switch; the table tracks
# wins and losses per strategy.
main_str = '''
<html>
<head>
<style type="text/css">
img {
        height: 450px;
        width: 220px;
        float:left;
}
</style>
<script type="text/javascript">

var tempImg = new Image();
tempImg.src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_5.png";

var state = 0;
var prize = Math.floor(Math.random() * 3) + 1;
var stay = 0;
var change = 0;

var stay_correct = 0;
var stay_incorrect = 0;
var change_correct = 0;
var change_incorrect = 0;

function click_img(num) {
  if (state == 0) {
    let intersection = [1, 2, 3].filter(x => ![num, prize].includes(x));
    var open = intersection[Math.floor(Math.random() * intersection.length)];
    stay = num;
    change = [1, 2, 3].filter(x => ![open, stay].includes(x))[0];

    document.getElementById("d" + open).src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/nb_6.png";
    document.getElementById("d" + stay).src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_2.png";
    document.getElementById("d" + change).src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_3.png";
    state = 1;
  } else if (state == 1 && (num == stay || num == change)) {
    for (var i = 1; i < 4; ++i)
      document.getElementById("d" + i).src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/nb_6.png";
    document.getElementById("d" + prize).src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_5.png";
    if (num == prize && num == stay) stay_correct += 1;
    if (num == prize && num == change) change_correct += 1;
    if (num != prize && num == stay) stay_incorrect += 1;
    if (num != prize && num == change) change_incorrect += 1;

    document.getElementById("switch_correct").innerHTML = change_correct;
    document.getElementById("stay_correct").innerHTML = stay_correct;
    document.getElementById("switch_incorrect").innerHTML = change_incorrect;
    document.getElementById("stay_incorrect").innerHTML = stay_incorrect;
    document.getElementById("switch_proportion").innerHTML = Math.round((change_correct/(change_correct + change_incorrect + 0.000001))*1000)/1000;
    document.getElementById("stay_proportion").innerHTML = Math.round((stay_correct/(stay_correct + stay_incorrect + 0.000001))*1000)/1000;
    state = 2;
  } else if (state == 2) {
    for (var i = 1; i < 4; ++i)
      document.getElementById("d" + i).src = "https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_1.png";
    prize = Math.floor(Math.random() * 3) + 1;
    state = 0;
  }
}

</script>
</head>

<body>

<table>
<tr>
  <td><img src="https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_1.png" id = "d1" onclick="click_img(1)"></td>
  <td><img src="https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_1.png" id = "d2" onclick="click_img(2)"></td>
  <td><img src="https://raw.githubusercontent.com/fbeilstein/machine_learning/master/lecture_5_naive_bayes/mh_1.png" id = "d3" onclick="click_img(3)"></td>
</tr>
</table>
<table>
<tr>
  <td><b>Switch Trials</b></td><td></td>
  <td><b>Stay Trials</b></td><td></td>
</tr>
<tr>
  <td>Number Correct:</td>
  <td id="switch_correct">0</td>
  <td>Number Correct:</td>
  <td id="stay_correct">0</td>
</tr>
<tr>
  <td>Number Incorrect:</td>
  <td id="switch_incorrect">0</td>
  <td>Number Incorrect:</td>
  <td id="stay_incorrect">0</td>
</tr>
<tr>
  <td>Proportion Correct:</td>
  <td id="switch_proportion">0</td>
  <td>Proportion Correct:</td>
  <td id="stay_proportion">0</td>
</tr>
</table>
</body>
</html>
'''



def generate_html():
  """Return the HTML document of the Monty Hall game (`main_str`, unchanged)."""
  return main_str
  
# Render the game in the cell output.
display(IPython.display.HTML(generate_html()))

0,1,2
,,
,,

0,1,2,3
Switch Trials,,Stay Trials,
Number Correct:,0.0,Number Correct:,0.0
,,,
Number Incorrect:,0.0,Number Incorrect:,0.0
,,,
Proportion Correct:,0.0,Proportion Correct:,0.0
,,,


In [13]:
#@title #1-dimensional Naive Bayes Classification
from scipy.stats import norm
from sklearn.naive_bayes import GaussianNB
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(1);

#@widgets.interact(N_red=(1,1000,1),N_blue=(1,1000,1),red_mean=(-1,6,0.1),blue_mean=(-1,6,0.1),red_std=(0.1,3.0,0.1),blue_std=(0.1,3.0,0.1))
def plot_NB(N_red, N_blue, red_mean, blue_mean, red_std, blue_std):
  """Sample two 1-D Gaussian classes and plot a GaussianNB fit on them.

  The top panel shows the drawn samples, the two generating normal densities
  and the predicted class of every grid point as a shaded background. The
  bottom panel shows the predicted class probabilities and a vertical line at
  the grid point whose "red" probability is closest to 0.5.

  Args:
    N_red: number of samples drawn for the red class (label 0).
    N_blue: number of samples drawn for the blue class (label 1).
    red_mean: mean of the normal distribution the red samples are drawn from.
    blue_mean: mean of the normal distribution the blue samples are drawn from.
    red_std: standard deviation of the red distribution.
    blue_std: standard deviation of the blue distribution.
  """
  # Red is drawn before blue so the global RNG is consumed in a fixed order.
  arr_red = np.random.normal(loc=red_mean, scale=red_std, size=N_red)
  arr_blue = np.random.normal(loc=blue_mean, scale=blue_std, size=N_blue)
  samples = np.hstack((arr_red, arr_blue))
  labels_set = np.hstack((np.full(N_red, 0), np.full(N_blue, 1)))
  x_min = samples.min()
  x_max = samples.max()

  # scikit-learn expects a 2-D (n_samples, n_features) array.
  model = GaussianNB()
  model.fit(samples[:, np.newaxis], labels_set)

  fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(20,10))
  # Samples are drawn slightly below zero so they do not hide the density curves.
  ax1.plot(arr_blue, np.zeros_like(arr_blue)-0.02, 'o', color='b', alpha=0.1)
  ax1.plot(arr_red, np.zeros_like(arr_red)-0.02, 'o', color='r', alpha=0.1)

  x_axis = np.arange(x_min, x_max, 0.001)
  ax1.plot(x_axis, norm.pdf(x_axis, red_mean, red_std), color='r')
  ax1.plot(x_axis, norm.pdf(x_axis, blue_mean, blue_std), color='b')

  clrs = {0:'r', 1:'b'}
  # Keep the grid 1-D for plotting and hand the model a column view of it:
  # axvspan/axvline take scalar coordinates, not one-element arrays.
  grid = np.linspace(x_min, x_max, num=200)
  X = grid[:, np.newaxis]
  y_predicted = model.predict(X)
  # Indices i where the predicted class changes between grid[i] and grid[i+1].
  idx = np.nonzero(y_predicted[:-1] != y_predicted[1:])[0]
  for i_from, i_to in zip(np.hstack([0, idx]), np.hstack([idx, len(grid)-1])):
    # The span that starts at a change point takes the class predicted just after it.
    ax1.axvspan(grid[i_from], grid[i_to], facecolor=clrs[y_predicted[i_from+1]], alpha=0.2)

  y_predicted_P = model.predict_proba(X)
  ax2.plot(grid, y_predicted_P[:,0], 'o', color='r')
  ax2.plot(grid, y_predicted_P[:,1], 'o', color='b')
  # Grid point whose red-class probability is nearest to 0.5.
  idx = (np.abs(y_predicted_P[:,0] - 0.5)).argmin()
  ax2.axvline(x=grid[idx])

  # Applies to the current axes; the x-axis is shared, so both panels follow.
  plt.xlim([x_min, x_max])

  plt.show()

# Bind one slider to each plot_NB argument; the trailing semicolon keeps the
# notebook from echoing the value returned by interact.
widgets.interact(
    plot_NB,
    N_red=widgets.IntSlider(min=1, max=1000, step=1, value=100),
    N_blue=widgets.IntSlider(min=1, max=1000, step=1, value=50),
    red_mean=widgets.FloatSlider(min=-1, max=6, step=0.1, value=-1.0),
    blue_mean=widgets.FloatSlider(min=-1, max=6, step=0.1, value=2.0),
    red_std=widgets.FloatSlider(min=0.1, max=3.0, step=0.1, value=0.5),
    blue_std=widgets.FloatSlider(min=0.1, max=3.0, step=0.1, value=2.0),
);

interactive(children=(IntSlider(value=100, description='N_red', max=1000, min=1), IntSlider(value=50, descript…

In [16]:
#@title #2-dimensional Naive Bayes Classification
from sklearn.naive_bayes import GaussianNB

def fit_model(training_set, y):
  """Fit a fresh GaussianNB classifier on training_set with labels y and return it."""
  # fit() returns the estimator itself, so the fitted model can be returned directly.
  return GaussianNB().fit(training_set, y)


import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns; sns.set();


def visualize(estimator, X, y):
  """Plot the points X, colored by label y, over the estimator's decision regions.

  The estimator is evaluated on a 700x700 grid covering [-1, 1] x [-1, 1] and
  the predicted labels are drawn as translucent filled contours beneath the
  scatter plot.

  Args:
    estimator: fitted object exposing predict() for rows of two features.
    X: array whose first two columns are the point coordinates.
    y: integer class labels, used as indices into the five-color palette.

  Returns:
    The matplotlib figure, already closed so pyplot does not display it itself.
  """
  # One color per class, scaled from 0-255 down to 0-1.
  palette = np.array([
      [210, 0, 0],
      [0, 0, 150],
      [107, 142, 35],
      [150, 0, 150],
      [220, 105, 0],
  ]) / 255.
  lower, upper = -1, +1
  tick_positions = np.arange(-.75, 1, .25)

  fig = plt.figure(figsize=(10,10))
  ax = fig.gca()
  # Negative padding pulls the tick labels inside the plotting area.
  for axis_name, label_pad in (("y", -40), ("x", -15)):
    ax.tick_params(axis=axis_name, direction="in", pad=label_pad)
  ax.set_xticks(tick_positions)
  ax.set_yticks(tick_positions)

  # Training points, drawn above the decision regions.
  ax.scatter(X[:, 0], X[:, 1], c=palette[y], s=30, zorder=3)
  ax.axis('tight')
  ax.set_xlim([lower, upper])
  ax.set_ylim([lower, upper])

  # Predict a label for every node of a dense grid over the visible square.
  grid_x, grid_y = np.meshgrid(np.linspace(lower, upper, num=700),
                               np.linspace(lower, upper, num=700))
  grid_points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
  regions = estimator.predict(grid_points).reshape(grid_x.shape)

  # Level boundaries at -0.5, 0.5, ... put each integer label in its own band.
  boundaries = np.arange(len(palette) + 1) - 0.5
  ax.contourf(grid_x, grid_y, regions, alpha=0.3, levels=boundaries,
              cmap=ListedColormap(palette), zorder=1)

  plt.close(fig)

  return fig

import base64
import io

def figure_to_base64(fig):
  """Render fig as a tightly cropped PNG and return it base64-encoded.

  Args:
    fig: object with a matplotlib-style savefig(file, format=..., ...) method.

  Returns:
    The PNG bytes encoded as a base64 text string.
  """
  # The context manager closes the buffer even if savefig raises.
  with io.BytesIO() as buf:
    fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    png_bytes = buf.getvalue()
  # b64encode never emits line breaks, so no newline stripping is needed.
  return base64.b64encode(png_bytes).decode('utf-8')

import json

def invoke(pts):
  """Fit the classifier on points sent from the canvas and return the plot as JSON.

  Args:
    pts: flat sequence of numbers read in groups of three: x, y, class index.
      x and y are canvas pixel coordinates.

  Returns:
    An IPython JSON display object whose 'image' entry is the base64-encoded
    PNG of the decision-region plot.
  """
  data = np.asarray(pts, dtype=float).reshape((-1, 3))
  # Map pixel coordinates 0..600 onto -1..1 and flip y (the row coordinate
  # grows in the opposite direction to the plot's y-axis).
  X = data[:, :2] / 300.0 - 1.0
  X[:, 1] *= -1.0
  # Labels are later used to index a color table, so they must be integers even
  # when a fractional coordinate has turned the whole payload into floats.
  y = data[:, 2].astype(int)

  clf = fit_model(X, y)
  fig = visualize(clf, X, y)

  return IPython.display.JSON({'image': figure_to_base64(fig)})


import IPython
from google.colab import output

# HTML/JavaScript for the interactive 2-D widget: a 600x600 drawing field plus a
# palette of five class buttons on an 800x600 canvas. Points are dragged from the
# palette onto the field; the "Invoke Bayes" button sends them to the Python
# callback registered as 'notebook.InvSVM' and uses the returned PNG as background.
main_str = '''
  <script type="application/javascript">
  
    var active_pt = null;   // point currently being dragged: [x, y, class]
    var all_pts = [];       // points already placed on the field
    var class_btn = [[650, 50], [650, 150], [650, 250], [650, 350], [650, 450]];
    var class_name = ["class A", "class B", "class C", "class D", "class E"];
    var class_color = ["rgba(210, 0, 0, 0.75)", "rgba(0, 0, 150, 0.75)", 
         "rgba(107, 142, 35, 0.95)", "rgba(150, 0, 150, 0.75)", "rgba(220, 105, 0, 0.75)"];

    var ptR = 10.0;
    var params = null;
    var debug_txt = "";
    var bg_image = new Image();

    // True when the two points are at most one point radius apart.
    function is_close(pt1, pt2) {
      return   (pt1[0] - pt2[0])*(pt1[0] - pt2[0])
            +  (pt1[1] - pt2[1])*(pt1[1] - pt2[1])
            <= ptR*ptR;
    }
  
    function circ(ctx, pos, cls) {
      ctx.beginPath();
      ctx.arc(pos[0], pos[1], ptR, 0.0, 2.0 * Math.PI, 0);
      ctx.fillStyle = class_color[cls];
      ctx.fill();
    }
    
    function draw() {
      var canvas = document.getElementById('canvas');
      if (canvas.getContext) {
        var ctx = canvas.getContext('2d');
        
        ctx.clearRect(0, 0, canvas.width, canvas.height); // cleanup before start
        ctx.drawImage(bg_image, 0, 0, 600, 600);
        ctx.strokeRect(0, 0, 600, 600); // field
        
        for (var i = 0; i < 5; i++) {
          ctx.font = '20px serif';
          ctx.fillStyle = 'black';
          ctx.fillText(class_name[i], 700,  50 + 100*i);
          circ(ctx, class_btn[i], i);
          ctx.strokeRect(625, 25 + 100*i, 150, 50);
        }

        ctx.fillText(debug_txt, 200, 200);

        if (active_pt) {
          circ(ctx, [active_pt[0], active_pt[1]], active_pt[2]);
        }
        all_pts.forEach(function (item, index) {
          circ(ctx, [item[0], item[1]], item[2]);
        });

      }
    }
    
    var timer;
    function init() {
      timer = setInterval(draw, 10);
    }
    
    function mdown_handle(evt) {
      var x = evt.offsetX;
      var y = evt.offsetY;
      // Pressing on a palette button starts dragging a new point of that class.
      for (var i = 0; i < 5; i++) { 
        if (is_close([x,y], class_btn[i])) { 
          active_pt = [x, y, i];
        }
      }

      // Pressing on an existing point picks it up again.
      var idx = -1;
      all_pts.forEach(function (item, index) {
        if (is_close([x,y], [item[0], item[1]])) {
          idx = index;
        }
      });
      if (idx > -1) {
        active_pt = all_pts[idx];
        all_pts.splice(idx, 1);
      }
    }
    
    function mmove_handle(evt) {
      var x = evt.offsetX;
      var y = evt.offsetY;
      if (active_pt) {
        active_pt[0] = x;
        active_pt[1] = y;
      }
    }
    
    function mup_handle(evt) {
      var x = evt.offsetX;
      var y = evt.offsetY;
      // Releasing outside the 600px-wide field discards the dragged point.
      if (x >= 600) {
        active_pt = null;
      }
      if (active_pt) {
        all_pts.push(active_pt);
        active_pt = null;
      }
    }

    async function calculate() {
      // An empty array is truthy, so the length has to be checked explicitly.
      if (!all_pts || all_pts.length === 0)
        return;
      var merged = [].concat.apply([], all_pts);
      const result = await google.colab.kernel.invokeFunction('notebook.InvSVM', [merged], {});
      params = result.data['application/json'];
      bg_image.src = 'data:image/png;base64,' + params.image;
    }

  </script>
  <canvas id="canvas" width="800" height="600" 
    onmousedown="mdown_handle(event)"
    onmousemove="mmove_handle(event)"
    onmouseup="mup_handle(event)"></canvas>
  <div style="position:absolute; top:500px; left:625px;">
    <button type="button" onclick="calculate();">
      Invoke Bayes
    </button>
  </div>
  <script> init();</script>
'''



def generate_html():
  """Return the widget's HTML/JavaScript markup held in the module-level main_str."""
  return main_str
  
# Wrap the generated markup in an IPython HTML object and show it as this cell's output.
display(IPython.display.HTML(generate_html()))
    
    
# Expose invoke() to the front end; the name must match the one the JavaScript
# in main_str passes to google.colab.kernel.invokeFunction.
output.register_callback('notebook.InvSVM', invoke)