# Week 9 (3/29-4/4)

## KDE and Bayes Classification

## Project 



## Resources

### 1. Histogram demo

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed

# Apply matplotlib's 'bmh' style sheet to figures created from here on.
plt.style.use('bmh')
# IPython magic: set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'


def hist(data, bins=10, shift=0):
    '''
    Draw a density histogram of `data` with a rug of the raw observations.

    data:
        sample values; converted to a NumPy array before plotting
    bins:
        number of bins
    shift:
        shift of bin boundaries (added to both ends of the bin range)
    '''
    plt.figure(figsize=(12, 4))

    pad = 5  # margin kept on each side of the sample
    values = np.array(data)
    left = values.min() - pad
    right = values.max() + pad

    # The visible window is fixed; only the bin range slides with `shift`.
    plt.xlim(left, right)
    sns.histplot(values,
                 stat="density",
                 bins=bins,
                 binrange=(left + shift, right + shift))
    # Mark every individual observation along the x-axis.
    sns.rugplot(values, height=0.05, color='w')
    plt.show()


# Reproducible toy sample: N draws from a normal distribution (loc 10, scale 5).
N = 50
rng = np.random.default_rng(seed=10)
data = rng.normal(10, 5, N)
# `shift` and `bins` become sliders; `data` is passed through unchanged.
interact(
    hist,
    shift=(0, 5, 0.25),
    bins=(1, 30, 1),
    data=fixed(data),
);

### 2. Gaussian kernel

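$$f(x) = \frac{1}{h\sqrt{2\pi}}\exp\left(-\frac{1}{2}\left(\frac{x-X}{h}\right)^2\right)$$

Here $X$ is the data point the kernel is centered on and $h$ is the bandwidth; in the code below they are the arguments `mu` and `sigma`.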

In [1]:
def ker(x, mu, sigma):
    '''
    Gaussian kernel centered at `mu` with width `sigma`, evaluated at `x`.

    x:
        evaluation point(s); scalar or NumPy array
    mu:
        center of the kernel
    sigma:
        width (standard deviation) of the kernel
    '''
    z = (x - mu) / sigma                      # standardized distance from the center
    normalizer = sigma * (2 * np.pi) ** 0.5   # makes the kernel integrate to one
    return np.exp(-0.5 * z ** 2) / normalizer

### 3. KDE plot

In [8]:
def plot_kde(data, h=0.1):
    '''
    Plot the kernel density estimate of `data` on a 400-point grid.

    data:
        NumPy array of observations (`.min()` / `.max()` are called on it)
    h:
        value forwarded as the third argument of `kde`

    NOTE: relies on a `kde(x, data, h)` callable that is not defined in the
    visible part of this notebook; it must exist before this is called.
    '''
    # Evaluation grid reaching 2 units beyond the sample on both sides.
    grid = np.linspace(data.min() - 2, data.max() + 2, 400)

    plt.figure(figsize=(10, 6))
    density = kde(grid, data, h)
    plt.plot(grid, density)
    # Red tick marks on the x-axis at each observation.
    baseline = [0] * len(data)
    plt.plot(data, baseline, 'r|', ms=15)

### 4. KDE integral widget

In [16]:
import scipy

def integrate(data, lower=7, upper=12):
    '''
    Plot a Gaussian KDE of `data` and report its integral from `lower` to `upper`.

    data:
        1-D NumPy array of observations (`.min()` / `.max()` are called on it)
    lower:
        lower integration bound
    upper:
        upper integration bound; the area is shaded only when lower <= upper
    '''
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is reachable as an attribute.
    from scipy.stats import gaussian_kde

    kde_data = gaussian_kde(data)
    # Dedicated 1-D routine; with reversed bounds the result changes sign,
    # which agrees with the integral written in the title.
    integral = kde_data.integrate_box_1d(lower, upper)

    x_min = data.min() - 1
    x_max = data.max() + 1
    x = np.linspace(x_min, x_max, 400)

    plt.figure(figsize=(10, 5))
    plt.xlim(x_min - 1, x_max + 1)
    # Raw f-string so the LaTeX "\int" is not read as an invalid Python escape.
    plt.title(rf"$\int^{{{upper:.2f}}}_{{{lower:.2f}}}f(t)dt$ = {integral:.3f}",
              fontsize=14,
              y=1.1)
    plt.plot(x, kde_data(x))
    if lower <= upper:
        # Shade the region under the curve between the two bounds.
        xfill = np.linspace(lower, upper, 100)
        plt.fill_between(xfill,
                         kde_data(xfill),
                         0,
                         color='steelblue',
                         alpha=0.5)
    # Red tick marks on the x-axis at each observation.
    plt.plot(data, [0] * len(data), 'r|', ms=15)
    plt.show()


# Reproducible toy sample: N draws from a normal distribution (loc 10, scale 5).
N = 50
rng = np.random.default_rng(seed=10)
data = rng.normal(10, 5, N)
# Both bound sliders run from one unit below the sample to one unit above it,
# in steps of 1.0; `data` is passed through unchanged.
interact(
    integrate,
    data=fixed(data),
    lower=(data.min() - 1, data.max() + 1, 1.0),
    upper=(data.min() - 1, data.max() + 1, 1.0),
);

interactive(children=(FloatSlider(value=7.0, description='lower', max=18.711520554978158, min=-0.3479861642085…