# Week 10 (4/5-4/11)

## Zoom recordings

* [Tuesday](https://ub.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=179a8fcf-ad1e-4565-a36b-ad0201432e29)
* [Thursday](https://ub.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=2b53c2ce-aa25-48ac-93e2-ad040144b048)

## Notebook


- [View online](../_static/weekly_notebooks/week_10_class.html)
- Download the notebook file: [week_10_class.ipynb](../_static/weekly_notebooks/week_10_class.ipynb).

## KDE and Bayes Classification

## Project 

* [KDE and marathon results](../Projects/kde_marathon_results/kde_marathon_results.ipynb)

## Resources

### 1. Histogram demo

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed

# IPython magic (not plain Python): sets the inline backend's figure
# format to 'retina'.
%config InlineBackend.figure_format = 'retina'
# Apply the seaborn "darkgrid" style and "bright" palette to the plots
# drawn below.
sns.set_theme(style="darkgrid", palette="bright")

def hist(data, bins=10, shift=0):
    """
    Draw a density histogram of data with a rug plot of the
    individual values, then show the figure.

    data:
        sample values (converted to a NumPy array).
    bins:
        number of bins
    shift:
        offset added to both ends of the bin range, which
        slides the bin boundaries while the x-axis stays put.
    """

    pad = 5
    values = np.array(data)

    # The axis limits do not depend on shift; only the bin range does.
    left = values.min() - pad
    right = values.max() + pad

    plt.figure(figsize=(12, 4))
    plt.xlim(left, right)
    sns.histplot(
        values,
        stat="density",
        bins=bins,
        binrange=(left + shift, right + shift),
    )
    sns.rugplot(values, height=0.05, color='w')
    plt.show()


# Sample size for the demo.
N = 50
# Seeded generator, so the same sample is drawn on every run.
rng = np.random.default_rng(10)
# N draws from a normal distribution with mean 10 and standard deviation 5.
data = rng.normal(loc=10, scale=5, size=N)
# Interactive histogram: `shift` ranges over 0..5 in steps of 0.25 (the upper
# limit matches max_shift inside hist), `bins` over 1..30 in steps of 1, and
# `data` is passed through unchanged via fixed(). The trailing semicolon is
# the usual notebook idiom for hiding the echoed return value.
interact(hist, shift=(0, 5, 0.25), bins=(1, 30, 1), data=fixed(data));

### 2. KDE plot

In [7]:
from scipy.stats import norm

def kde(data, h, x):
    """
    Compute KDE of data with Gaussian kernels.

    data:
       Sized collection of sample values. One Gaussian
       kernel is centered at each value.
    h:
       Kernel bandwidth (standard deviation of each kernel).
    x:
       KDE value will be computed for each value
       of this array.

    Returns the average of the kernel densities evaluated
    at x, as floating point values with the shape of x.
    Raises ValueError if data is empty.
    """

    n = len(data)
    if n == 0:
        raise ValueError("data must contain at least one sample")

    x = np.asarray(x)
    # Accumulate in float regardless of the dtype of x: an integer grid
    # would otherwise make the in-place addition of float densities fail.
    y = np.zeros(x.shape, dtype=float)
    for X in data:
        y += norm.pdf(x, loc=X, scale=h)
    return y / n


def plot_kde(data, h=0.1, show_kernels=False):
    """
    Plot KDE

    data:
        An array (or other sequence) with data defining KDE.
    h:
        Kernel bandwidth.
    show_kernels:
        Boolean. If true, individual kernels are
        plotted too.
    """

    # Accept plain sequences as well as arrays, as hist does.
    data = np.asarray(data)
    n = len(data)

    # Evaluation grid: 400 points extending 3 units past the data range.
    x = np.linspace(data.min() - 3, data.max() + 3, 400)

    plt.figure(figsize=(14, 4))
    plt.plot(x, kde(data, h, x), lw=7, c='steelblue', alpha=0.4)
    # Mark each data point with a red tick on the x-axis.
    plt.plot(data, np.zeros(n), 'r|', ms=15, mew=2)
    if show_kernels:
        # Each kernel is scaled by 1/n so the dashed curves sum to the KDE.
        for X in data:
            plt.plot(x, norm.pdf(x, loc=X, scale=h) / n, 'r--', lw=0.7)

### 3. KDE integral widget

In [None]:
from  scipy.stats import gaussian_kde

def integrate(data, lower=7, upper=12):
    """
    Plot integral of KDE

    data:
        An array (or other sequence) with data defining KDE
    lower, upper:
        Limits of integration.

    The KDE curve is drawn, the integral over [lower, upper] is
    shown in the title, and the region under the curve is shaded
    when lower <= upper.
    """

    # Accept plain sequences as well as arrays; .min()/.max() are used below.
    data = np.asarray(data)

    kde_data = gaussian_kde(data)
    integral = kde_data.integrate_box(lower, upper)

    x_min = data.min() - 1
    x_max = data.max() + 1
    x = np.linspace(x_min, x_max, 400)

    plt.figure(figsize=(10, 5))
    plt.xlim(x_min - 1, x_max + 1)
    # Raw f-string: the LaTeX backslashes (\int and "\ ") are not valid
    # Python escape sequences, so a non-raw literal triggers a warning.
    plt.title(rf"$\int^{{{upper:.2f}}}_{{{lower:.2f}}}\ f(t)dt = {integral:.3f}$",
              fontsize=20,
              y=1.1)
    plt.plot(x, kde_data(x))
    if lower <= upper:
        # Shade the area under the KDE between the integration limits.
        xfill = np.linspace(lower, upper, 100)
        plt.fill_between(xfill,
                         kde_data(xfill),
                         0,
                         color='steelblue',
                         alpha=0.3)
    # Mark each data point with a red tick on the x-axis.
    plt.plot(data, [0] * len(data), 'r|', ms=15)


# Sample size for the demo.
N = 50
# Seeded generator, so the same sample is drawn on every run.
rng = np.random.default_rng(10)
# N draws from a normal distribution with mean 10 and standard deviation 5.
data = rng.normal(loc=10, scale=5, size=N)
# Interactive integral plot: `lower` and `upper` both range from one unit
# below the smallest data value to one unit above the largest, in steps of
# 0.2, and `data` is passed through unchanged via fixed(). The trailing
# semicolon is the usual notebook idiom for hiding the echoed return value.
interact(integrate,
         data=fixed(data),
         lower=(data.min() - 1, data.max() + 1, 0.2),
         upper=(data.min() - 1, data.max() + 1, 0.2));