# Bayes Core

> Core Bayesian inference functions - updates, sequential processing, and posterior predictive

In [None]:
#| default_exp rbe.bayes_core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import numpy as np
from typing import Optional, Union, List, Callable
from fastcore.test import test_eq, test_close
from fastcore.all import *
from technical_blog.rbe.probability import normalize, sample

## Core Bayesian Updates

The heart of Bayesian inference - updating beliefs with evidence.

The `update()` function is the core implementation of **Bayes' theorem**: it is how we mathematically revise our beliefs when new evidence arrives. The sections below break it down step by step.

### What Bayes' Theorem Does

Bayes' theorem tells us how to revise our beliefs (prior) when we observe new evidence:

$$P(H|E) = \frac{P(E|H) \times P(H)}{P(E)}$$

Where:
- **P(H|E)** = posterior (updated belief after seeing evidence)
- **P(E|H)** = likelihood (how probable the evidence is under each hypothesis)
- **P(H)** = prior (our initial belief before seeing evidence)
- **P(E)** = evidence (total probability of observing this evidence, summed over all hypotheses)

## How the Function Works

```python
def update(prior, likelihood, evidence=None):
    return (prior * likelihood) / evidence  # evidence defaults to sum(prior * likelihood)
```

**Step 1: Input Validation**
- Ensures prior and likelihood have the same shape
- Checks for non-negative values (probabilities can't be negative)
- Auto-normalizes the prior if it doesn't sum to 1

**Step 2: Calculate Evidence**
If not provided, evidence is computed as: `evidence = sum(prior * likelihood)`

This represents the total probability of seeing the evidence across all possible hypotheses.

**Step 3: Apply Bayes' Rule**
Returns `(prior * likelihood) / evidence`



In [None]:
#| export
def update(prior, # Prior probabilities
           likelihood, # Likelihood of evidence given hypothesis
           evidence=None # Optional evidence, defaults to sum(prior * likelihood)
           ):
    """Update prior beliefs with likelihood using Bayes' theorem.

    Both inputs are converted to float64 arrays and must have the same shape.
    A prior that does not sum to 1 is passed through `normalize` first.
    Returns `(prior * likelihood) / evidence`.

    Raises `ValueError` when the shapes differ, when any input value is
    negative or non-finite, when the evidence is zero (impossible
    observation), or when a supplied evidence is negative or non-finite.
    Emits a warning when the evidence is below 1e-15.
    """
    import warnings

    prior = np.asarray(prior, dtype=np.float64)
    likelihood = np.asarray(likelihood, dtype=np.float64)

    # Validate inputs
    if prior.shape != likelihood.shape: raise ValueError(f"Prior and likelihood shapes don't match: {prior.shape} vs {likelihood.shape}")
    # NaN/inf pass a `< 0` comparison unnoticed and would silently poison the posterior
    if not (np.all(np.isfinite(prior)) and np.all(np.isfinite(likelihood))): raise ValueError("Prior and likelihood must be finite")
    if np.any(prior < 0) or np.any(likelihood < 0): raise ValueError("Prior and likelihood must be non-negative")
    # Normalize prior if needed (common in practice)
    if not np.isclose(np.sum(prior), 1.0): prior = normalize(prior)
    # Compute evidence if not provided
    if evidence is None: evidence = np.sum(prior * likelihood)
    # Check for impossible observation
    if evidence == 0: raise ValueError("Impossible observation: zero evidence")
    # A negative or non-finite evidence would yield negative or NaN "probabilities"
    if not np.isfinite(evidence) or evidence < 0: raise ValueError("Evidence must be positive and finite")
    # Numerical stability check
    if evidence < 1e-15:
        # stacklevel=2 attributes the warning to the caller's line
        warnings.warn("Very small evidence value - numerical instability possible", stacklevel=2)

    return (prior * likelihood) / evidence


## Cyber Security Example

Imagine you're detecting network intrusions:

In [None]:
# Prior beliefs about network state, one entry per hypothesis
prior = [0.9, 0.08, 0.02]  # [normal, suspicious, attack]

# New evidence: unusual port scanning detected
# Likelihood of seeing port scans under each hypothesis (same order as the prior)
likelihood = [0.01, 0.7, 0.95]  # Very unlikely if normal, likely if attack

# Update beliefs: posterior is proportional to prior * likelihood
posterior = update(prior, likelihood)
# Result: attack probability rises from 0.02 to about 0.23, and "suspicious"
# becomes the most probable state (about 0.67)
posterior


array([0.10714286, 0.66666667, 0.22619048])


## Key Features for Robust Applications

**Automatic Normalization**: Handles unnormalized priors (common when combining multiple sources)

**Error Handling**: 
- Detects impossible observations (zero evidence)
- Warns about numerical instability
- Validates input shapes and non-negativity

**Numerical Stability**: Uses float64 precision to handle the tiny probabilities common in some applications

The beauty is that this single function encapsulates the mathematical foundation of all Bayesian learning - whether you're tracking individual threats or updating complex network models, it all comes down to this core update rule!

In [None]:
# Test Bayesian updates: a likelihood favouring hypothesis 0 must raise its probability
prior = np.array([0.3, 0.7])
likelihood = np.array([0.8, 0.2])
posterior = update(prior, likelihood)
test_close(posterior.sum(), 1.0)
assert prior[0] < posterior[0]  # First hypothesis should increase

# An unnormalized prior (sums to 10, not 1) must still give a normalized posterior
unnorm_prior = [3, 7]
likelihood = [0.8, 0.2]
posterior = update(unnorm_prior, likelihood)
test_close(posterior.sum(), 1.0)

# Numerical stability with tiny values
tiny_prior = [1e-10, 1-1e-10]
tiny_likelihood = [1e-10, 1-1e-10]
posterior = update(tiny_prior, tiny_likelihood)
test_close(posterior.sum(), 1.0)

# A shape mismatch must be rejected with a descriptive ValueError
shape_error = None
try:
    update([0.5, 0.5], [0.8, 0.2, 0.1])
except ValueError as e:
    shape_error = e
assert shape_error is not None, "Should raise ValueError for shape mismatch"
assert "shapes don't match" in str(shape_error)

In [None]:
#| export
def sequential(priors, likelihoods, evidences=None):
    """Sequential updating of `priors` with `likelihoods`.

    Applies `update` once per likelihood, feeding each posterior in as the
    next prior. `evidences`, when given, supplies one evidence value per
    likelihood; otherwise each update computes its own.

    Returns an array whose first row is `priors` as passed in (not
    normalized here) followed by one row per update.

    Raises `ValueError` when `evidences` and `likelihoods` differ in length.
    """
    # Materialise both so generators work and the lengths can be compared
    likelihoods = list(likelihoods)
    evidences = [None] * len(likelihoods) if evidences is None else list(evidences)
    # zip() would silently drop the unmatched tail, skipping updates without notice
    if len(evidences) != len(likelihoods):
        raise ValueError(f"Got {len(likelihoods)} likelihoods but {len(evidences)} evidences")

    posterior = np.array(priors)
    history = [posterior]

    for likelihood, evidence in zip(likelihoods, evidences):
        posterior = update(posterior, likelihood, evidence)
        history.append(posterior)

    return np.array(history)

In [None]:
# Test sequential updating: the history holds the initial prior plus one row per update
priors = [0.5, 0.5]
likelihoods = [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]]
posteriors = sequential(priors, likelihoods)
assert posteriors.shape == (len(likelihoods) + 1, len(priors))  # Initial + 3 updates
test_close(posteriors.sum(axis=1), 1.0)  # Every row is normalized

## Posterior Predictive

Sample from the posterior predictive distribution - what future observations might look like given our current beliefs.

In [None]:
#| export
def predictive(posterior, likelihood_fn, n_samples=1000, rng=None):
    """Sample from posterior predictive distribution.

    Draws `n_samples` parameter values from `posterior` via `sample`, then for
    each one calls `likelihood_fn(param)` to get a distribution over
    observations and draws a single observation from it. A fresh
    `np.random.default_rng()` is used when `rng` is not supplied.

    Returns the per-parameter draws stacked with `np.array`.
    """
    generator = np.random.default_rng() if rng is None else rng

    # First all parameter draws, then one observation draw per parameter
    drawn_params = sample(posterior, n_samples, generator)
    draws = [sample(likelihood_fn(param), 1, generator) for param in drawn_params]

    return np.array(draws)

In [None]:
# Test posterior predictive
posterior = [0.6, 0.4]
def simple_likelihood(param_idx):
    # Parameter 0 is biased toward observation 0; any other parameter toward observation 1
    return [0.8, 0.2] if param_idx == 0 else [0.3, 0.7]

rng = np.random.default_rng(42)
predictions = predictive(posterior, simple_likelihood, n_samples=100, rng=rng)
assert len(predictions) == 100
assert np.all((0 <= predictions) & (predictions <= 1))

## Bayes Factors

Compare evidence for different hypotheses.

In [None]:
#| export
def bayes_factor(likelihood1, likelihood2, data):
    """Calculate Bayes factor for hypothesis 1 vs 2 given `data`.

    `likelihood1` and `likelihood2` are indexed by observation. A scalar
    `data` is treated as a single observation; otherwise `data` is iterated
    and the per-observation ratios are multiplied together (observations are
    assumed independent). Empty `data` gives 1.0.
    """
    def _ratio(obs):
        # Likelihood ratio H1 / H2 for one observation
        return likelihood1[obs] / likelihood2[obs]

    if np.isscalar(data):
        return _ratio(data)

    product = 1.0
    for obs in data:
        product *= _ratio(obs)
    return product

def interpret_bf(bf):
    """Interpret Bayes factor strength.

    Maps `bf` (evidence for H1 relative to H2) onto a verbal scale with
    cut points at 1/100, 1/10, 1/3, 1, 3, 10 and 100. Each cut point belongs
    to the band above it, so `bf == 1` reads as weak evidence for H1.

    Raises `ValueError` when `bf` is NaN or negative.
    """
    # NaN fails every `<` comparison, so without this guard it would fall
    # through to the final "decisive evidence for" label; `bf != bf` is only
    # true for NaN. A negative ratio of likelihoods is not meaningful either.
    if bf != bf or bf < 0:
        raise ValueError(f"Bayes factor must be a non-negative number, got {bf}")

    # (exclusive upper bound, label) in ascending order
    bands = (
        (1/100, "Decisive evidence against H1"),
        (1/10, "Strong evidence against H1"),
        (1/3, "Moderate evidence against H1"),
        (1, "Weak evidence against H1"),
        (3, "Weak evidence for H1"),
        (10, "Moderate evidence for H1"),
        (100, "Strong evidence for H1"),
    )
    for upper, label in bands:
        if bf < upper:
            return label
    return "Decisive evidence for H1"

In [None]:
# Test Bayes factors
like1 = [0.9, 0.1]  # H1: mostly generates observation 0
like2 = [0.2, 0.8]  # H2: mostly generates observation 1

# Single observation: the factor is the plain likelihood ratio
bf_single = bayes_factor(like1, like2, 0)
test_close(bf_single, 4.5)  # 0.9/0.2

# Several observations: the per-observation ratios multiply
bf_multiple = bayes_factor(like1, like2, [0, 0, 1])
test_close(bf_multiple, 4.5 * 4.5 * 0.125)  # (0.9/0.2)^2 * (0.1/0.8)

# Test interpretation
for bf_value, expected in [(50, "Strong evidence for"), (0.5, "Weak evidence against")]:
    assert expected in interpret_bf(bf_value)

## Conjugate Priors

Helper functions for common conjugate prior-likelihood pairs.

In [None]:
#| export
def beta_binomial_update(alpha, beta, successes, failures):
    """Update Beta prior with binomial data.

    Returns the posterior `(alpha, beta)` pair: successes are added to
    `alpha` and failures to `beta`.
    """
    posterior_alpha = alpha + successes
    posterior_beta = beta + failures
    return posterior_alpha, posterior_beta

def normal_normal_update(prior_mean, prior_var, data_mean, data_var, n_obs):
    """Update Normal prior with Normal likelihood.

    Combines the prior and the data by precision (inverse variance): the
    prior contributes `1 / prior_var` and the data `n_obs / data_var`.

    Returns `(posterior_mean, posterior_variance)`.
    """
    w_prior = 1 / prior_var
    w_data = n_obs / data_var

    # Precisions add; the posterior mean is the precision-weighted average
    total_prec = w_prior + w_data
    weighted_sum = w_prior * prior_mean + w_data * data_mean

    return weighted_sum / total_prec, 1 / total_prec

In [None]:
# Test conjugate updates
# Beta-Binomial: Beta(1, 1) prior plus 7 successes and 3 failures
alpha_post, beta_post = beta_binomial_update(1, 1, 7, 3)
test_eq(alpha_post, 8)
test_eq(beta_post, 4)

# Normal-Normal: prior N(0, 1), 10 observations with mean 2 and variance 0.5
post_mean, post_var = normal_normal_update(0, 1, 2, 0.5, 10)
# Should be weighted toward data due to more observations
assert 1.5 < post_mean < 2.0
# Should be more certain than either source alone: the prior has variance 1
# and the data alone has variance 0.5 / 10 (a bound of 0.5 would not check this)
assert post_var < min(1, 0.5 / 10)

## Export

In [None]:
#| export
# Public API of this module, one name per line, grouped by topic
__all__ = [
    'update',                # core updates
    'sequential',
    'predictive',            # posterior predictive
    'bayes_factor',          # model comparison
    'interpret_bf',
    'beta_binomial_update',  # conjugate priors
    'normal_normal_update',
]