# Bayes Core

> Core Bayesian inference functions - updates, sequential processing, and posterior predictive

In [None]:
#| default_exp rbe.bayes_core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import numpy as np
from typing import Optional, Union, List, Callable
from fastcore.test import test_eq, test_close
from fastcore.all import *
from technical_blog.rbe.probability import normalize, sample

## Core Bayesian Updates

The heart of Bayesian inference - updating beliefs with evidence.

The `update()` function is the core implementation of **Bayes' theorem** - it's how we mathematically update our beliefs when we receive new evidence. Let me break it down:

### What Bayes' Theorem Does

Bayes' theorem tells us how to revise our beliefs (prior) when we observe new evidence:

$$P(H|E) = \frac{P(E|H) \times P(H)}{P(E)}$$

Where:
- **P(H|E)** = posterior (updated belief after seeing evidence)
- **P(E|H)** = likelihood (how probable the evidence is under each hypothesis)
- **P(H)** = prior (our initial belief before seeing evidence)
- **P(E)** = evidence (total probability of observing this evidence)

## How the Function Works

```python
def update(prior, likelihood, evidence=None):
    # Returns: (prior * likelihood) / evidence
```

**Step 1: Input Validation**
- Ensures prior and likelihood have the same shape
- Checks for non-negative values (probabilities can't be negative)
- Auto-normalizes the prior if it doesn't sum to 1

**Step 2: Calculate Evidence**
If not provided, evidence is computed as: `evidence = sum(prior * likelihood)`

This represents the total probability of seeing the evidence across all possible hypotheses.

**Step 3: Apply Bayes' Rule**
Returns `(prior * likelihood) / evidence`



In [None]:
#| export
def update(prior, # Prior probabilities (auto-normalized if they do not sum to 1)
           likelihood, # Likelihood of evidence given hypothesis, same shape as `prior`
           evidence=None # Optional evidence, defaults to sum(prior * likelihood)
           ):
    """Update prior beliefs with likelihood using Bayes' theorem.

    Returns `(prior * likelihood) / evidence` as a float64 array.

    Raises `ValueError` when the shapes differ, when any prior or likelihood
    entry is negative, when the evidence is zero (impossible observation), or
    when the evidence is negative or non-finite. Emits a warning when the
    evidence is positive but below 1e-15.
    """
    import warnings

    prior = np.asarray(prior, dtype=np.float64)
    likelihood = np.asarray(likelihood, dtype=np.float64)

    # Validate inputs
    if prior.shape != likelihood.shape: raise ValueError(f"Prior and likelihood shapes don't match: {prior.shape} vs {likelihood.shape}")
    if np.any(prior < 0) or np.any(likelihood < 0): raise ValueError("Prior and likelihood must be non-negative")
    # Normalize prior if needed (common in practice)
    if not np.isclose(np.sum(prior), 1.0): prior = normalize(prior)
    # Compute evidence if not provided
    if evidence is None: evidence = np.sum(prior * likelihood)
    # Check for impossible observation
    if evidence == 0: raise ValueError("Impossible observation: zero evidence")
    # A negative, NaN or infinite normalizer would silently produce negative
    # or NaN "probabilities", so reject it instead of returning garbage.
    if not np.isfinite(evidence) or evidence < 0:
        raise ValueError(f"Evidence must be positive and finite, got {evidence}")
    # Numerical stability check
    if evidence < 1e-15:
        warnings.warn("Very small evidence value - numerical instability possible")

    return (prior * likelihood) / evidence


## Cyber Security Example

Imagine you're detecting network intrusions:

In [None]:
# Prior beliefs about network state (sums to 1, so update() uses it as-is)
prior = [0.9, 0.08, 0.02]  # [normal, suspicious, attack]

# New evidence: unusual port scanning detected
# Likelihood of seeing port scans under each hypothesis
likelihood = [0.01, 0.7, 0.95]  # Very unlikely if normal, likely if attack

# Update beliefs: posterior = prior * likelihood / sum(prior * likelihood)
posterior = update(prior, likelihood)
# Result: attack probability increases significantly (0.02 -> ~0.23),
# and "suspicious" becomes the most probable state.
posterior


array([0.10714286, 0.66666667, 0.22619048])


## Key Features for Robust Applications

**Automatic Normalization**: Handles unnormalized priors (common when combining multiple sources)

**Error Handling**: 
- Detects impossible observations (zero evidence)
- Warns about numerical instability
- Validates input shapes and non-negativity

**Numerical Stability**: Uses float64 precision to handle the tiny probabilities common in some applications

The beauty is that this single function encapsulates the mathematical foundation of all Bayesian learning - whether you're tracking individual threats or updating complex network models, it all comes down to this core update rule!

In [None]:
# Test Bayesian updates: the posterior must be a normalized distribution
prior = np.array([0.3, 0.7])
likelihood = np.array([0.8, 0.2])
posterior = update(prior, likelihood)
test_close(np.sum(posterior), 1.0)
assert posterior[0] > prior[0]  # First hypothesis has the higher likelihood, so it should increase
# Test with unnormalized prior (common in practice): update() normalizes it first
unnorm_prior = [3, 7]  # Sums to 10, not 1
likelihood = [0.8, 0.2]
posterior = update(unnorm_prior, likelihood)
test_close(np.sum(posterior), 1.0)

# Test numerical stability with tiny values
tiny_prior = [1e-10, 1-1e-10]
tiny_likelihood = [1e-10, 1-1e-10]
posterior = update(tiny_prior, tiny_likelihood)
test_close(np.sum(posterior), 1.0)

# Test shape mismatch error
try:
    update([0.5, 0.5], [0.8, 0.2, 0.1])
    # Only reached if no exception was raised; AssertionError is not a
    # ValueError, so it is not swallowed by the except clause below.
    assert False, "Should raise ValueError for shape mismatch"
except ValueError as e:
    assert "shapes don't match" in str(e)

### Sequential
The `sequential` function implements **sequential Bayesian updating** - it's how you process multiple observations one after another, updating your beliefs with each new piece of evidence.


In [None]:
#| export
def sequential(priors,  # initial belief over hypotheses, length n_hypotheses
               likelihoods, # one row of per-hypothesis likelihoods per observation
               evidences=None # optional per-observation evidence passed through to `update`
               ):
    """Chain `update` over a series of observations and return the belief history.

    Row 0 of the result is `priors` as given; row i is the belief after the
    i-th observation. With no observations the result holds the prior alone.
    Raises `ValueError` if `likelihoods` is not 2D, if its second dimension
    differs from `len(priors)`, or if `evidences` has the wrong length.
    """
    priors = np.asarray(priors, dtype=np.float64)
    likelihoods = np.asarray(likelihoods, dtype=np.float64)
    n_obs = len(likelihoods)

    # Nothing observed: the history is just the starting belief.
    if n_obs == 0:
        return np.array([priors])

    if likelihoods.ndim != 2:
        raise ValueError("Likelihoods must be 2D array (n_observations, n_hypotheses)")

    n_hyp = len(priors)
    if likelihoods.shape[1] != n_hyp:
        raise ValueError(f"Likelihood shape {likelihoods.shape} incompatible with prior length {n_hyp}")

    if evidences is None:
        # Let `update` compute the evidence itself at every step.
        evidences = [None for _ in range(n_obs)]
    elif len(evidences) != n_obs:
        raise ValueError("Number of evidences must match number of likelihoods")

    # Each posterior becomes the prior of the next step; snapshots are copied
    # so later steps cannot alias earlier rows of the history.
    belief = priors.copy()
    history = [belief.copy()]
    for row, step_evidence in zip(likelihoods, evidences):
        belief = update(belief, row, step_evidence)
        history.append(belief.copy())

    return np.array(history)


#### What It Does

Instead of updating beliefs with just one observation (like the basic `update` function), `sequential` handles a whole series of observations:

In [None]:
# Start with initial beliefs
prior = [0.95, 0.04, 0.01]  # [normal, suspicious, attack]

# Process multiple observations over time; each row holds the likelihood of
# that observation under [normal, suspicious, attack]
observations = [
    [0.1, 0.6, 0.9],   # High anomaly score
    [0.05, 0.8, 0.95], # Even higher anomaly
    [0.2, 0.3, 0.7],   # Moderate anomaly
    [0.9, 0.1, 0.05]   # Back to normal
]

timeline = sequential(prior, observations)
# Returns: array with the prior in row 0, then the beliefs after each observation
timeline


array([[0.95      , 0.04      , 0.01      ],
       [0.7421875 , 0.1875    , 0.0703125 ],
       [0.14615385, 0.59076923, 0.26307692],
       [0.07483261, 0.45372194, 0.47144545],
       [0.49414824, 0.33289987, 0.17295189]])


#### How It Works

The function performs these steps:

1. **Starts with your prior beliefs**
2. **For each observation**:
   - Takes current beliefs as the "prior" for this update
   - Applies Bayes' theorem using the observation's likelihood
   - The resulting posterior becomes the prior for the next observation
3. **Returns the complete timeline** of how beliefs evolved

Mathematically, it's chaining Bayes' updates:
- After obs 1: `P(H|obs1) = P(obs1|H) × P(H) / P(obs1)`
- After obs 2: `P(H|obs1,obs2) = P(obs2|H) × P(H|obs1) / P(obs2|obs1)` (assuming the observations are conditionally independent given `H`)
- And so on...

#### Key Features for Cyber Security

**Timeline Tracking**: You get the complete evolution of beliefs, not just the final result. This lets you see:
- When threat probability peaked
- How quickly beliefs changed
- Whether the system is converging or oscillating

**Robust Error Handling**: 
- Validates that likelihoods are properly shaped (2D array)
- Ensures evidence counts match observations
- Handles edge cases like empty observation sequences

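**Step-by-Step Processing**: Applies one update per observation, in order. Note that all likelihoods are converted to a single 2D array up front and every intermediate posterior is kept in the returned timeline, so memory use grows with the number of observations.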

#### Cyber Security Example

In a network anomaly detection scenario:

```python
# Each row represents likelihood of observation under each network state
# [normal_likelihood, suspicious_likelihood, attack_likelihood]
network_observations = [
    [0.1, 0.6, 0.9],   # Suspicious traffic pattern
    [0.05, 0.8, 0.95], # Even more suspicious
    [0.9, 0.1, 0.05]   # Returns to normal
]

belief_timeline = sequential(network_prior, network_observations)
```

This gives you a complete picture of how your RBE's confidence in different threat levels evolved as new network data arrived - essential for understanding both the current threat state and the system's decision-making process.

The function essentially turns your single-shot Bayesian update into a learning system that accumulates evidence over time!



##### Edge Cases & Error Conditions

In [None]:
# Test sequential updating: three observations that all favour hypothesis 0
priors = [0.5, 0.5]
likelihoods = [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]]
posteriors = sequential(priors, likelihoods)
assert posteriors.shape == (4, 2)  # Initial + 3 updates
test_close(np.sum(posteriors, axis=1), 1.0)  # Every row (prior included) is normalized

In [None]:
# Empty observations (should return just the prior)
empty_result = sequential([0.6, 0.4], [])
test_eq(empty_result.shape, (1, 2))
test_close(empty_result[0], [0.6, 0.4])

# Single observation (common case)
single_result = sequential([0.5, 0.5], [[0.8, 0.2]])
test_eq(single_result.shape, (2, 2))
# [0.5*0.8, 0.5*0.2] / 0.5
test_close(single_result[1], [0.8, 0.2])

# Test with custom evidences
priors = [0.4, 0.6]
likelihoods = [[0.9, 0.1], [0.7, 0.3]]
evidences = [0.5, 0.8]  # Custom evidence values
result = sequential(priors, likelihoods, evidences)
# Should use provided evidences instead of computing them
test_eq(result.shape, (3, 2))  # Initial + 2 updates
# First step divides by the supplied 0.5 rather than the computed 0.42:
# [0.4*0.9, 0.6*0.1] / 0.5
test_close(result[1], [0.72, 0.12])


##### Cyber Security Specific Tests

In [None]:
# Realistic network anomaly scenario
network_prior = [0.95, 0.04, 0.01]  # [normal, suspicious, attack]

# Sequence of observations over time; each row is the likelihood of the
# observation under [normal, suspicious, attack]
observations = [
    [0.1, 0.6, 0.9],   # High anomaly score
    [0.05, 0.8, 0.95], # Even higher anomaly
    [0.2, 0.3, 0.7],   # Moderate anomaly
    [0.9, 0.1, 0.05]   # Back to normal
]

timeline = sequential(network_prior, observations)

# Verify attack probability peaks and then decreases
attack_probs = timeline[:, 2]  # Extract attack column
# argmax runs on the slice without row 0, so +1 maps back to a timeline index
peak_idx = np.argmax(attack_probs[1:]) + 1  # Skip initial prior
assert attack_probs[peak_idx] > attack_probs[0], "Attack probability should increase"
assert attack_probs[-1] < attack_probs[peak_idx], "Should decrease after normal observation"

##### Numerical Stability Tests

In [None]:
# Very small likelihoods (rare events)
tiny_likelihoods = [[1e-15, 1-1e-15], [1e-14, 1-1e-14]]
result = sequential([0.5, 0.5], tiny_likelihoods)
assert np.all(np.isfinite(result)), "Should handle tiny values"

# Extreme confidence updates
extreme_likes = [[0.999, 0.001], [0.001, 0.999]]
result = sequential([0.5, 0.5], extreme_likes)
# Should handle rapid belief changes without numerical issues
assert np.all(np.isfinite(result)), "Should handle extreme likelihoods"
# From a uniform prior the first posterior equals the likelihood itself ...
test_close(result[1], [0.999, 0.001])
# ... and the mirrored second observation cancels it exactly.
test_close(result[-1], [0.5, 0.5])


##### Input Validation Tests

In [None]:
# Each case below expects a ValueError; the `assert False` is reached only if
# none was raised, and its AssertionError is not caught by `except ValueError`.

# Wrong likelihood dimensions
try:
    sequential([0.5, 0.5], [0.8, 0.2])  # 1D instead of 2D
    assert False, "Should reject 1D likelihoods"
except ValueError as e:
    assert "2D array" in str(e)

# Mismatched evidence count
try:
    sequential([0.5, 0.5], [[0.8, 0.2]], evidences=[0.5, 0.6])  # 2 evidences, 1 likelihood
    assert False, "Should reject mismatched evidence count"
except ValueError as e:
    assert "match number of likelihoods" in str(e)

# Incompatible shapes
try:
    sequential([0.5, 0.5], [[0.8, 0.2, 0.1]])  # 3 hypotheses vs 2 in prior
    assert False, "Should reject shape mismatch"
except ValueError as e:
    assert "incompatible" in str(e)

##### Convergence and Learning Tests

In [None]:
# Test convergence with consistent evidence
consistent_evidence = [[0.9, 0.1]] * 10  # Same observation repeated
result = sequential([0.5, 0.5], consistent_evidence)

# Should converge toward first hypothesis (each step multiplies its odds by 9)
final_belief = result[-1, 0]
assert final_belief > 0.95, "Should strongly favor consistent hypothesis"

# Test belief oscillation with conflicting evidence
conflicting = [[0.9, 0.1], [0.1, 0.9]] * 5  # Alternating evidence
result = sequential([0.5, 0.5], conflicting)
# Each mirrored pair cancels out, so after an even number of observations
# the final belief shouldn't be too extreme in either direction
assert 0.2 < result[-1, 0] < 0.8, "Conflicting evidence should maintain uncertainty"


## Posterior Predictive

Sample from the posterior predictive distribution - what future observations might look like given our current beliefs.

The `predictive` function implements **posterior predictive sampling** - a key technique in Bayesian inference that answers the question: "Given what I've learned so far, what kinds of observations might I see in the future?"

#### What It Does

The function generates synthetic future observations by combining:
1. **Your current beliefs** (posterior distribution over parameters/hypotheses)
2. **The observation model** (likelihood function that maps parameters to observation probabilities)


In [None]:
#| export
def predictive(posterior, likelihood_fn, n_samples=1000, rng=None):
    """Sample `n_samples` observations from the posterior predictive distribution.

    Parameters are drawn from the normalized `posterior`; for each distinct
    drawn parameter, `likelihood_fn(param)` is normalized and sampled as many
    times as that parameter was drawn. The pooled draws are shuffled and
    returned as an integer array. When `rng` is None a fresh
    `np.random.default_rng()` is used.
    """
    if rng is None:
        rng = np.random.default_rng()

    weights = normalize(posterior)
    # NOTE(review): `sample` is a project helper; presumably it returns
    # `n_samples` indices drawn according to `weights` - confirm in rbe.probability.
    drawn_params = sample(weights, n_samples, rng)

    # Call likelihood_fn once per distinct parameter instead of once per draw.
    distinct_params, draw_counts = np.unique(drawn_params, return_counts=True)

    pooled = []
    for param, n_draws in zip(distinct_params, draw_counts):
        observation_dist = normalize(likelihood_fn(param))
        pooled.extend(sample(observation_dist, n_draws, rng))

    # np.unique returns sorted values, so the pool is grouped by parameter;
    # shuffle in place to remove that ordering.
    rng.shuffle(pooled)
    return np.array(pooled, dtype=int)



#### How It Works

The algorithm follows a two-step process that mirrors the generative story of Bayesian models:

**Step 1: Sample Parameters**


```python
param_samples = sample(posterior, n_samples, rng)
```


This draws parameter values according to your current beliefs. If you're 70% confident in "normal network state" and 30% confident in "attack state", about 70% of the samples will be "normal".

**Step 2: Generate Observations**
For each sampled parameter, use the likelihood function to determine what observations that parameter would generate:
```python
obs_dist = normalize(likelihood_fn(param_idx))
obs_samples = sample(obs_dist, count, rng)
```

## Clever Optimization

The function uses **vectorized batch processing** for efficiency:

```python
unique_params, counts = np.unique(param_samples, return_counts=True)
```

Instead of processing 1000 individual samples, it groups them: "I need 700 observations from parameter 0 and 300 from parameter 1." This is much faster than generating observations one by one.

## Cyber Security Applications

**1. Threat Forecasting**
```python
# Current beliefs about network state
network_posterior = [0.6, 0.3, 0.1]  # [normal, suspicious, attack]

def network_observations(state_idx):
    if state_idx == 0:  # Normal state
        return [0.9, 0.08, 0.02]  # [normal_traffic, anomaly, alert]
    elif state_idx == 1:  # Suspicious state  
        return [0.4, 0.5, 0.1]
    else:  # Attack state
        return [0.1, 0.3, 0.6]

# What kinds of network events should we expect?
future_events = predictive(network_posterior, network_observations, n_samples=1000)
```

**2. Anomaly Detection Validation**
Generate synthetic data that matches your current model, then compare with actual observations to detect model drift or new attack patterns.

**3. Alert System Tuning**
Predict how many alerts different threshold settings would generate under your current threat model.

**4. Resource Planning**
Estimate future computational or analyst workload based on predicted event distributions.

## Key Features

**Numerical Stability**: Normalizes both posterior and likelihood distributions to ensure valid probabilities.

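**Reproducibility**: Accepts an optional `rng`; pass a seeded generator (e.g. `np.random.default_rng(42)`) for consistent results across runs. If `rng` is omitted, a fresh unseeded generator is created and results will differ between runs.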

**Bias Removal**: Shuffles final predictions to remove any ordering artifacts from the batch processing.

**Type Safety**: Returns integer indices (not floats) since observations are typically categorical.

## Example Output Interpretation

If you get predictions like `[0, 0, 1, 0, 0, 2, 0, ...]`, this means:
- Most future observations will be type 0 (normal)
- Occasional type 1 observations (suspicious)  
- Rare type 2 observations (attacks)

The relative frequencies tell you what to expect: if 80% of the samples are type 0, your model predicts that about 80% of future observations will be of type 0.

This is invaluable for **proactive security planning** - instead of just reacting to threats, you can anticipate what's likely to happen and prepare accordingly!



In [None]:
# Test posterior predictive
posterior = [0.6, 0.4]
def simple_likelihood(param_idx):
    # Observation distribution over two outcomes for the given parameter index
    if param_idx == 0:
        return [0.8, 0.2]  # Biased toward observation 0
    else:
        return [0.3, 0.7]  # Biased toward observation 1

rng = np.random.default_rng(42)  # Seeded generator passed explicitly to predictive()
predictions = predictive(posterior, simple_likelihood, n_samples=100, rng=rng)
assert len(predictions) == 100
assert np.all((predictions >= 0) & (predictions <= 1))  # Only outcomes 0 and 1 exist

In [None]:
# Posterior over two parameters, favouring parameter 0
posterior = [0.7, 0.3]

def likelihood_fn(param_idx):
    # Observation distribution over two outcomes for the given parameter index
    if param_idx == 0:
        return [0.9, 0.1]  # Parameter 0 strongly predicts observation 0
    else:
        return [0.2, 0.8]  # Parameter 1 strongly predicts observation 1

rng = np.random.default_rng(42)  # Seeded generator passed explicitly to predictive()
predictions = predictive(posterior, likelihood_fn, n_samples=1000, rng=rng)

# Check output format
assert predictions.shape == (1000,), "Should return 1D array"
assert predictions.dtype == int, "Should return integer indices"
assert np.all((predictions >= 0) & (predictions <= 1)), "All predictions should be valid indices"

# Check statistical properties
# Since posterior favors param 0 (0.7 vs 0.3), and param 0 favors obs 0 (0.9 vs 0.1),
# we should see more 0s than 1s in predictions
obs_0_count = np.sum(predictions == 0)
obs_1_count = np.sum(predictions == 1)
assert obs_0_count > obs_1_count, "Should predict observation 0 more often"

# Rough check: expect about 70% * 90% + 30% * 20% = 69% observation 0
expected_ratio = 0.7 * 0.9 + 0.3 * 0.2  # ≈ 0.69
actual_ratio = obs_0_count / 1000
# Allow a 5-percentage-point tolerance for sampling noise
assert abs(actual_ratio - expected_ratio) < 0.05, f"Expected ~{expected_ratio:.2f}, got {actual_ratio:.2f}"



## Bayes Factors

Compare evidence for different hypotheses.

In [None]:
#| export
def bayes_factor(likelihood1, likelihood2, data):
    "Likelihood ratio of `data` under hypothesis 1 versus hypothesis 2"
    def ratio(obs):
        # Both likelihoods are indexed by the observation itself.
        return likelihood1[obs] / likelihood2[obs]

    # A lone observation yields its ratio directly.
    if np.isscalar(data):
        return ratio(data)

    # Several observations are treated as independent, so their ratios
    # multiply; an empty sequence leaves the neutral factor 1.0.
    product = 1.0
    for obs in data:
        product *= ratio(obs)
    return product

def interpret_bf(bf):
    "Map a Bayes factor to a verbal strength-of-evidence label"
    # (exclusive upper bound, label) pairs in ascending order; the first
    # bound that `bf` falls below determines the label.
    scale = (
        (1/100, "Decisive evidence against H1"),
        (1/10, "Strong evidence against H1"),
        (1/3, "Moderate evidence against H1"),
        (1, "Weak evidence against H1"),
        (3, "Weak evidence for H1"),
        (10, "Moderate evidence for H1"),
        (100, "Strong evidence for H1"),
    )
    for upper_bound, label in scale:
        if bf < upper_bound:
            return label
    # Anything at or above 100 (or not comparable as smaller, e.g. NaN).
    return "Decisive evidence for H1"

In [None]:
# Test Bayes factors
like1 = [0.9, 0.1]  # H1: mostly generates observation 0
like2 = [0.2, 0.8]  # H2: mostly generates observation 1

# Scalar data: a single likelihood ratio
bf_single = bayes_factor(like1, like2, 0)
test_close(bf_single, 4.5)  # 0.9/0.2

# Sequence data: product of the per-observation ratios
bf_multiple = bayes_factor(like1, like2, [0, 0, 1])
test_close(bf_multiple, 4.5 * 4.5 * 0.125)  # (0.9/0.2)^2 * (0.1/0.8)

# Test interpretation: 50 falls in [10, 100), 0.5 falls in [1/3, 1)
assert "Strong evidence for" in interpret_bf(50)
assert "Weak evidence against" in interpret_bf(0.5)

## Conjugate Priors

Helper functions for common conjugate prior-likelihood pairs.

In [None]:
#| export
def beta_binomial_update(alpha, beta, successes, failures):
    "Return the Beta posterior parameters after adding observed counts to the prior"
    # Successes are added to alpha and failures to beta.
    posterior_alpha = alpha + successes
    posterior_beta = beta + failures
    return posterior_alpha, posterior_beta

def normal_normal_update(prior_mean, prior_var, data_mean, data_var, n_obs):
    "Return the posterior (mean, variance) of a Normal prior combined with Normal data"
    # Work in precisions (inverse variances): they add, and they weight the means.
    precision_prior = 1 / prior_var
    precision_data = n_obs / data_var
    precision_total = precision_prior + precision_data

    # Precision-weighted average of the prior mean and the data mean.
    weighted_sum = precision_prior * prior_mean + precision_data * data_mean
    posterior_mean = weighted_sum / precision_total
    posterior_var = 1 / precision_total

    return posterior_mean, posterior_var

In [None]:
# Test conjugate updates
# Beta-Binomial: Beta(1, 1) prior plus 7 successes and 3 failures
alpha_post, beta_post = beta_binomial_update(1, 1, 7, 3)
test_eq(alpha_post, 8)
test_eq(beta_post, 4)

# Normal-Normal: prior N(0, 1), 10 observations with mean 2 and variance 0.5
post_mean, post_var = normal_normal_update(0, 1, 2, 0.5, 10)
# Should be weighted toward data due to more observations
# (prior precision 1 vs data precision 10/0.5 = 20, so the mean is 40/21)
assert 1.5 < post_mean < 2.0
assert post_var < 0.5  # Should be more certain than either alone (1/21)

## Export

In [None]:
#| export
# Names exported by `from <module> import *`
__all__ = [
    # Core updates
    'update', 'sequential',
    
    # Posterior predictive
    'predictive',
    
    # Model comparison
    'bayes_factor', 'interpret_bf',
    
    # Conjugate priors
    'beta_binomial_update', 'normal_normal_update'
]