# Description

In this notebook, I explore entropy as a measure of prediction uncertainty. Intuition:
- Low entropy (low uncertainty) => High confidence
- High entropy (high uncertainty) => Low confidence

In [2]:
import numpy as np

In [7]:
def calculate_entropy(prob_distribution, epsilon=1e-9):
    """
    Calculate the Shannon entropy (natural log, i.e. in nats) of a probability distribution.

    Parameters:
    - prob_distribution (list or numpy array): A list or array containing probabilities for each possible word in the vocabulary. All elements are summed, regardless of the array's shape.
    - epsilon (float): Lower bound applied to each probability before taking the logarithm, so that zero probabilities do not evaluate log(0). Probabilities at or above epsilon are used unchanged.

    Returns:
    - entropy (float): The entropy of the distribution, indicating the uncertainty/confidence of the prediction (lower entropy => higher confidence).
    """
    probs = np.asarray(prob_distribution)
    # Floor the log argument rather than adding epsilon to it: adding shifts
    # every term and makes a one-hot distribution come out slightly negative,
    # whereas flooring is exact for p >= epsilon and still gives 0 * log(epsilon) = 0
    # for zero-probability entries.
    log_probs = np.log(np.maximum(probs, epsilon))
    return -np.sum(probs * log_probs)

In [10]:
# Entropy strictly below this cutoff is reported as a confident prediction.
ENTROPY_THRESHOLD = 0.5

# Example probability distribution for a predicted word
prob_distribution = [0.9, 0.03, 0.03, 0.01, 0.01, 0.005, 0.015]
entropy = calculate_entropy(prob_distribution)
print(f"Entropy: {entropy:.4f}")

# Report the verdict: low entropy means the distribution is peaked.
print(
    "Prediction is confident"
    if entropy < ENTROPY_THRESHOLD
    else "Prediction is uncertain"
)

Entropy: 0.4868
Prediction is confident
