In [1]:
import numpy as np

Information Theory

Self-Information

$I(X)=−log2(p)$


$I(A) = −log2(1) = 0$

$I(B) = −log2(3/6) = 1$

$I(C) = −log2(1/6) = 2.58$

$I(X) = I(0101) = −log2(1/2^4) = 4$


In [2]:
def self_information(p):
    return -np.log2(p)

print(self_information(1 / 2**4))

4.0


Entropy

$\mathbb E(X) = \sum_{i=1}^{k} x_{i} · p_{i}$




$H(X) = - \sum_{i} p_{i} \log_{2} p_{i}$


In [3]:
# np.nansum return the sum of NaNs. Treats them as zeros.

def entropy(p):
    out = np.nansum(-p * np.log2(p))

    return out


print(entropy(np.array([0.1, 0.5, 0.1, 0.3])))

1.6854752972273346


Joint Entropy

$H(X, Y) = - \sum_{x} \sum_{y} p_{X, Y}(x, y) \log_{2} p_{X, Y}(x, y)$


In [None]:
def joint_entropy(p_xy):
    out = np.nansum(-p_xy * np.log2(p_xy))

    return out


a = [0.1, 0.5, 0.8]
b = [0.1, 0.3, 0.02]
print(joint_entropy(np.array([a, b])))

2.0558948969327187


In [None]:
entropy(np.array(a)) + entropy(np.array(b))

2.0558948969327187

$H(Y \mid X) = - \sum_{x} \sum_{y} p(x, y) \log_{2} p(y \mid x)$

$H(Y \mid X) = H(X, Y) - H(X)$


In [None]:
def conditional_entropy(p_xy, p_x):
    p_y_given_x = p_xy / p_x
    out = np.nansum(-p_xy * np.log2(p_y_given_x))

    return out


print(conditional_entropy(np.array([[0.1, 0.5], [0.2, 0.3]]), np.array([0.2, 0.8])))

0.863547202339972


Mutual information


$I(X, Y) = H(X, Y) - H(Y \mid X) − H(X \mid Y)$


In [None]:
def mutual_information(p_xy, p_x, p_y):
    p = p_xy / (p_x * p_y)
    out = np.nansum(p_xy * np.log2(p))

    return out


print(mutual_information(np.array([[0.1, 0.5], [0.1, 0.3]]),
                        np.array([0.2, 0.8]),
                        np.array([[0.75, 0.25]])))

0.7194602975157967


Kullback–Leibler Divergence - Relative Entropy

$D_{\mathrm{KL}}(P\|Q) = E_{x \sim P} \left[ \log \frac{p(x)}{q(x)} \right]$

In [None]:
def kl_divergence(p, q):
     kl = p * np.log2(p / q)
     out = np.nansum(p * np.log2(p / q))

     return np.abs(out)

p = np.random.normal(100, 1, size=1000)
q = np.random.normal(100, 1, size=1000)

print(kl_divergence(p, q))

131.65252351910203


Cross Entropy

$\mathrm{CE} (P, Q) = H(P) + D_{\mathrm{KL}}(P\|Q)$


$\mathrm{CE}(\mathbf{y}, \hat{\mathbf{y}}) = - \sum_{i=1}^n \sum_{j=1}^k y_{ij} \log_{2}{p_{\theta} (y_{ij}  \mid  \mathbf{x}_i)}$


In [None]:
def cross_entropy(y_hat, y):
    ce = - np.log(y_hat[range(len(y_hat)), y])

    return ce.mean()


labels = np.array([0, 2])
preds = np.array([[0.3, 0.6, 0.1], [0.2, 0.3, 0.5]])

print(cross_entropy(preds, labels))

0.9485599924429406
