In [2]:
import pandas as pd 
import numpy as np 
from scipy.special import logsumexp

In [None]:
# ---------------- Parameters ----------------
sigma = 1/11  # Noise standard deviation

# ---------------- Data Input ----------------
# Header-less, whitespace-separated, UTF-16 encoded text file.
# ``sep=r'\s+'`` is the supported spelling of the deprecated
# ``delim_whitespace=True`` option and splits the columns the same way.
data = pd.read_csv('trajectories.txt', header=None, sep=r'\s+', encoding='utf-16')

# Rows are time steps; columns are interleaved as
# position, measurement, position, measurement, ...
T, total_cols = data.shape
M = total_cols // 2  # Number of trajectories (position/measurement pairs)

# De-interleave the columns into two arrays with one trajectory per column.
positions = data.iloc[:, 0::2].values   # True positions: even-numbered columns
measurements = data.iloc[:, 1::2].values  # Measurements: odd-numbered columns

# ---------------- Conditional Entropy H(X | S) ----------------
# Compute log P(x | y) for each trajectory pair (i, j), now including normalized equilibrium probability

def compute_log_p_x_given_y(i):
    """Return the mean unnormalised log-weight of measurement trajectory ``i``.

    Measurement trajectory ``y_i`` (column ``i`` of ``measurements``) is
    compared against every position trajectory ``x_j`` (the columns of
    ``positions``).  For each ``j`` the value

        log(eq_weights) - sum_t (y_i[t] - x_j[t])**2 / (2 * sigma**2)

    is formed, and the arithmetic mean of these values over ``j`` is returned.

    Args:
        i: Column index of the measurement trajectory in ``measurements``.

    Returns:
        The mean over all position trajectories of the log-weights above.

    Note:
        ``measurements``, ``positions``, ``T``, ``sigma`` and ``eq_weights``
        are read from module scope.  ``eq_weights`` is not defined in this
        cell, so it has to be provided elsewhere before the first call.
    """
    # Keep y_i as a (T, 1) column so it broadcasts against every x_j column.
    y_i = measurements[:, i].reshape(T, 1)
    sq_diff = y_i - positions
    # Square in place: ``diff**2`` would allocate a second array as large as
    # ``positions`` for every call, doubling the peak temporary memory.
    np.square(sq_diff, out=sq_diff)
    sum_sq_diff = sq_diff.sum(axis=0)  # Sum over time: one value per x_j
    log_weights = np.log(eq_weights) - sum_sq_diff / (2 * sigma**2)
    return np.mean(log_weights)  # Average over the position trajectories

# One value per measurement trajectory, in column order.
per_trajectory_log_p = [compute_log_p_x_given_y(idx) for idx in range(M)]
log_p_x_given_y = np.asarray(per_trajectory_log_p)

# Conditional entropy estimate: negated average of the per-trajectory values.
H_X_given_S = -log_p_x_given_y.mean()

# ---------------- Marginal Probability Estimate P(x) ----------------
# Monte Carlo estimate: P(x) ≈ (1/M) sum_j P(x | s'_j)

def compute_log_p_x(i):
    """Return ``log((1/M) * sum_j exp(L_j))`` with ``L_j = compute_log_p_x_given_y(j)``.

    The ``L_j`` values are exactly the entries of the module-level array
    ``log_p_x_given_y`` computed earlier in this script, so that array is
    reused here instead of re-evaluating all ``M`` entries on every call
    (which made ``M`` calls cost ``M**2`` evaluations).

    Args:
        i: Trajectory index.  Kept for interface compatibility only; the
            returned estimate does not depend on it.

    Returns:
        ``logsumexp(log_p_x_given_y) - log(M)``.
    """
    # NOTE(review): the estimate is the same for every ``i``, so H(X) equals
    # -log(mean(exp(L))) and I(X;S) = H(X) - H(X|S) can never be positive.
    # The marginal was presumably meant to depend on x_i -- confirm the
    # intended estimator before relying on the reported mutual information.
    return logsumexp(log_p_x_given_y) - np.log(M)

per_trajectory_log_marginal = [compute_log_p_x(idx) for idx in range(M)]
log_p_x = np.asarray(per_trajectory_log_marginal)

# ---------------- Marginal Entropy H(X) ----------------
# Negated average of the per-trajectory log-marginal estimates.
H_X = -log_p_x.mean()

# ---------------- Mutual Information Calculation ----------------
# I(X;S) = H(X) - H(X|S)
I_X_S = H_X - H_X_given_S

# ---------------- Results ----------------
# One line per quantity, in the order H(X|S), H(X), I(X;S).
print(
    f"Conditional Entropy H(X|S): {H_X_given_S}",
    f"Marginal Entropy H(X): {H_X}",
    f"Mutual Information I(X;S): {I_X_S}",
    sep="\n",
)


  data = pd.read_csv('trajectories.txt', header=None, delim_whitespace=True, encoding='utf-16')


MemoryError: Unable to allocate 15.3 MiB for an array with shape (3999, 500) and data type float64