In [1]:
import numpy as np
import math

In [11]:

def compute_average_mutual_information(filename, sigma, bins='auto'):
    """
    Compute the average mutual information I_c from trajectory data.
    
    The data file (filename) is assumed to be a text file containing
    an array of shape (n, 2*M) where:
      - n ~ 4000 is the number of time steps
      - 2*M is the total number of columns with M trajectory pairs:
            * Column 0, 2, 4, ... are the X trajectories.
            * Column 1, 3, 5, ... are the corresponding Y trajectories.
    The formula used is:
    
        I_c = - n * ln(√(2π)*σ)
              - (1/(2σ² * M)) * Σ_{m=1}^{M} Σ_{k=1}^{n} (x_{mk} - y_{mk})²
              + Σ_{k=1}^{n} H(P[y_k])
              
    where H(P[y_k]) is the Shannon entropy (using natural log) of the distribution
    of the Y values at time step k.
    
    Parameters
    ----------
    filename : str
        Name of the file containing the data.
    sigma : float
        The measurement noise standard deviation.
    bins : int or str, optional
        Parameter for np.histogram to estimate the density. Default is 'auto'.
        
    Returns
    -------
    I_c_est : float
        The estimated average mutual information (in nats).
    """
    # Load the data; assumes whitespace-separated values.
    data = np.loadtxt(filename,encoding="utf-16")
    
    # Determine dimensions: n time steps and total columns = 2*M.
    n, total_cols = data.shape
    M = total_cols // 2
    
    # ----- Term 1: Constant Gaussian factor -----
    term1 = - n * math.log(math.sqrt(2 * math.pi) * sigma)
    
    # ----- Term 2: Sum of squared differences between X and Y -----
    sum_sq_diff = 0.0
    for m in range(M):
        # Extract the m-th X trajectory (column index 2*m)
        x_traj = data[:, 2 * m]
        # Extract the m-th Y trajectory (column index 2*m + 1)
        y_traj = data[:, 2 * m + 1]
        # Sum squared differences for this trajectory
        sum_sq_diff += np.sum((x_traj - y_traj) ** 2)
    term2 = - (1.0 / (2 * sigma**2 * M)) * sum_sq_diff
    
    # ----- Term 3: Sum over time steps of the entropy of P[y_k] -----
    # For each time step, we gather the Y values from all M trajectories.
    entropy_sum = 0.0
    for k in range(n):
        # y_k contains the Y values at time step k from all trajectories.
        y_k = data[k, 1::2]  # slice starting at index 1 with step 2
        # Estimate the density using a histogram; density=True returns a PDF.
        counts, bin_edges = np.histogram(y_k, bins=bins, density=True)
        # Get bin widths and approximate the probability mass in each bin.
        bin_widths = np.diff(bin_edges)
        pmf = counts * bin_widths
        # Remove zero probabilities to avoid log(0).
        pmf = pmf[pmf > 0]
        # Compute Shannon entropy (in nats) for this time step.
        H_k = -np.sum(pmf * np.log(pmf))
        entropy_sum += H_k
    term3 = entropy_sum
    
    # ----- Combine terms to obtain the estimated mutual information -----
    I_c_est = term1 + term2 + term3
    return I_c_est

if __name__ == '__main__':
    # File containing the trajectories
    filename = "trajectories.txt"
    
    # Given noise sigma is 1/11
    sigma = 1.0/0.5
    time = 10
    
    # Compute the average mutual information divided by time
    I_c_estimated = compute_average_mutual_information(filename, sigma)/time
    print(I_c_estimated)


  data = np.loadtxt(filename,encoding="utf-16")


ValueError: not enough values to unpack (expected 2, got 1)