In [1]:
import numpy as np
from scipy.stats import bernoulli

In [2]:
def encoding(arr, maxi, mini, iters, d):

    """
    Stochastically binarise a vector: every coordinate is turned into a
    0/1 draw whose chance of being 1 grows linearly from 0 (at mini) to
    1 (at maxi).

    Args:
        arr       : array to encode with shape (d,)
        maxi, mini: max(arr), min(arr)
        iters     : no. of independent encodings to draw
        d         : len(arr)
    Returns:
        A 2-D array of 1s and 0s of shape (iters, d); entry [t, j] is a
        Bernoulli draw with success probability
        (arr[j] - mini) / (maxi - mini).
        When maxi == mini the probabilities are taken as 0, so the
        result is all 0s.
    """

    arr = np.asarray(arr, dtype=float)
    span = maxi - mini

    if span == 0:
        # Constant vector: the ratio below would be 0/0 = NaN, which
        # bernoulli.rvs rejects as an invalid probability. Both bits
        # decode to the same value here (mini == maxi), so always emit 0.
        prob = np.zeros(arr.shape)
    else:
        # probability of each element being encoded as 1.
        prob = (arr - mini) / span  # shape=(d,)

    # one row of d Bernoulli draws per iteration; prob broadcasts
    # across the 'iters' rows.
    return bernoulli.rvs(prob, size=(iters, d))

def decoding(maxi, mini, encs):
    """
    Map a 0/1 encoding back onto the two values it stands for.

    Args:
        maxi, mini: max(xi), min(xi) of the vector that was encoded
        encs      : output of the encoding function, shape=(iters, d)
    Returns:
        an array shaped like encs that holds maxi wherever encs is
        non-zero and mini everywhere else.
    """

    # True marks the positions that were encoded as 1.
    is_high = np.asarray(encs).astype(bool)

    # 1 -> max(xi), 0 -> min(xi)
    decoded = np.where(is_high, maxi, mini)  # shape = (iters, d)
    return decoded

In [11]:
def sto_k(n, d, ITERS=4096):
    """
    Simulate n clients that each encode/decode a random d-dimensional
    vector, and compare the measured mean-estimation error against the
    closed-form expressions quoted from "DME".

    Args:
        n     - number of clients
        d     - dimensions of the array each client has.
        ITERS - no. of independent encode/decode repetitions used to
                average the squared error.

    Returns:
        None
        (prints, in this order:
            obs_err   - squared L2 error of the estimated mean,
                        averaged over the ITERS repetitions
                        (error as defined in sec-1.2 in DME)
            cal_err   - closed-form error (lemma 2 in DME)
            low_bound - (d-2)/(2 n^2) * sum_i ||x_i||^2 (lemma 4 in DME)
            up_bound  - d/(2 n^2) * sum_i ||x_i||^2 (lemma 3 in DME)
        )

    Notes:
        * The client data comes from np.random.rand without a seed, so
          the printed numbers change from run to run.
        * NOTE(review): low_bound can be far larger than obs_err for
          this uniform data, so it is not a lower bound for every
          input; presumably lemma 4 is a worst-case statement - confirm
          against the paper.
    """

    # one row per client, entries uniform in [0, 1)
    x = np.random.rand(n, d)

    def _round_trip(vec):
        # encode one client's vector ITERS times and decode it again.
        hi = np.max(vec)
        lo = np.min(vec)
        bits = encoding(arr=vec, iters=ITERS, d=d, maxi=hi, mini=lo)
        return decoding(encs=bits, maxi=hi, mini=lo)  # shape = (ITERS, d)

    # client-by-client running sum of the decoded vectors; the generator
    # keeps only one (ITERS, d) array alive besides the accumulator.
    decoded_sum = sum(_round_trip(vec) for vec in x)

    # server-side estimate of the mean, one per repetition.
    x_hat_mean = decoded_sum / n  # shape = (ITERS, d)

    # true mean over the clients.
    x_mean = np.mean(x, axis=0)  # shape = (d,)

    # observed error (sec-1.2 in DME): squared norm of the deviation for
    # every repetition, then the average over repetitions.
    deviation = x_hat_mean - x_mean
    sq_err_per_iter = np.linalg.norm(deviation, axis=1) ** 2  # shape = (ITERS,)
    exp_err = np.mean(sq_err_per_iter)

    # lemma-2 in DME: sum over clients and coordinates of
    # (max_i - x_ij) * (x_ij - min_i), scaled by 1/n^2.
    row_max = np.max(x, axis=1, keepdims=True)
    row_min = np.min(x, axis=1, keepdims=True)
    cal_err = np.sum((row_max - x) * (x - row_min)) / (n ** 2)

    # sum of squared client norms, shared by lemma 3 and lemma 4.
    sq_norm_total = np.sum(np.linalg.norm(x, axis=1) ** 2)

    # lemma-3 in DME:
    up_bnd = ((0.5 * d) / (n ** 2)) * sq_norm_total

    # lemma-4 in DME:
    low_bnd = ((0.5 * (d - 2)) / (n ** 2)) * sq_norm_total

    print("obs_err:", exp_err)
    print("cal_err:", cal_err)
    print("low_bound:", low_bnd)
    print("up_bound:", up_bnd)


In [8]:
# Small case: 5 clients, each holding a 10-dimensional vector (ITERS left at its default).
sto_k(n=5, d=10)

obs_err: 0.18561742034979098
cal_err: 0.18355128012152633
low_bound: 2.8356120574655166
up_bound: 3.5445150718318956


In [9]:
# Large case: 100 clients, each holding a 1000-dimensional vector (ITERS left at its default).
sto_k(n=100, d=1000)

obs_err: 1.6586950314192461
cal_err: 1.657271729806991
low_bound: 1656.2780107734307
up_bound: 1659.5972051837985


In [10]:
# Mid-sized case: 10 clients, each holding a 100-dimensional vector (ITERS left at its default).
sto_k(n=10, d=100)

obs_err: 1.583423317250804
cal_err: 1.5828366491296726
low_bound: 163.85418484256934
up_bound: 167.19814779854013
