In [11]:
# Derived from the DCGAN paper's Parzen Estimation LL calculations.
#
# Changes: ported the code from Python 2 to Python 3.6+, added more verbose comments on the LL
# methods, and removed the CLI wrapper.
#
# See: https://github.com/goodfeli/adversarial/blob/master/parzen_ll.py

import datetime
import gc

import numpy as np
import theano
import theano.tensor as T

In [10]:
def get_nll(x, parzen, batch_size=10):
    """Credit: Yann N. Dauphin

    Evaluate ``parzen`` over the rows of ``x`` in batches and collect the results.

    The rows are split into ``ceil(x.shape[0] / batch_size)`` interleaved batches:
    batch ``i`` holds rows ``i, i + n_batches, i + 2 * n_batches, ...``. The returned
    values are therefore in batch order, not in the original row order (means and
    standard deviations over the result are unaffected).

    Despite the name, the values are returned exactly as ``parzen`` produces them;
    no sign change is applied here.

    Args:
        x: Array indexable by an integer index array and exposing ``.shape[0]``.
        parzen: Callable taking a batch of rows and returning one value per row.
        batch_size: Target number of rows per batch; must be >= 1.

    Returns:
        ``np.ndarray`` containing every value returned by ``parzen``; empty when
        ``x`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A zero batch size would divide by zero and a negative one would silently yield
    # zero batches (and an empty result), so reject both up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    row_ids = np.arange(x.shape[0])
    n_batches = int(np.ceil(len(row_ids) / batch_size))
    nlls = []
    for i in range(n_batches):
        # Strided selection: every n_batches-th row starting at offset i.
        nlls.extend(parzen(x[row_ids[i::n_batches]]))

        if i % 10 == 0:
            # Timezone-aware replacement for the deprecated utcnow(); tzinfo is dropped
            # so the printed timestamp keeps the same naive-UTC format as before.
            stamp = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            print(f" [{stamp}]\t [{i}/{n_batches}]\t Mean NLL: {np.mean(nlls)}")

    return np.array(nlls)


def log_mean_exp(a):
    """Credit: Yann N. Dauphin

    Build the symbolic expression ``log(mean(exp(a)))`` taken along axis 1.

    The per-row maximum is subtracted before exponentiating (so the largest
    exponent in each row is zero) and added back to the result afterwards.
    """
    row_max = a.max(1)
    # Re-insert a broadcastable second axis so the max lines up with the rows of `a`.
    shifted = a - row_max.dimshuffle(0, "x")
    return row_max + T.log(T.exp(shifted).mean(1))


def theano_parzen(mu, sigma):
    """Credit: Yann N. Dauphin

    Compile a Theano function scoring rows of a matrix under a Gaussian Parzen window.

    Args:
        mu: Kernel centers; wrapped in a Theano shared variable and treated as a
            2-D array with one center per row.
        sigma: Kernel width shared by every center and dimension.

    Returns:
        A compiled ``theano.function`` taking one matrix and returning, per input
        row, the log of the mean Gaussian kernel value over all centers minus the
        log normalization term.
    """
    queries = T.matrix()
    centers = theano.shared(mu)

    # Broadcast (n_queries, 1, dim) against (1, n_centers, dim) to get all pairwise
    # differences, scaled by the kernel width.
    scaled_diff = (queries.dimshuffle(0, "x", 1) - centers.dimshuffle("x", 0, 1)) / sigma
    log_kernel_mean = log_mean_exp(-0.5 * (scaled_diff ** 2).sum(2))

    # Log of the Gaussian normalizer: dim * log(sigma * sqrt(2 * pi)).
    log_norm = centers.shape[1] * T.log(sigma * np.sqrt(np.pi * 2))

    return theano.function([queries], log_kernel_mean - log_norm)


def cross_validate_sigma(samples, data, sigmas, batch_size):
    """Select the kernel width from ``sigmas`` that scores ``data`` highest.

    For every candidate width a Parzen estimator is built from ``samples`` with
    ``theano_parzen`` and evaluated on ``data`` with ``get_nll``; the candidate whose
    mean score is largest is returned.

    Args:
        samples: Kernel centers passed to ``theano_parzen``.
        data: Held-out rows scored by each candidate estimator.
        sigmas: Indexable sequence of candidate kernel widths.
        batch_size: Batch size forwarded to ``get_nll``.

    Returns:
        The element of ``sigmas`` with the highest mean score (the first one on ties,
        per ``np.argmax``).

    Raises:
        ValueError: If ``sigmas`` is empty (raised by ``np.argmax``).
    """
    mean_lls = []
    for sigma in sigmas:
        print(f"Cross Validating σ = {sigma}")

        parzen = theano_parzen(samples, sigma)
        scores = get_nll(data, parzen, batch_size=batch_size)

        # The file only binds NumPy as `np`; the bare `numpy` name is not in scope.
        mean_lls.append(np.asarray(scores).mean())

        # Drop the compiled function before building the next one to keep memory flat.
        del parzen
        gc.collect()

    best = np.argmax(mean_lls)
    return sigmas[best]

In [None]:
# Parzen-window log-likelihood evaluation of generated samples.
#
# If we are comfortable estimating sigma for the Gaussian (or have estimated it before), then
# set DATASET_SIGMA and the cross-validation sweep below is skipped.

# Inputs...
BATCH_SIZE = 128
DATASET_SIGMA = None  # Known kernel width, or None to cross-validate one below

# Get Data From G(Z)
generated_data = None  # TODO: placeholder -- must be populated before this cell can run

# Get Data From MSLS
msls_real_data = None  # TODO: placeholder -- must be populated before this cell can run

if DATASET_SIGMA:
    # The CLI wrapper (and its `args` namespace) was removed; read the notebook constant.
    sigma = float(DATASET_SIGMA)
else:
    # To validate sigma; pull real data from DataLoader
    validation_data = None  # TODO: placeholder -- must be populated before this cell can run

    sigma = cross_validate_sigma(
        generated_data,
        validation_data,
        np.logspace(-1.0, 0, num=10),  # Default Sigma Space...
        BATCH_SIZE,  # Default Batch Size
    )

print(f"Using Sigma: {sigma}")
gc.collect()

# fit and evaluate
parzen = theano_parzen(generated_data, sigma)

# NOTE(review): the original referenced an undefined `test.X`. `msls_real_data` is the only
# real dataset defined in this cell, so it is assumed to be the test set -- confirm.
ll = get_nll(msls_real_data, parzen, batch_size=BATCH_SIZE)

# Standard error of the mean over the evaluated rows.
se = ll.std() / np.sqrt(msls_real_data.shape[0])

print(f"Log-Likelihood of test set = {ll.mean()}, se: {se}")