# Fitting large maximum entropy models with simulation - Berger machine translation example

## Here is an example with simulation on a tiny problem

It demonstrates, conceptually, how to use simulation, and how to do so with the `maxentropy` API.

As in `example_berger.py`, this is the machine translation example
-- English to French -- from the paper 'A maximum entropy approach
to natural language processing' by Berger et al., 1996.

Consider the translation of the English word 'in' into French.  We
notice in a corpus of parallel texts the following facts:

    (1)    p(dans) + p(en) + p(à) + p(au cours de) + p(pendant) = 1
    (2)    p(dans) + p(en) = 3/10
    (3)    p(dans) + p(à)  = 1/2

This code finds the probability distribution with maximal entropy
subject to these constraints **without enumerating the sample space**,
using importance sampling instead.

This is overkill for such a tiny problem (which can be solved analytically),
but it demonstrates, in principle, how to use simulation to solve larger problems
on a continuous or larger discrete sample space.

In [1]:
from __future__ import print_function

import sys

import maxentropy
from maxentropy.maxentutils import dictsampler

In [2]:
import numpy as np

In [3]:
# The five candidate French translations of the English word 'in'
samplespace = [
    'dans',
    'en',
    'à',
    'au cours de',
    'pendant',
]

In [4]:
@np.vectorize
def f0(x):
    """Indicator feature: true when `x` equals one of the entries of `samplespace`."""
    return any(candidate == x for candidate in samplespace)

@np.vectorize
def f1(x):
    """Indicator feature: true when `x` is 'dans' or 'en'."""
    return x in ('dans', 'en')

@np.vectorize
def f2(x):
    """Indicator feature: true when `x` is 'dans' or 'à'."""
    return x in ('dans', 'à')

# Feature functions, listed in the same order as constraints (1)-(3) above
f = [f0, f1, f2]

In [5]:
# Sanity check: evaluate the first feature function on a single point
f0('dans')

array(True, dtype=bool)

In [6]:
# Give every point of the sample space the same weight, i.e. define a
# uniform instrumental distribution for sampling
samplefreq = dict.fromkeys(samplespace, 1)

In [7]:
# Build the auxiliary sampler from the uniform frequencies. Per the recorded
# output of the next cell, each `next()` yields (points, log-probabilities).
# NOTE(review): size=10**5 is presumably the number of points per draw --
# confirm against dictsampler.
auxiliary_sampler = dictsampler(samplefreq, size=10**5, return_probs='logprob')

In [8]:
# Draw once from the sampler to inspect what it yields
next(auxiliary_sampler)

(array(['pendant', 'pendant', 'à', ..., 'en', 'à', 'en'], dtype=object),
 array([-1.60943791, -1.60943791, -1.60943791, ..., -1.60943791,
        -1.60943791, -1.60943791]))

In [9]:
# Create the simulation-based model, handing it the auxiliary sampler
model = maxentropy.BigModel(auxiliary_sampler)

In [10]:
# Default: model.algorithm = 'CG'
# Can choose from ['CG', 'BFGS', 'LBFGSB', 'Powell', 'Nelder-Mead']

In [11]:
# Desired feature expectations, one entry per feature function:
K = [
    1.0,  # constraint (1): the probabilities sum to 1
    0.3,  # constraint (2): p(dans) + p(en) = 3/10
    0.5,  # constraint (3): p(dans) + p(à) = 1/2
]

In [12]:
from maxentropy.maxentutils import importance_sampler, create_vectorized_feature_function

ImportError: cannot import name 'importance_sampler'

In [None]:
# Combine the list of feature functions into a single callable `features`
# NOTE(review): sparse=False presumably selects a dense return value -- confirm
features = create_vectorized_feature_function(f, sparse=False)

In [None]:
# Draw from the auxiliary sampler and unpack the pair it yields
xs, logprobs = next(auxiliary_sampler)

In [None]:
# Display the first element of the pair just drawn
xs

In [None]:
# Evaluate the combined feature function on the drawn sample
features(xs)

In [None]:
# Replace the model's sample generator with one built from the combined
# feature function and the auxiliary sampler, then reset the model with
# the number of features.
# NOTE(review): in the recorded run above, importing `importance_sampler`
# raised ImportError, so this cell cannot execute as written.
model.samplegen = importance_sampler(features, auxiliary_sampler)
model.reset(len(f))

In [None]:
# Inspect one item produced by the model's sample generator
next(model.samplegen)

In [None]:
# NOTE(review): presumably draws a fresh sample from model.samplegen -- confirm
model.resample()

In [None]:
# Turn on verbose output before fitting
model.verbose = True

# Fit the model to the feature functions `f` with target expectations `K`
# (optional tolerance override, left disabled:)
# model.avegtol = 1e-5
model.fit(f, K)

In [None]:
# Output the fitted parameter vector (not a probability distribution)
print("Fitted model parameters are:")
model.params

In [None]:
# Build a second model directly on the explicit sample space and copy the
# parameters fitted by the simulation-based model into it
smallmodel = maxentropy.Model(samplespace)
smallmodel.setparams(model.params)

In [None]:
# Display the parameters now held by smallmodel
smallmodel.params

In [None]:
# Give smallmodel the same list of feature functions
smallmodel.setfeatures(f)

In [None]:
# Display smallmodel.F in dense form
# NOTE(review): F is presumably the feature matrix set up by setfeatures -- confirm
smallmodel.F.todense()

In [None]:
# Keep the transpose of the dense form of smallmodel.F as F, and display it
F = smallmodel.F.todense().T
F

In [None]:
# Display the parameter vector again, ahead of the products below
smallmodel.params

In [None]:
# Product of the dense, transposed matrix F with the parameter vector
F.dot(smallmodel.params)

In [None]:
# The same product, formed from smallmodel.F directly without densifying it
smallmodel.F.T.dot(smallmodel.params)

In [None]:
# Print a heading, then have smallmodel display its distribution
print("\nFitted distribution is:")
smallmodel.showdist()

In [None]:
# Now show how well the constraints are satisfied.
#
# `p` was used below without ever being defined in this notebook, which
# raises NameError. Take it from the enumerated model: probdist() returns the
# fitted pmf indexed by position in the sample space, so with
# samplespace = ['dans', 'en', 'à', ...] we have p[0] -> 'dans',
# p[1] -> 'en' and p[2] -> 'à'.
p = smallmodel.probdist()

print()
print("Desired constraints:")
print("\tp['dans'] + p['en'] = 0.3")
print("\tp['dans'] + p['à']  = 0.5")
print()
print("Actual expectations under the fitted model:")
print("\tp['dans'] + p['en'] =", p[0] + p[1])
print("\tp['dans'] + p['à']  =", p[0] + p[2])

# `model` estimates its expectations by simulation, whereas `smallmodel`
# works on the explicit sample space; the messages below label the two
# errors as "estimated" and "true" accordingly.
print("\nEstimated error in constraint satisfaction (should be close to 0):\n"
      + str(abs(model.expectations() - K)))
print("\nTrue error in constraint satisfaction (should be close to 0):\n"
      + str(abs(smallmodel.expectations() - K)))