In [1]:
#!/usr/bin/env python3

This file illustrates how you might experiment with the HMM interface.
You can paste these commands in at the Python prompt, or execute `test_ic.py` directly.
A notebook interface is nicer than the plain Python prompt, so we provide
a notebook version of this file as `test_ic.ipynb`, which you can open with
`jupyter` or with Visual Studio `code` (run it with the `nlp-class` kernel).

In [2]:
import logging, math, os
from pathlib import Path
from typing import Callable

In [3]:
from corpus import TaggedCorpus
from eval import model_cross_entropy, write_tagging
from hmm import HiddenMarkovModel
from lexicon import build_lexicon
import torch

  from .autonotebook import tqdm as notebook_tqdm


Set up logging.

In [4]:
# Configure logging output first, then grab the logger this notebook writes to.
logging.basicConfig(level=logging.INFO)  # switch INFO to DEBUG for more detail
log = logging.getLogger("test_ic")       # for usage, see findsim.py in the earlier assignment
# Uncomment the next line for better error messages from .backward(); it slows things down.
# torch.autograd.set_detect_anomaly(True)

Switch working directory to the directory where the data live.  You may want to edit this line.

In [5]:
# Move into the sibling "data" directory (one level up, then into data).
os.chdir(os.path.join(os.pardir, "data"))

Make an HMM with randomly initialized parameters.

In [6]:
# Load the supervised ice-cream corpus from the file "icsup" in the current directory.
# NOTE(review): add_oov=False presumably keeps an OOV symbol out of the vocabulary -- confirm in corpus.py.
icsup = TaggedCorpus(Path("icsup"), add_oov=False)
# Log the word types and tag types found in the corpus.
log.info(f"Ice cream vocabulary: {list(icsup.vocab)}")
log.info(f"Ice cream tagset: {list(icsup.tagset)}")
lexicon = build_lexicon(icsup, one_hot=True)   # one-hot lexicon: separate parameters for each word
# Build the HMM over this corpus's tagset and vocabulary, using the lexicon built above.
hmm = HiddenMarkovModel(icsup.tagset, icsup.vocab, lexicon)

INFO:corpus:Read 40 tokens from icsup
INFO:corpus:Created 4 tag types
INFO:corpus:Created 5 word types
INFO:test_ic:Ice cream vocabulary: ['1', '2', '3', '_EOS_WORD_', '_BOS_WORD_']
INFO:test_ic:Ice cream tagset: ['C', 'H', '_EOS_TAG_', '_BOS_TAG_']


In [7]:
log.info("*** Current A, B matrices (computed by softmax from small random parameters)")
# Compute the matrices from the initial parameters (this would normally happen
# during training).  An alternative is to set them directly to some spreadsheet
# values you'd like to try.
hmm.updateAB()
# Print the transition matrix A and the emission matrix B.
hmm.printAB()

INFO:test_ic:*** Current A, B matrices (computed by softmax from small random parameters)


Transition matrix A:
	C	H	_EOS_TAG_	_BOS_TAG_
C	0.334	0.332	0.334	0.000
H	0.332	0.334	0.334	0.000
_EOS_TAG_	0.333	0.335	0.332	0.000
_BOS_TAG_	0.333	0.333	0.334	0.000

Emission matrix B:
	1	2	3
C	0.332	0.334	0.334
H	0.332	0.334	0.334
_EOS_TAG_	0.000	0.000	0.000
_BOS_TAG_	0.000	0.000	0.000




While training on ice cream, we will just evaluate the cross-entropy
on the training data itself (icsup), since we are interested in watching it improve.

In [8]:
log.info("*** Supervised training on icsup")


def cross_entropy_loss(model):
    """Loss used to monitor training: cross-entropy of `model` on icsup itself."""
    return model_cross_entropy(model, icsup)


hmm.train(
    corpus=icsup,
    loss=cross_entropy_loss,
    minibatch_size=10,
    evalbatch_size=500,
    lr=0.01,
    tolerance=0.0001,
)

INFO:test_ic:*** Supervised training on icsup
INFO:hmm:Training HiddenMarkovModel with 36 parameters
4it [00:00, 611.99it/s]
INFO:eval:Cross-entropy: 1.9662 nats (= perplexity 7.143)
28it [00:00, 277.34it/s]

1.9661612944169478


4it [00:00, 667.35it/s]s]
INFO:eval:Cross-entropy: 1.1094 nats (= perplexity 3.032)
532it [00:02, 285.96it/s]

1.1093741330233486


4it [00:00, 658.96it/s]s]
INFO:eval:Cross-entropy: 1.0949 nats (= perplexity 2.989)
1033it [00:03, 296.72it/s]

1.0948585163463245


4it [00:00, 760.18it/s]/s]
INFO:eval:Cross-entropy: 1.0893 nats (= perplexity 2.972)
1533it [00:05, 221.80it/s]

1.0893084352666682


4it [00:00, 444.59it/s]/s]
INFO:eval:Cross-entropy: 1.0864 nats (= perplexity 2.964)
2019it [00:08, 192.02it/s]

1.086406491019509


4it [00:00, 411.49it/s]/s]
INFO:eval:Cross-entropy: 1.0846 nats (= perplexity 2.958)
2521it [00:10, 194.33it/s]

1.0846293622797185


4it [00:00, 591.81it/s]/s]
INFO:eval:Cross-entropy: 1.0834 nats (= perplexity 2.955)
3029it [00:12, 222.55it/s]

1.0834312438964844


4it [00:00, 815.50it/s]/s]
INFO:eval:Cross-entropy: 1.0826 nats (= perplexity 2.952)
3529it [00:14, 254.85it/s]

1.0825699025934392


4it [00:00, 639.25it/s]/s]
INFO:eval:Cross-entropy: 1.0819 nats (= perplexity 2.950)
4045it [00:16, 239.71it/s]

1.08192101391879


4it [00:00, 653.47it/s]/s]
INFO:eval:Cross-entropy: 1.0814 nats (= perplexity 2.949)
4550it [00:18, 272.24it/s]

1.0814150029962712


4it [00:00, 664.68it/s]/s]
INFO:eval:Cross-entropy: 1.0810 nats (= perplexity 2.948)
5029it [00:20, 270.88it/s]

1.0810093879699707


4it [00:00, 687.42it/s]/s]
INFO:eval:Cross-entropy: 1.0807 nats (= perplexity 2.947)
5525it [00:22, 232.76it/s]

1.080676945773038


4it [00:00, 424.55it/s]/s]
INFO:eval:Cross-entropy: 1.0804 nats (= perplexity 2.946)
6012it [00:24, 209.55it/s]

1.0803996432911267


4it [00:00, 394.20it/s]/s]
INFO:eval:Cross-entropy: 1.0802 nats (= perplexity 2.945)
6531it [00:26, 201.42it/s]

1.0801649527116255


4it [00:00, 578.23it/s]/s]
INFO:eval:Cross-entropy: 1.0800 nats (= perplexity 2.945)
7031it [00:29, 228.59it/s]

1.0799635973843662


4it [00:00, 582.14it/s]/s]
INFO:eval:Cross-entropy: 1.0798 nats (= perplexity 2.944)
7536it [00:31, 242.74it/s]

1.079788944937966


4it [00:00, 496.03it/s]/s]
INFO:eval:Cross-entropy: 1.0796 nats (= perplexity 2.944)
8019it [00:33, 201.02it/s]

1.079636270349676


4it [00:00, 626.88it/s]/s]
INFO:eval:Cross-entropy: 1.0795 nats (= perplexity 2.943)
8541it [00:36, 227.35it/s]

1.0795014121315696


4it [00:00, 511.05it/s]/s]
INFO:eval:Cross-entropy: 1.0794 nats (= perplexity 2.943)
9033it [00:38, 222.75it/s]

1.0793814659118652


4it [00:00, 682.83it/s]/s]
INFO:eval:Cross-entropy: 1.0793 nats (= perplexity 2.943)
INFO:hmm:Saving model to my_hmm.pkl
INFO:hmm:Saved model to my_hmm.pkl
9500it [00:40, 235.72it/s]

1.0792743075977673





In [9]:
# Announce and print the A and B matrices as they stand after supervised training.
log.info(
    "*** A, B matrices after training on icsup "
    "(should approximately match initial params on spreadsheet [transposed])"
)
hmm.printAB()

INFO:test_ic:*** A, B matrices after training on icsup (should approximately match initial params on spreadsheet [transposed])


Transition matrix A:
	C	H	_EOS_TAG_	_BOS_TAG_
C	0.888	0.110	0.002	0.000
H	0.110	0.888	0.002	0.000
_EOS_TAG_	0.333	0.335	0.332	0.000
_BOS_TAG_	0.496	0.496	0.007	0.000

Emission matrix B:
	1	2	3
C	0.700	0.200	0.100
H	0.100	0.200	0.700
_EOS_TAG_	0.000	0.000	0.000
_BOS_TAG_	0.000	0.000	0.000




Since we used a low tolerance, training should have brought us close to the
initial parameters on the spreadsheet.  Let's tag the spreadsheet "sentence"
(that is, the sequence of ice creams) using the Viterbi algorithm.

In [10]:
log.info("*** Viterbi results on icraw")
# Read the raw ice-cream sequence, reusing the tagset and vocabulary of icsup.
icraw = TaggedCorpus(Path("icraw"), tagset=icsup.tagset, vocab=icsup.vocab)
tagging_path = Path("icraw.output")
write_tagging(hmm, icraw, tagging_path)  # calls hmm.viterbi_tagging on each sentence
# Show the file we just created.  Reading it in Python (rather than shelling out
# to `cat`) displays the text in the notebook itself, works on every platform,
# and leaves no stray exit status as the cell's result.  The file is kept on disk.
print(tagging_path.read_text())

INFO:test_ic:*** Viterbi results on icraw
1it [00:00, 56.06it/s]

[('_BOS_WORD_', '_BOS_TAG_'), ('2', 'H'), ('3', 'H'), ('3', 'H'), ('2', 'H'), ('3', 'H'), ('2', 'H'), ('3', 'H'), ('2', 'H'), ('2', 'H'), ('3', 'H'), ('1', 'H'), ('3', 'H'), ('3', 'H'), ('1', 'C'), ('1', 'C'), ('1', 'C'), ('2', 'C'), ('1', 'C'), ('1', 'C'), ('1', 'C'), ('3', 'C'), ('1', 'C'), ('2', 'C'), ('1', 'C'), ('1', 'C'), ('1', 'C'), ('2', 'C'), ('3', 'H'), ('3', 'H'), ('2', 'H'), ('3', 'H'), ('2', 'H'), ('2', 'H'), ('_EOS_WORD_', '_EOS_TAG_')]





0

Now let's use the forward algorithm to see what the model thinks about 
the probability of the spreadsheet "sentence."

In [11]:
log.info(
    "*** Forward algorithm on icraw "
    "(should approximately match iteration 0 on spreadsheet)"
)
for sentence in icraw:
    # Exponentiate the value from hmm.log_prob so a plain probability is reported.
    prob = math.exp(hmm.log_prob(sentence, icraw))
    log.info(f"{prob} = p({sentence})")

INFO:test_ic:*** Forward algorithm on icraw (should approximately match iteration 0 on spreadsheet)
INFO:test_ic:2.503347234953789e-16 = p(2 3 3 2 3 2 3 2 2 3 1 3 3 1 1 1 2 1 1 1 3 1 2 1 1 1 2 3 3 2 3 2 2)


Finally, let's reestimate on the icraw data, as the spreadsheet does.

In [12]:
log.info("*** Reestimating on icraw (perplexity should improve on every iteration)")


def negative_log_likelihood(model):
    """Loss used to monitor reestimation: evaluate `model` on icraw itself."""
    return model_cross_entropy(model, icraw)


# NOTE(review): in the saved output of this cell the cross-entropy is nan from the
# second evaluation onward and the run was interrupted by hand -- investigate the
# cause before relying on the matrices this training produces.
hmm.train(
    corpus=icraw,
    loss=negative_log_likelihood,
    minibatch_size=10,
    evalbatch_size=500,
    lr=0.001,
    tolerance=0.0001,
)

INFO:test_ic:*** Reestimating on icraw (perplexity should improve on every iteration)
INFO:hmm:Training HiddenMarkovModel with 36 parameters
1it [00:00, 115.89it/s]
INFO:eval:Cross-entropy: 1.0566 nats (= perplexity 2.877)


1.0565803752225988


1it [00:00, 143.58it/s]]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
511it [00:07, 62.28it/s]

nan


1it [00:00, 164.23it/s]]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
1012it [00:15, 75.91it/s]

nan


1it [00:00, 138.93it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
1512it [00:22, 74.27it/s]

nan


1it [00:00, 188.75it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
2015it [00:28, 91.77it/s]

nan


1it [00:00, 136.65it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
2508it [00:34, 63.82it/s]

nan


1it [00:00, 173.22it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
3017it [00:40, 81.98it/s]

nan


1it [00:00, 96.07it/s]/s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
3509it [00:47, 46.69it/s]

nan


1it [00:00, 175.97it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
4017it [00:54, 78.88it/s]

nan


1it [00:00, 203.99it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
4515it [01:00, 84.30it/s]

nan


1it [00:00, 162.47it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
5011it [01:06, 80.22it/s]

nan


1it [00:00, 145.52it/s]s]
INFO:eval:Cross-entropy: nan nats (= perplexity nan)
5508it [01:14, 74.35it/s]


nan


KeyboardInterrupt: 

In [None]:
# Announce and print the A and B matrices as they stand after reestimation on icraw.
log.info(
    "*** A, B matrices after reestimation on icraw "
    "(SGD, not EM, but still should approximately match final params on spreadsheet [transposed])"
)
hmm.printAB()