|
| 1 | +import pytest |
| 2 | +import torch |
| 3 | +from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu, sentence_bleu |
| 4 | + |
| 5 | +from pytorch_lightning.metrics.functional.nlp import bleu_score |
| 6 | + |
# Sentence-level fixture, taken from the NLTK ``sentence_bleu`` example:
# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.sentence_bleu
# Each value is an immutable tuple of whitespace-separated tokens.
HYPOTHESIS1 = tuple(
    "It is a guide to action which ensures that the military always obeys the commands of the party".split()
)
REFERENCE1 = tuple("It is a guide to action that ensures that the military will forever heed Party commands".split())
REFERENCE2 = tuple(
    "It is a guiding principle which makes the military forces always being under the command of the Party".split()
)
REFERENCE3 = tuple("It is the practical guide for the army always to heed the directions of the party".split())
| 17 | + |
| 18 | + |
# Corpus-level fixture, taken from the NLTK ``corpus_bleu`` example:
# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.corpus_bleu
# Every sentence is a list of whitespace-separated tokens.
HYP1 = "It is a guide to action which ensures that the military always obeys the commands of the party".split()
HYP2 = "he read the book because he was interested in world history".split()

REF1A = "It is a guide to action that ensures that the military will forever heed Party commands".split()
REF1B = "It is a guiding principle which makes the military force always being under the command of the Party".split()
REF1C = "It is the practical guide for the army always to heed the directions of the party".split()
REF2A = "he was interested in world history because he read the book".split()

# LIST_OF_REFERENCES[i] holds all reference translations for HYPOTHESES[i].
LIST_OF_REFERENCES = [[REF1A, REF1B, REF1C], [REF2A]]
HYPOTHESES = [HYP1, HYP2]
| 31 | + |
# NLTK smoothing callable handed to the reference implementation whenever a test case
# also enables ``smooth=True`` on the function under test.
# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.SmoothingFunction
smooth_func = SmoothingFunction().method2
| 34 | + |
| 35 | + |
@pytest.mark.parametrize(
    ["weights", "n_gram", "smooth_func", "smooth"],
    [
        pytest.param([1], 1, None, False),
        pytest.param([0.5, 0.5], 2, smooth_func, True),
        pytest.param([0.333333, 0.333333, 0.333333], 3, None, False),
        pytest.param([0.25, 0.25, 0.25, 0.25], 4, smooth_func, True),
    ],
)
def test_bleu_score(weights, n_gram, smooth_func, smooth):
    """Check that ``bleu_score`` agrees with the NLTK reference implementation.

    Each parametrized case pairs NLTK ``weights`` of length ``n_gram`` with the ``n_gram``
    argument of ``bleu_score``, and an NLTK smoothing function (or ``None``) with the
    ``smooth`` flag. Both a single-sentence and a multi-sentence corpus are compared.

    Note: the ``smooth_func`` argument shadows the module-level ``smooth_func`` inside
    this function; the decorator above still refers to the module-level object.
    """
    # Single hypothesis scored against three references.
    nltk_output = sentence_bleu(
        [REFERENCE1, REFERENCE2, REFERENCE3], HYPOTHESIS1, weights=weights, smoothing_function=smooth_func
    )
    pl_output = bleu_score([HYPOTHESIS1], [[REFERENCE1, REFERENCE2, REFERENCE3]], n_gram=n_gram, smooth=smooth)
    assert torch.allclose(pl_output, torch.tensor(nltk_output))

    # Two hypotheses, each with its own list of references.
    nltk_output = corpus_bleu(LIST_OF_REFERENCES, HYPOTHESES, weights=weights, smoothing_function=smooth_func)
    pl_output = bleu_score(HYPOTHESES, LIST_OF_REFERENCES, n_gram=n_gram, smooth=smooth)
    assert torch.allclose(pl_output, torch.tensor(nltk_output))
| 55 | + |
| 56 | + |
def test_bleu_empty():
    """An empty hypothesis scored against an empty reference must yield a score of 0."""
    hyp = [[]]
    ref = [[[]]]
    assert bleu_score(hyp, ref) == torch.tensor(0.0)
| 61 | + |
| 62 | + |
def test_no_4_gram():
    """A three-token hypothesis, scored with the default ``bleu_score`` settings, must yield 0.

    NOTE(review): the test name suggests the default ``n_gram`` is 4, so a three-token
    hypothesis contains no 4-gram at all; confirm against the ``bleu_score`` signature.
    """
    hyps = [["My", "full", "pytorch-lightning"]]
    refs = [[["My", "full", "pytorch-lightning", "test"], ["Completely", "Different"]]]
    assert bleu_score(hyps, refs) == torch.tensor(0.0)
0 commit comments