## Using nlgeval Library

In [0]:
!pip install git+https://github.com/Maluuba/nlg-eval.git@master

Collecting git+https://github.com/Maluuba/nlg-eval.git@master
  Cloning https://github.com/Maluuba/nlg-eval.git (to revision master) to /tmp/pip-req-build-8ufpv8n7
  Running command git clone -q https://github.com/Maluuba/nlg-eval.git /tmp/pip-req-build-8ufpv8n7
Collecting psutil>=5.6.2
[?25l  Downloading https://files.pythonhosted.org/packages/c4/b8/3512f0e93e0db23a71d82485ba256071ebef99b227351f0f5540f744af41/psutil-5.7.0.tar.gz (449kB)
[K     |████████████████████████████████| 450kB 5.0MB/s 
Collecting xdg
  Downloading https://files.pythonhosted.org/packages/cd/7b/6ad85311fd715df37ef9bb17ad1b26e26b4cdd69c7e1e7e285422b83a7e1/xdg-4.0.1-py3-none-any.whl
Building wheels for collected packages: nlg-eval, psutil
  Building wheel for nlg-eval (setup.py) ... [?25l[?25hdone
  Created wheel for nlg-eval: filename=nlg_eval-2.3-cp36-none-any.whl size=68175138 sha256=cf527a19cbe59a2803114436b976d4d390d9ab65faa26b9de964291a1b9193ca
  Stored in directory: /tmp/pip-ephem-wheel-cache-0qb89m7n/wh

In [0]:
!nlg-eval --setup

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[31mInstalling to /root/.cache/nlgeval[0m
[31mIn case of incomplete downloads, delete the directory and run `nlg-eval --setup /root/.cache/nlgeval' again.[0m
Downloading http://nlp.stanford.edu/data/glove.6B.zip to /root/.cache/nlgeval.
Downloading http://www.cs.toronto.edu/~rkiros/models/dictionary.txt to /root/.cache/nlgeval.
Downloading https://raw.githubusercontent.com/robmsmt/glove-gensim/4c2224bccd61627b76c50a5e1d6afd1c82699d22/glove2word2vec.py to /usr/local/lib/python3.6/dist-packages/nlgeval/word2vec.
Downloading http://www.cs.toronto.edu/~rkiros/models/utable.npy to /root/.cache/nlgeval.
glove2word2vec.py: 100% 1.00/1.00 [00:00<00:00, 443 chunks/s]
Downloading http://www.cs.toronto.edu/~rkiros/models/btable.npy to /root/.cache/nlgeval.
dictionary.txt: 550 chunks [00:00, 824 chunks/s]
Downloading http://www.cs.toronto.edu/~rkiros/models/uni_skip.npz to /root/.cache/nlg

Example Documentation Code

In [0]:
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import os
import unittest

import nlgeval
from nlgeval import NLGEval


class TestNlgEval(unittest.TestCase):
    def test_compute_metrics_oo(self):
        # Create the object in the test so that it can be garbage collected once the test is done.
        n = NLGEval()

        # Individual Metrics
        scores = n.compute_individual_metrics(ref=["this is a test",
                                                   "this is also a test"],
                                              hyp="this is a good test")
        self.assertAlmostEqual(0.799999, scores['Bleu_1'], places=5)
        self.assertAlmostEqual(0.632455, scores['Bleu_2'], places=5)
        self.assertAlmostEqual(0.5108729, scores['Bleu_3'], places=5)
        self.assertAlmostEqual(0.0000903602, scores['Bleu_4'], places=5)
        self.assertAlmostEqual(0.44434387, scores['METEOR'], places=5)
        self.assertAlmostEqual(0.9070631, scores['ROUGE_L'], places=5)
        self.assertAlmostEqual(0.0, scores['CIDEr'], places=5)
        self.assertAlmostEqual(0.8375251, scores['SkipThoughtCS'], places=5)
        self.assertAlmostEqual(0.980075, scores['EmbeddingAverageCosineSimilarity'], places=5)
        self.assertEqual(scores['EmbeddingAverageCosineSimilarity'], scores['EmbeddingAverageCosineSimilairty'])
        self.assertAlmostEqual(0.94509, scores['VectorExtremaCosineSimilarity'], places=5)
        self.assertAlmostEqual(0.960771, scores['GreedyMatchingScore'], places=5)
        self.assertEqual(12, len(scores))

        scores = n.compute_metrics(ref_list=[
            [
                "this is one reference sentence for sentence1",
                "this is a reference sentence for sentence2 which was generated by your model"
            ],
            [
                "this is one more reference sentence for sentence1",
                "this is the second reference sentence for sentence2"
            ],
        ],
            hyp_list=[
                "this is the model generated sentence1 which seems good enough",
                "this is sentence2 which has been generated by your model"
            ]
        )
        self.assertAlmostEqual(0.55, scores['Bleu_1'], places=5)
        self.assertAlmostEqual(0.428174, scores['Bleu_2'], places=5)
        self.assertAlmostEqual(0.284043, scores['Bleu_3'], places=5)
        self.assertAlmostEqual(0.201143, scores['Bleu_4'], places=5)
        self.assertAlmostEqual(0.295797, scores['METEOR'], places=5)
        self.assertAlmostEqual(0.522104, scores['ROUGE_L'], places=5)
        self.assertAlmostEqual(1.242192, scores['CIDEr'], places=5)
        self.assertAlmostEqual(0.626149, scores['SkipThoughtCS'], places=5)
        self.assertAlmostEqual(0.88469, scores['EmbeddingAverageCosineSimilarity'], places=5)
        self.assertAlmostEqual(0.568696, scores['VectorExtremaCosineSimilarity'], places=5)
        self.assertAlmostEqual(0.784205, scores['GreedyMatchingScore'], places=5)
        self.assertEqual(12, len(scores))

        # Non-ASCII tests.
        scores = n.compute_individual_metrics(ref=["Test en français.",
                                                   "Le test en français."],
                                              hyp="Le test est en français.")
        self.assertAlmostEqual(0.799999, scores['Bleu_1'], places=5)
        self.assertAlmostEqual(0.632455, scores['Bleu_2'], places=5)
        self.assertAlmostEqual(0.0000051, scores['Bleu_3'], places=5)
        self.assertAlmostEqual(0, scores['Bleu_4'], places=5)
        self.assertAlmostEqual(0.48372379050300296, scores['METEOR'], places=5)
        self.assertAlmostEqual(0.9070631, scores['ROUGE_L'], places=5)
        self.assertAlmostEqual(0.0, scores['CIDEr'], places=5)
        self.assertAlmostEqual(0.9192341566085815, scores['SkipThoughtCS'], places=5)
        self.assertAlmostEqual(0.906562, scores['EmbeddingAverageCosineSimilarity'], places=5)
        self.assertEqual(scores['EmbeddingAverageCosineSimilarity'], scores['EmbeddingAverageCosineSimilairty'])
        self.assertAlmostEqual(0.815158, scores['VectorExtremaCosineSimilarity'], places=5)
        self.assertAlmostEqual(0.940959, scores['GreedyMatchingScore'], places=5)
        self.assertEqual(12, len(scores))

        scores = n.compute_individual_metrics(ref=["テスト"],
                                              hyp="テスト")
        self.assertAlmostEqual(0.99999999, scores['Bleu_1'], places=5)
        self.assertAlmostEqual(1.0, scores['METEOR'], places=3)
        self.assertAlmostEqual(1.0, scores['ROUGE_L'], places=3)
        self.assertAlmostEqual(0.0, scores['CIDEr'], places=3)
        self.assertAlmostEqual(1.0, scores['SkipThoughtCS'], places=3)
        self.assertAlmostEqual(1.0, scores['GreedyMatchingScore'], places=3)
        self.assertEqual(12, len(scores))

    def test_compute_metrics_omit(self):
        n = NLGEval(metrics_to_omit=['Bleu_3', 'METEOR', 'EmbeddingAverageCosineSimilarity'])

        # Individual Metrics
        scores = n.compute_individual_metrics(ref=["this is a test",
                                                   "this is also a test"],
                                              hyp="this is a good test")
        self.assertAlmostEqual(0.799999, scores['Bleu_1'], places=5)
        self.assertAlmostEqual(0.632455, scores['Bleu_2'], places=5)
        self.assertAlmostEqual(0.9070631, scores['ROUGE_L'], places=5)
        self.assertAlmostEqual(0.0, scores['CIDEr'], places=5)
        self.assertAlmostEqual(0.8375251, scores['SkipThoughtCS'], places=5)
        self.assertAlmostEqual(0.94509, scores['VectorExtremaCosineSimilarity'], places=5)
        self.assertAlmostEqual(0.960771, scores['GreedyMatchingScore'], places=5)
        self.assertEqual(7, len(scores))

    def test_compute_metrics(self):
        # The example from the README.
        root_dir = os.path.join(os.path.dirname(__file__), '..', '..')
        hypothesis = os.path.join(root_dir, 'examples/hyp.txt')
        references = os.path.join(root_dir, 'examples/ref1.txt'), os.path.join(root_dir, 'examples/ref2.txt')
        scores = nlgeval.compute_metrics(hypothesis, references)
        self.assertAlmostEqual(0.55, scores['Bleu_1'], places=5)
        self.assertAlmostEqual(0.428174, scores['Bleu_2'], places=5)
        self.assertAlmostEqual(0.284043, scores['Bleu_3'], places=5)
        self.assertAlmostEqual(0.201143, scores['Bleu_4'], places=5)
        self.assertAlmostEqual(0.295797, scores['METEOR'], places=5)
        self.assertAlmostEqual(0.522104, scores['ROUGE_L'], places=5)
        self.assertAlmostEqual(1.242192, scores['CIDEr'], places=5)
        self.assertAlmostEqual(0.626149, scores['SkipThoughtCS'], places=5)
        self.assertAlmostEqual(0.88469, scores['EmbeddingAverageCosineSimilarity'], places=5)
        self.assertEqual(scores['EmbeddingAverageCosineSimilarity'], scores['EmbeddingAverageCosineSimilairty'])
        self.assertAlmostEqual(0.568696, scores['VectorExtremaCosineSimilarity'], places=5)
        self.assertAlmostEqual(0.784205, scores['GreedyMatchingScore'], places=5)
        self.assertEqual(12, len(scores))

In [0]:
from __future__ import unicode_literals

import os
import unittest

import nlgeval
from nlgeval import NLGEval


n = NLGEval()

# Individual Metrics
scores = n.compute_individual_metrics(ref=["this is a test","this is also a test","this is test","this is really a test","yes, this is a test"],hyp="this is a good test")

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [0]:
# print(scores)
print("Bleu_1 : ",scores['Bleu_1'])
print("Bleu_2 : ",scores['Bleu_2'])
print("Bleu_3 : ",scores['Bleu_3'])
print("Bleu_4 : ",scores['Bleu_4'])
print("METEOR : ",scores['METEOR'])
print("ROUGE_L : ",scores['ROUGE_L'])
print("CIDEr : ",scores['CIDEr'])
print("SkipThoughtCS : ",scores['SkipThoughtCS'])
print("EmbeddingAverageCosineSimilarity : ",scores['EmbeddingAverageCosineSimilarity'])
print("VectorExtremaCosineSimilarity : ",scores['VectorExtremaCosineSimilarity'])
print("GreedyMatchingScore : ",scores['GreedyMatchingScore'])

Bleu_1 :  0.7999999996800004
Bleu_2 :  0.6324555317648827
Bleu_3 :  0.5108729546934666
Bleu_4 :  9.036020031392194e-05
METEOR :  0.4443438716486749
ROUGE_L :  0.9070631970260222
CIDEr :  0.0
SkipThoughtCS :  0.837525
EmbeddingAverageCosineSimilarity :  0.983229
VectorExtremaCosineSimilarity :  0.94509
GreedyMatchingScore :  0.960771


## Using "pycocoevalcap" Library

In [6]:
!pip install --upgrade pip

Requirement already up-to-date: pip in /usr/local/lib/python3.6/dist-packages (20.0.2)


In [7]:
pip install pycocotools



In [8]:
!pip install git+https://github.com/salaniz/pycocoevalcap

Collecting git+https://github.com/salaniz/pycocoevalcap
  Cloning https://github.com/salaniz/pycocoevalcap to /tmp/pip-req-build-wlrxqwr8
  Running command git clone -q https://github.com/salaniz/pycocoevalcap /tmp/pip-req-build-wlrxqwr8
Building wheels for collected packages: pycocoevalcap
  Building wheel for pycocoevalcap (setup.py) ... [?25l[?25hdone
  Created wheel for pycocoevalcap: filename=pycocoevalcap-1.1-py3-none-any.whl size=104310172 sha256=6b58ce6a9048212c7658eb3b8c92401cd89d44b697334fe87018d26fb79d5811
  Stored in directory: /tmp/pip-ephem-wheel-cache-tr57ri1u/wheels/df/74/69/758b2491ca93bf681a1509671df34df9cf5ff605edf6e112ed
Successfully built pycocoevalcap


In [0]:
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice

Evaluating captions from JSON files

In [11]:
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.spice.spice import Spice
import json

with open('/content/gts.json', 'r') as file:
    gts = json.load(file)
with open('/content/res.json', 'r') as file:
    res = json.load(file)

def bleu():
    scorer = Bleu(n=4)
    # scorer += (hypo[0], ref1)   # hypo[0] = 'word1 word2 word3 ...'
    #                                 # ref = ['word1 word2 word3 ...', 'word1 word2 word3 ...']
    score, scores = scorer.compute_score(gts, res)

    print('belu = %s' % score)

def cider():
    scorer = Cider()
    # scorer += (hypo[0], ref1)
    (score, scores) = scorer.compute_score(gts, res)
    print('cider = %s' % score)

def meteor():
    scorer = Meteor()
    score, scores = scorer.compute_score(gts, res)
    print('meter = %s' % score)

def rouge():
    scorer = Rouge()
    score, scores = scorer.compute_score(gts, res)
    print('rouge = %s' % score)

def spice():
    scorer = Spice()
    score, scores = scorer.compute_score(gts, res)
    print('spice = %s' % score)

def main():
    bleu()
    cider()
    meteor()
    rouge()
    spice()
main()

{'testlen': 9893, 'reflen': 9855, 'guess': [9893, 8893, 7893, 6893], 'correct': [5732, 2510, 1043, 423]}
ratio: 1.003855910705124
belu = [0.5793995754573356, 0.40439129741018104, 0.2785363856619147, 0.1908290437674253]
cider = 0.5997905185184199
meter = 0.19525467177780284
rouge = 0.39625269357570847
Downloading stanford-corenlp-3.6.0 for SPICE ...
Progress: 384.5M / 384.5M (100.0%)
Extracting stanford-corenlp-3.6.0 ...
Done.
spice = 0.13288891803208092


In [0]:
class COCOEvalCap:
    def __init__(self, coco, cocoRes):
        self.evalImgs = []
        self.eval = {}
        self.imgToEval = {}
        self.coco = coco
        self.cocoRes = cocoRes
        self.params = {'image_id': coco.getImgIds()}

    def evaluate(self):
        imgIds = self.params['image_id']
        # imgIds = self.coco.getImgIds()
        gts = {}
        res = {}
        for imgId in imgIds:
            gts[imgId] = self.coco.imgToAnns[imgId]
            res[imgId] = self.cocoRes.imgToAnns[imgId]

        # =================================================
        # Set up scorers
        # =================================================
        print('tokenization...')
        tokenizer = PTBTokenizer()
        gts  = tokenizer.tokenize(gts)
        res = tokenizer.tokenize(res)

        # =================================================
        # Set up scorers
        # =================================================
        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(),"METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
            (Spice(), "SPICE")
        ]

        # =================================================
        # Compute scores
        # =================================================
        for scorer, method in scorers:
            print('computing %s score...'%(scorer.method()))
            score, scores = scorer.compute_score(gts, res)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f"%(m, sc))
            else:
                self.setEval(score, method)
                self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f"%(method, score))
        self.setEvalImgs()

    def setEval(self, score, method):
        self.eval[method] = score

    def setImgToEvalImgs(self, scores, imgIds, method):
        for imgId, score in zip(imgIds, scores):
            if not imgId in self.imgToEval:
                self.imgToEval[imgId] = {}
                self.imgToEval[imgId]["image_id"] = imgId
            self.imgToEval[imgId][method] = score

    def setEvalImgs(self):
        self.evalImgs = [eval for imgId, eval in self.imgToEval.items()]

In [0]:
def calc_scores(ref, hypo):
    """
    ref, dictionary of reference sentences (id, sentence)  ------> here, each element of ref is a list
    hypo, dictionary of hypothesis sentences (id, sentence)
    score, dictionary of scores
    """
    print(type(ref))
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(),"METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores 

In [0]:
ref = {}
hypo = {}

ref[0] = ["this is one reference sentence for sentence1","this is a reference sentence for sentence2 which was generated by your model"]
hypo[0] = ["this is the model generated sentence1 which seems good enough"]

In [0]:
references = [["this is one reference sentence for sentence1","this is a reference sentence for sentence2 which was generated by your model"],["this is one more reference sentence for sentence1","this is the second reference sentence for sentence2"],],
hypothesis = ["this is the model generated sentence1 which seems good enough","this is sentence2 which has been generated by your model"]

In [34]:
calc_scores(ref, hypo)

<class 'dict'>
{'testlen': 10, 'reflen': 7, 'guess': [10, 9, 8, 7], 'correct': [6, 1, 0, 0]}
ratio: 1.4285714283673472


{'Bleu_1': 0.5999999999400001,
 'Bleu_2': 0.25819888971990695,
 'Bleu_3': 2.027400664963992e-06,
 'Bleu_4': 5.873949093995857e-09,
 'CIDEr': 0.0,
 'METEOR': 0.16741198894585999,
 'ROUGE_L': 0.36454183266932266,
 'SPICE': 0.36363636363636365}

In [21]:
from pycocotools import mask as mask
print(mask.__author__)

tsungyi


In [0]:
def calc_scores(ref, hypo):
    """
    ref, dictionary of reference sentences (id, sentence)
    hypo, dictionary of hypothesis sentences (id, sentence)
    score, dictionary of scores
    """
    print(type(hypo.keys))
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(),"METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE")
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores 

In [0]:
ref = {}
hypo = {}

ref[0] = ["this is one reference sentence for sentence1","this is a reference sentence for sentence2 which was generated by your model"]
ref[1] = ["this is one more reference sentence for sentence1","this is the second reference sentence for sentence2"]
hypo[0] = ["this is the model generated sentence1 which seems good enough"]
hypo[1] = ["this is sentence2 which has been generated by your model"]

In [31]:
print(calc_scores(ref, hypo))

<class 'builtin_function_or_method'>
{'testlen': 20, 'reflen': 15, 'guess': [20, 18, 16, 14], 'correct': [9, 2, 0, 0]}
ratio: 1.3333333332444444
{'Bleu_1': 0.44999999997750006, 'Bleu_2': 0.22360679773817757, 'Bleu_3': 1.4620088690245348e-06, 'Bleu_4': 3.8652758782383096e-09, 'METEOR': 0.1552910794167853, 'ROUGE_L': 0.3523452657770405, 'CIDEr': 0.23638309890658804, 'SPICE': 0.2727272727272727}


## Using nltk Libraries

In [0]:
from nltk.translate import AlignedSent, Alignment
import nltk.translate.gleu_score
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.gleu_score import sentence_gleu
from nltk.translate.gleu_score import corpus_gleu
from nltk.translate.bleu_score import SmoothingFunction

In [0]:
hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which','ensures', 'that', 'the', 'military', 'always','obeys', 'the', 'commands', 'of', 'the', 'party']
ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that','ensures', 'that', 'the', 'military', 'will', 'forever','heed', 'Party', 'commands']
ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',          'guarantees', 'the', 'military', 'forces', 'always','being', 'under', 'the', 'command', 'of', 'the', 'Party']
ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the','army', 'always', 'to', 'heed', 'the', 'directions','of', 'the', 'party']

In [0]:
hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was','interested', 'in', 'world', 'history']
ref2a = ['he', 'was', 'interested', 'in', 'world', 'history','because', 'he', 'read', 'the', 'book']

In [0]:
hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which','ensures', 'that', 'the', 'military', 'always','obeys', 'the', 'commands', 'of', 'the', 'party']
hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops','forever', 'hearing', 'the', 'activity', 'guidebook','that', 'party', 'direct']

In [0]:
 reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that','ensures', 'that', 'the', 'military', 'will', 'forever','heed', 'Party', 'commands']
 reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which','guarantees', 'the', 'military', 'forces', 'always','being', 'under', 'the', 'command', 'of', 'the','Party']
 reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the','army', 'always', 'to', 'heed', 'the', 'directions','of', 'the', 'party']

In [0]:
list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
hypotheses = [hyp1, hyp2]
corpus_gleu(list_of_references, hypotheses)

0.5673076923076923

In [0]:
corpus_bleu(list_of_references, hypotheses)

0.5920778868801042

In [0]:
score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
score2 = sentence_bleu([ref2a], hyp2)
(score1 + score2) / 2

0.6223247442490669

In [0]:
sentence_bleu([reference1, reference2, reference3], hypothesis1)

0.5045666840058485

In [0]:
sentence_gleu([reference1, reference2, reference3], hypothesis1)

0.4393939393939394

In [0]:
round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4)

Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0.3969

In [0]:
chencherry = SmoothingFunction()
sentence_bleu([reference1, reference2, reference3], hypothesis2,smoothing_function=chencherry.method1)

0.03703131191121491

In [0]:
weights = (.25, .25, .25, .25)
sentence_bleu([reference1, reference2, reference3], hypothesis1, weights)

0.5045666840058485

More Information can be obtained from <a href="https://www.nltk.org/api/nltk.translate.html">this link</a> .