In [1]:
import torch
import evaluate
import easyocr
from tqdm.auto import tqdm


class CraftOCap:
    """Scores how well the text found in images by OCR overlaps with captions.

    Text is read from each image with an EasyOCR reader that uses the CRAFT
    detector; the recognised text is then compared with the caption of the
    same index using the chrF metric loaded through `evaluate`.
    """

    # Character n-gram order requested for chrF.
    CHAR_ORDER = 4

    def __init__(self, use_gpu=None):
        """
        Args:
            use_gpu: run OCR on the GPU. When None, a GPU is used if
                `torch.cuda.is_available()` reports one.
        """
        if use_gpu is None:
            use_gpu = torch.cuda.is_available()

        self.ocr = easyocr.Reader(["en"], detect_network='craft', gpu=use_gpu)
        # chrF takes `char_order` as an argument of compute(); giving it to
        # evaluate.load() does not configure the scorer, so it is forwarded
        # from compute() below instead.
        self.chrf = evaluate.load("chrf")

    def compute(self, *, images=None, captions=None, progress_bar=True):
        """
        Args:
            images: a numpy array of shape (N, H, W, 3)
            captions: list of captions
            progress_bar: show a tqdm progress bar while running OCR

        Returns:
            The result of the chrF metric's `compute` for the OCR texts
            (predictions) against `captions` (references).

        Raises:
            TypeError: if `images` or `captions` is not provided.
            AssertionError: if `images` and `captions` differ in length.
        """
        if images is None or captions is None:
            raise TypeError("compute() requires both `images` and `captions`")
        assert len(images) == len(captions), (
            f"got {len(images)} images but {len(captions)} captions"
        )

        ocr_texts = []
        for image in tqdm(images, disable=not progress_bar):
            ocr_output = self.ocr.readtext(
                image=image,
                batch_size=64,
            )
            # Each OCR item is indexed as (box, text, confidence); only the
            # text is used. Fragments are lowercased and joined with spaces.
            ocr_texts.append(" ".join(item[1].lower() for item in ocr_output))

        return self.chrf.compute(
            predictions=ocr_texts,
            references=captions,
            char_order=self.CHAR_ORDER,
        )

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
import argparse
import time
import os
import json
from collections import Counter
from glob import glob

import torch
import evaluate
import easyocr
from tqdm.auto import tqdm

# The easyocr version is fixed at 1.6.2 for this notebook.
use_gpu = False  # build the reader without GPU support
ocr_system = easyocr.Reader(
    ["en"],
    gpu=use_gpu,
    detect_network="craft",
)
# chrf = evaluate.load("chrf")

  from .autonotebook import tqdm as notebook_tqdm
Using CPU. Note: This module is much faster with a GPU.


In [9]:
from sacrebleu.metrics.chrf import CHRF

In [10]:
# Drop any previously bound `chrf`. Unlike a bare `del`, this does not raise
# NameError when the name was never defined (e.g. on a freshly started kernel).
globals().pop("chrf", None)

In [15]:
# Load the chrF metric through the `evaluate` library.
chrf = evaluate.load(path="chrf")

Downloading builder script: 100%|██████████| 9.01k/9.01k [00:00<00:00, 1.88MB/s]


In [18]:
# Show the metric's BibTeX citation (the recorded cell output is that string).
chrf.citation

'@inproceedings{popovic-2015-chrf,\n    title = "chr{F}: character n-gram {F}-score for automatic {MT} evaluation",\n    author = "Popovi{\'c}, Maja",\n    booktitle = "Proceedings of the Tenth Workshop on Statistical Machine Translation",\n    month = sep,\n    year = "2015",\n    address = "Lisbon, Portugal",\n    publisher = "Association for Computational Linguistics",\n    url = "https://aclanthology.org/W15-3049",\n    doi = "10.18653/v1/W15-3049",\n    pages = "392--395",\n}\n@inproceedings{popovic-2017-chrf,\n    title = "chr{F}++: words helping character n-grams",\n    author = "Popovi{\'c}, Maja",\n    booktitle = "Proceedings of the Second Conference on Machine Translation",\n    month = sep,\n    year = "2017",\n    address = "Copenhagen, Denmark",\n    publisher = "Association for Computational Linguistics",\n    url = "https://aclanthology.org/W17-4770",\n    doi = "10.18653/v1/W17-4770",\n    pages = "612--618",\n}\n@inproceedings{post-2018-call,\n    title = "A Call for 

In [13]:
# Sanity check of sacrebleu's CHRF without word n-grams: scoring a sentence
# against an identical reference (the recorded output is chrF2 = 100.00).
chrf = CHRF(word_order=0)

chrf.sentence_score(
    hypothesis="hello world",
    references=["hello world"],
)

chrF2 = 100.00

In [None]:
import requ

# Modifying CHRF to support multiple hypotheses

In [4]:
from typing import *

def extract_word_ngrams(tokens: List[str], n: int) -> Counter:
    """Counts the word n-grams of a fixed order in a token list.

    Each n-gram is stored as its tokens joined by a single space.

    :param tokens: A list of tokens.
    :param n: The order of n-grams.
    :return: a Counter mapping each n-gram string to its number of occurrences
        (empty when there are fewer than `n` tokens).
    """
    counts = Counter()
    last_start = len(tokens) - n
    for start in range(last_start + 1):
        counts[' '.join(tokens[start:start + n])] += 1
    return counts


def extract_all_word_ngrams(line: str, min_order: int, max_order: int):
    """Counts every word n-gram of a sentence for orders min_order..max_order.

    The sentence is split on whitespace; each n-gram is stored as a tuple of
    tokens, so n-grams of different orders share one Counter without clashing.

    :param line: A string sentence.
    :param min_order: Minimum n-gram order.
    :param max_order: Maximum n-gram order.
    :return: a pair (Counter of n-gram tuples, number of tokens in the sentence).
    """
    words = line.split()
    n_words = len(words)

    counts = Counter(
        tuple(words[start:start + size])
        for size in range(min_order, max_order + 1)
        for start in range(n_words - size + 1)
    )

    return counts, n_words


def extract_all_char_ngrams(
        line: str, max_order: int, include_whitespace: bool = False) -> List[Counter]:
    """Counts the character n-grams of a segment for every order 1..max_order.

    :param line: A segment containing a sequence of words.
    :param max_order: The maximum order of the n-grams.
    :param include_whitespace: When False (the default) all whitespace is
        removed from the segment before n-grams are extracted; when True the
        segment is used as is.
    :return: a list with one Counter per order, the first holding 1-grams.
    """
    text = line if include_whitespace else ''.join(line.split())
    length = len(text)

    return [
        Counter([text[pos:pos + size] for pos in range(length - size + 1)])
        for size in range(1, max_order + 1)
    ]

def sum_of_lists(lists):
    """Sums several equally long numeric lists position by position.

    A single input list is returned as is (the same object, not a copy).
    Otherwise a new list as long as the first input is returned, whose
    accumulator starts from a zero of the same type as the first element.
    """
    first = lists[0]
    if len(lists) == 1:
        return first

    # Start from a zero of the element type so the datatype is preserved
    width = len(first)
    zero = type(first[0])(0.0)
    total = [zero] * width

    for row in lists:
        for pos in range(width):
            total[pos] += row[pos]

    return total


In [6]:
from sacrebleu.metrics.base import Score, Metric

class CHRFMultihypScore(Score):
    """Score object for chrF / chrF++ results of the multi-hypothesis metric.

    The score's name is `chrF<beta>` followed by one `+` per word n-gram order.

    :param score: The chrF (chrF++) score.
    :param char_order: The character n-gram order.
    :param word_order: The word n-gram order. If equals to 2, the metric is referred to as chrF++.
    :param beta: Determine the importance of recall w.r.t precision.
    """
    def __init__(self, score: float, char_order: int, word_order: int, beta: int):
        """`CHRFMultihypScore` initializer."""
        self.char_order = char_order
        self.word_order = word_order
        self.beta = beta

        # One '+' per word order marks the chrF+ variants
        plus_signs = '+' * self.word_order

        super().__init__(f'chrF{self.beta}' + plus_signs, score)


In [7]:
class CHRFMultihyp(Metric):
    """Computes the chrF(++) metric given hypotheses and references.
    Compared to the sacrebleu implementation (which it is based on), it can accept multiple hypotheses, which is required for *OCap.

    :param char_order: Character n-gram order.
    :param word_order: Word n-gram order. If equals to 2, the metric is referred to as chrF++.
    :param beta: Determine the importance of recall w.r.t precision.
    :param lowercase: Enable case-insensitivity.
    :param whitespace: If `True`, include whitespaces when extracting character n-grams.
    :param eps_smoothing: If `True`, applies epsilon smoothing similar
    to reference chrF++.py, NLTK and Moses implementations. Otherwise,
    it takes into account effective match order similar to sacreBLEU < 2.0.0.
    :param references: A sequence of reference documents with document being
    defined as a sequence of reference strings. If given, the reference n-grams
    will be pre-computed and cached for faster re-computation across many systems.
    """

    # Maximum character n-gram order to take into account
    CHAR_ORDER = 6

    # chrF+ additionally takes into account some of the word n-grams
    WORD_ORDER = 0

    # Defaults to 2 (per http://www.aclweb.org/anthology/W16-2341)
    BETA = 2

    # Cache string.punctuation for chrF+' punctuation stripper
    _PUNCTS = set('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')

    # _SIGNATURE_TYPE = CHRFSignature

    def __init__(self, char_order: int = CHAR_ORDER,
                 word_order: int = WORD_ORDER,
                 beta: int = BETA,
                 lowercase: bool = False,
                 whitespace: bool = False,
                 eps_smoothing: bool = False,
                 references: Optional[Sequence[Sequence[str]]] = None):
        """`CHRFMultihyp` initializer.

        Stores the metric configuration on the instance. `order` is the total
        number of n-gram orders scored (character orders plus word orders).
        """
        super().__init__()

        # n-gram configuration
        self.char_order = char_order
        self.word_order = word_order
        self.order = self.char_order + self.word_order
        self.beta = beta

        # text handling and smoothing switches
        self.lowercase = lowercase
        self.whitespace = whitespace
        self.eps_smoothing = eps_smoothing

        # NOTE: `references` is accepted but currently unused, because the
        # reference pre-caching below is disabled.
        # if references is not None:
        #     # Pre-compute reference ngrams
        #     self._ref_cache = self._cache_references(references)

    @staticmethod
    def _get_match_statistics(hyp_ngrams_list: List[Counter], ref_ngrams: Counter) -> List[int]:
        """Computes the match statistics between hypothesis and reference n-grams.

        The n-gram counts of all hypotheses are pooled first and the pooled
        count of each n-gram is then clipped by its reference count. Clipping
        each hypothesis separately would let several hypotheses match the same
        reference n-gram occurrences, making the match count larger than the
        reference count (recall above 1).

        :param hyp_ngrams_list: A list of `Counter` objects, one per hypothesis,
            holding the hypothesis n-grams of a single order. A single `Counter`
            is also accepted and treated as one hypothesis.
        :param ref_ngrams: A `Counter` holding reference n-grams.
        :return: A list of three numbers denoting hypothesis n-gram count,
            reference n-gram count and the intersection count.
        """
        # Counter is a dict subclass: a bare Counter means a single hypothesis
        if isinstance(hyp_ngrams_list, dict):
            hyp_ngrams_list = [hyp_ngrams_list]

        # Pool the counts of every hypothesis
        pooled = {}
        for hyp_ngrams in hyp_ngrams_list:
            for ng, count in hyp_ngrams.items():
                pooled[ng] = pooled.get(ng, 0) + count

        # Counter's internal intersection is not that fast, count manually
        match_count, hyp_count = 0, 0
        for ng, count in pooled.items():
            hyp_count += count
            if ng in ref_ngrams:
                match_count += min(count, ref_ngrams[ng])

        return [
            # Don't count hits if no reference exists for that n-gram
            hyp_count if ref_ngrams else 0,
            sum(ref_ngrams.values()),
            match_count,
        ]

    def _remove_punctuation(self, sent: str) -> List[str]:
        """Splits a sentence into words, detaching one punctuation mark per word.

        For words longer than one character, a trailing punctuation mark is
        split off; otherwise a leading one is. Only one side is ever split.
        Adapted from https://github.com/m-popovic/chrF
        :param sent: A string.
        :return: A list of words.
        """
        words = []
        for token in sent.split():
            splittable = len(token) > 1
            # NOTE: This splits '(hi)' to '(hi' and ')' (issue #124)
            if splittable and token[-1] in self._PUNCTS:
                words.extend((token[:-1], token[-1]))
            elif splittable and token[0] in self._PUNCTS:
                words.extend((token[0], token[1:]))
            else:
                words.append(token)
        return words

    def _preprocess_segment(self, sent: str) -> str:
        """Lowercases the sentence when the metric is case-insensitive.
        :param sent: The input sentence string.
        :return: The lowercased sentence if `self.lowercase` is set, otherwise
            the sentence unchanged.
        """
        if self.lowercase:
            return sent.lower()
        return sent

    def _compute_f_score(self, statistics: List[int]) -> float:
        """Turns per-order match statistics into a single F-beta score.

        With `eps_smoothing` the per-order F-scores are averaged over all
        orders, using a tiny epsilon where a count is zero. Otherwise precision
        and recall are averaged over the orders that have both hypothesis and
        reference n-grams, and one F-score is computed from those averages.

        :param statistics: A flattened list of 3 * (`char_order` + `word_order`)
            elements giving the [hyp, ref, match] counts for each order.
        :return: The F-beta score scaled by 100.
        """
        eps = 1e-16
        beta_sq = self.beta ** 2

        smoothed_sum = 0.0
        prec_total, rec_total = 0.0, 0.0
        usable_orders = 0

        for order_idx in range(self.order):
            start = 3 * order_idx
            n_hyp, n_ref, n_match = statistics[start: start + 3]

            # chrF++.py style EPS smoothing (also used by Moses and NLTK)
            if n_hyp > 0:
                prec = n_match / n_hyp
            else:
                prec = eps
            if n_ref > 0:
                rec = n_match / n_ref
            else:
                rec = eps

            denom = beta_sq * prec + rec
            if denom > 0:
                smoothed_sum += (1 + beta_sq) * prec * rec / denom
            else:
                smoothed_sum += eps

            # sacreBLEU <2.0.0 style effective order smoothing
            if n_hyp > 0 and n_ref > 0:
                prec_total += prec
                rec_total += rec
                usable_orders += 1

        if self.eps_smoothing:
            return 100 * smoothed_sum / self.order

        # No order had both hypothesis and reference n-grams
        if usable_orders == 0:
            return 0.0

        avg_prec = prec_total / usable_orders
        avg_rec = rec_total / usable_orders

        if not (avg_prec + avg_rec):
            return 0.0

        f_beta = (1 + beta_sq) * avg_prec * avg_rec
        f_beta /= ((beta_sq * avg_prec) + avg_rec)
        return 100 * f_beta

    def _compute_score_from_stats(self, stats: List[int]) -> CHRFMultihypScore:
        """Builds the score object from already aggregated statistics.
        :param stats: A list or numpy array of segment-level statistics.
        :return: A `CHRFMultihypScore` object carrying the F-score together
            with this metric's character order, word order and beta.
        """
        f_score = self._compute_f_score(stats)
        return CHRFMultihypScore(f_score, self.char_order, self.word_order, self.beta)

    def _aggregate_and_compute(self, stats: List[List[int]]) -> CHRFMultihypScore:
        """Sums the segment-level statistics and scores the totals.
        :param stats: A list of segment-level statistics
        :return: A `CHRFMultihypScore` object.
        """
        corpus_stats = sum_of_lists(stats)
        return self._compute_score_from_stats(corpus_stats)

    def _extract_reference_info(self, refs: Sequence[str]) -> Dict[str, List[List[Counter]]]:
        """Extracts the character (and, in chrF+ mode, word) n-grams of each reference.
        :param refs: A sequence of reference segments.
        :return: A dictionary with the single key `ref_ngrams`, a list holding
            for every reference segment its list of n-gram counters: character
            orders first, followed by word orders when `word_order` > 0.
        """
        per_ref_ngrams = []

        for segment in refs:
            # character n-grams for orders 1..char_order
            counters = extract_all_char_ngrams(segment, self.char_order, self.whitespace)

            # chrF+ mode: append word n-grams for orders 1..word_order
            if self.word_order > 0:
                words = self._remove_punctuation(segment)
                counters.extend(
                    extract_word_ngrams(words, order)
                    for order in range(1, self.word_order + 1)
                )

            per_ref_ngrams.append(counters)

        return {'ref_ngrams': per_ref_ngrams}

    def _compute_segment_statistics(
            self, hypothesis: List[str], ref_kwargs: Dict) -> List[int]:
        """Given (pre-processed) hypothesis sentences and already computed
        reference n-grams, returns the best match statistics across the
        references.

        N-grams are extracted from every hypothesis separately, so no n-gram
        spans two hypotheses, and the counters of all hypotheses are handed
        together, order by order, to `_get_match_statistics`.

        :param hypothesis: A list of hypothesis sentences for one segment. A
            single string is treated as one hypothesis, and an empty list as
            one empty hypothesis.
        :param ref_kwargs: A dictionary with key `ref_ngrams` which is a list
        where each sublist contains n-gram counters for a particular reference sentence.
        :return: A list of integers where each triplet denotes [hyp, ref, match]
        statistics.
        """
        # A bare string is one hypothesis, not a sequence of one-character ones
        if isinstance(hypothesis, str):
            hypotheses = [hypothesis]
        else:
            # With no hypothesis at all, score a single empty one so that the
            # statistics still contain one triplet per n-gram order
            hypotheses = list(hypothesis) or ['']

        all_hyp_ngrams_list = []
        for hyp in hypotheses:
            # extract character n-grams of this hypothesis
            hyp_ngrams = extract_all_char_ngrams(
                hyp, self.char_order, self.whitespace)

            # Check chrF+ mode to see if we'll add word n-grams as well
            if self.word_order > 0:
                # Primitive tokenization: separate out punctuations
                hwords = self._remove_punctuation(hyp)
                _range = range(1, self.word_order + 1)
                hyp_ngrams.extend([extract_word_ngrams(hwords, n) for n in _range])

            all_hyp_ngrams_list.append(hyp_ngrams)

        # Transpose from "per hypothesis, per order" to "per order, per
        # hypothesis" so that each order can be matched against the reference
        hyp_ngrams_by_order = [list(group) for group in zip(*all_hyp_ngrams_list)]

        best_stats = []
        best_f_score = -1.0

        # Iterate over multiple references, pick the one with best F score
        for _ref_ngrams in ref_kwargs['ref_ngrams']:
            stats = []
            # Traverse all orders
            for hyp_group, ref_counter in zip(hyp_ngrams_by_order, _ref_ngrams):
                stats.extend(self._get_match_statistics(hyp_group, ref_counter))
            f_score = self._compute_f_score(stats)

            if f_score > best_f_score:
                best_f_score = f_score
                best_stats = stats

        return best_stats