In [1]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

def import_sys():
    import sys
    sys.path.append('..')
import_sys()

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

In [2]:
import errudite
print(errudite)

<module 'errudite' from '/Users/gyuhoshim/errudite/errudite/__init__.py'>


In [3]:
import pandas as pd
from tqdm import tqdm

from overrides import overrides

from errudite.io import DatasetReader
from errudite.utils import normalize_file_path, accuracy_score
from errudite.targets.instance import Instance
from errudite.targets.target import Target
from errudite.targets.label import Label, PredefinedLabel



## 1. DatasetReader

In [None]:
@DatasetReader.register("STE")
class STEReader(DatasetReader):
    def __init__(self, cache_folder_path: str=None) -> None:
        super().__init__(cache_folder_path)
        # overwrite the primary evaluation method and metric name
        Label.set_task_evaluator(accuracy_score, 'accuracy')
        
    @overrides
    def _read(self, file_path: str, lazy: bool, sample_size: int):
        """
        Returns a list containing all the instances in the specified dataset.

        Parameters
        ----------
        file_path : str
            The path of the input data file.
        lazy : bool, optional
            If ``lazy==True``, only run the tokenization, does not compute the linguistic
            features like POS, NER. By default False
        sample_size : int, optional
            If sample size is set, only load this many of instances, by default None
        
        Returns
        -------
        List[Instance]
            The instance list.
        """
        logger.info("Reading instances from lines in file at: %s", file_path)
        df = pd.read_csv(normalize_file_path(file_path), sep='\t')
        for idx, row in tqdm(df.iterrows()):
            if lazy:
                premises.append(row['sentence1'])
                hypotheses.append(row['sentence2'])
            else:
                instance = self._text_to_instance(f'q:{idx}', row)
                if instance is not None:
                    instances.append(instance)
                if sample_size and idx > sample_size:
                    break
        if lazy:
            return { "premise": premises, "hypoethsis": hypotheses }
        else:
            return instances
    
    @overrides
    def _text_to_instance(self, id: str, row) -> Instance:
        # The function that transfers raw text to instance.
        premise = Target(qid=row['pairID'], text=row['sentence1'], vid=0, metas={'type': 'premise'})
        hypothesis = Target(qid=row['pairID'], text=row['sentence2'], vid=0, metas={'type': 'hypothesis'})
        # label
        raw_labels = [row[f'label{i}']  for i in range(1,6)]
        groundtruth = PredefinedLabel(
            model='groundtruth', 
            qid=row['pairID'], 
            text=row['gold_label'], 
            vid=0, 
            metas={'raw_labels': raw_labels}
        )
        return self.create_instance(row['pairID'], 
            hypothesis=hypothesis, 
            premise=premise, 
            groundtruth=groundtruth)

In [None]:
#This reader, as we did before, can be queried via:

from errudite.readers import DatasetReader
DatasetReader.by_name("STE")

## 2. Predictor

In [4]:
from typing import List, Dict
from ..predictor import Predictor
from ...targets.label import Label, PredefinedLabel
from ..predictor_allennlp import PredictorAllennlp # a wrapper for Allennlp classes

ValueError: attempted relative import beyond top-level package

In [6]:
ls ../

LICENSE                [34mmyenv[m[m/                 [34mscripts[m[m/
README.md              [34mnli[m[m/                   [34msentiment_analysis[m[m/
__init__.py            predictor.py           setup.py
[34m__pycache__[m[m/           predictor_allennlp.py  [34mtutorials[m[m/
config.yml             [34mpredictors[m[m/            [34mui[m[m/
[34mdocs[m[m/                  [34mpy36env[m[m/               [34mvqa[m[m/
[34merrudite[m[m/              [34mqa[m[m/
[34merrudite.egg-info[m[m/     requirements.txt


In [None]:


@Predictor.register("nli_decompose_att")
class PredictorNLI(Predictor, PredictorAllennlp):
    """
    The wrapper for DecomposableAttention model, as implemented in Allennlp:
    https://allenai.github.io/allennlp-docs/api/allennlp.predictors.html#decomposable-attention
    """
    def __init__(self, name: str, 
        model_path: str=None,
        model_online_path: str=None,
        description: str='') -> None:
        PredictorAllennlp.__init__(self, name, model_path, model_online_path, description)
        Predictor.__init__(self, name, description, model, perform_metrics)
        # set the perform metrics
        perform_metrics = ['accuracy', 'confidence']
        # First, define the evaluation function to determine how well a model is doing 
        # on one instance, based on an individual predicted label.
        from ...utils.evaluator import accuracy_score
        # Second, from the metrics above, pick one that's primary, and it will be used 
        # to compute `is_incorrect()` in any label target object: primary metric < 1.
        Label.set_task_evaluator(
            # the evaluation function that accepts pred and groundtruths, 
            # and return a dict of metrics: { metric_name: metric_score }. 
            # This is saved as Label.task_evaluation_func.
            task_evaluation_func=accuracy_score, 
            # The primary task metric name, ideally a key of task_evaluation_func ‘s return.
            task_primary_metric='accuracy')

    # the raw prediction function, returning the output of the model in a json format.
    def predict(self, premise: str, hypothesis: str) -> Dict[str, float]:
        try:
            labels = ['entailment', 'contradiction', 'neutral']
            predicted = self.model.predict_json({
                "premise": premise, "hypothesis":hypothesis})
            return {
                'confidence': max(predicted['label_probs']),
                'text': labels[np.argmax(label_probs)],
            }
        except:
            raise

    @classmethod
    # the class method that takes `Target` inputs, and output a `Label` object.
    def model_predict(cls, 
        predictor: Predictor, 
        premise: Target, 
        hypothesis: Target, 
        groundtruth: Label) -> 'Label':
        answer = None
        if not predictor:
            return answer
        predicted = predictor.predict(premise.get_text(), hypothesis.get_text())
        if not predicted:
            return None
        answer = PredefinedLabel(
            model=predictor.name, 
            qid=premise.qid,
            text=predicted['text'], 
            vid=max([premise.vid, hypothesis.vid, groundtruth.vid] ))
        answer.compute_perform(groundtruths=groundtruth)
        answer.set_perform(confidence=predicted['confidence'])
        return answer

