In [1]:
# Install PyDrive; a later cell uses it to upload the evaluation outputs to Google Drive.
# NOTE(review): the version is not pinned, unlike the other pip installs in this notebook.
!pip install PyDrive
import os
import IPython.display as ipd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user first, then hand the resulting application-default
# credentials to PyDrive and build the Drive client from them (order matters).
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)



In [None]:
# Print the GPU (nvidia-smi) and CPU (lscpu) of the current runtime so that the
# timing figures reported further down can be read in context.
!nvidia-smi
!lscpu

In [3]:
# Install the exact library versions this notebook was run with.
# datasets/transformers are imported directly below; sentencepiece and
# sentence-transformers are not imported by any cell visible in this notebook.
!pip install datasets==1.9.0
!pip install transformers==4.7.0
!pip install sentencepiece==0.1.96
!pip install sentence-transformers==2.0.0

Collecting datasets==1.9.0
[?25l  Downloading https://files.pythonhosted.org/packages/86/27/9c91ddee87b06d2de12f134c5171a49890427e398389f07f6463485723c3/datasets-1.9.0-py3-none-any.whl (262kB)
[K     |█▎                              | 10kB 18.2MB/s eta 0:00:01[K     |██▌                             | 20kB 23.7MB/s eta 0:00:01[K     |███▊                            | 30kB 24.6MB/s eta 0:00:01[K     |█████                           | 40kB 18.5MB/s eta 0:00:01[K     |██████▎                         | 51kB 17.4MB/s eta 0:00:01[K     |███████▌                        | 61kB 16.1MB/s eta 0:00:01[K     |████████▊                       | 71kB 17.2MB/s eta 0:00:01[K     |██████████                      | 81kB 14.5MB/s eta 0:00:01[K     |███████████▎                    | 92kB 15.4MB/s eta 0:00:01[K     |████████████▌                   | 102kB 14.9MB/s eta 0:00:01[K     |█████████████▊                  | 112kB 14.9MB/s eta 0:00:01[K     |███████████████                 | 1

In [4]:
%%writefile rouge.py
"""Computes ROUGE scores between two texts or two lists of texts.
Implemented based on https://github.com/google-research/google-research/tree/master/rouge
"""
import re
import collections
import numpy as np

import six
from six.moves import map
from six.moves import range


class Score(collections.namedtuple("Score", "precision recall fmeasure")):
    """Immutable (precision, recall, fmeasure) triple for one ROUGE measurement."""


class AggregateScore(collections.namedtuple("AggregateScore", "low mid high")):
    """Immutable (low, mid, high) triple holding the bounds of a confidence interval."""


class BootstrapAggregator(object):
    """Aggregates per-example scores and reports bootstrap confidence intervals.

    Sample usage:
      scorer = RougeScorer(['rouge1', 'rougeL'])
      aggregator = BootstrapAggregator()
      aggregator.add_scores(scorer.score("one two three", "one two"))
      aggregator.add_scores(scorer.score("one two five six", "seven eight"))
      result = aggregator.aggregate()
      print(result)
      {'rougeL': AggregateScore(
           low=Score(precision=0.0, recall=0.0, fmeasure=0.0),
           mid=Score(precision=0.5, recall=0.33, fmeasure=0.40),
           high=Score(precision=1.0, recall=0.66, fmeasure=0.80)),
       'rouge1': AggregateScore(
           low=Score(precision=0.0, recall=0.0, fmeasure=0.0),
           mid=Score(precision=0.5, recall=0.33, fmeasure=0.40),
           high=Score(precision=1.0, recall=0.66, fmeasure=0.80))}

    The numbers above are illustrative; actual values vary from run to run
    unless a `seed` is supplied.
    """

    def __init__(self, confidence_interval=0.95, n_samples=1000, seed=None):
        """Initializes a BootstrapAggregator object.

        Args:
          confidence_interval: Confidence interval to compute on the mean as a
            decimal.
          n_samples: Number of samples to use for bootstrap resampling.
          seed: Optional seed for a private random generator, making the
            resampling reproducible. When None (the default) the global
            `np.random` state is used, exactly as before this parameter existed.

        Raises:
          ValueError: If invalid argument is given.
        """

        if confidence_interval < 0 or confidence_interval > 1:
            raise ValueError("confidence_interval must be in range [0, 1]")
        if n_samples <= 0:
            raise ValueError("n_samples must be positive")

        self._n_samples = n_samples
        self._confidence_interval = confidence_interval
        self._scores = collections.defaultdict(list)
        # Both the `np.random` module and a RandomState instance expose `.choice`,
        # so the resampling code can use either one interchangeably.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

    def add_scores(self, scores):
        """Adds a sample for future aggregation.

        Args:
          scores: Dict mapping score_type strings to a namedtuple object/class
            representing a score.
        """

        for score_type, score in scores.items():
            self._scores[score_type].append(score)

    def aggregate(self):
        """Aggregates scores previously added using add_scores.

        Returns:
          A dict mapping score_type to AggregateScore objects. The dict is
          empty when no scores have been added.
        """

        result = {}
        for score_type, scores in self._scores.items():
            # Stack scores into a 2-d matrix of (sample, measure).
            score_matrix = np.vstack(tuple(scores))
            # Percentiles are returned as (interval, measure).
            percentiles = self._bootstrap_resample(score_matrix)
            # Rebuild one score tuple per bound, using the same class as the inputs.
            score_cls = scores[0].__class__
            low, mid, high = (score_cls(*percentiles[row, :]) for row in range(3))
            result[score_type] = AggregateScore(low=low, mid=mid, high=high)
        return result

    def _bootstrap_resample(self, matrix):
        """Performs bootstrap resampling on a matrix of scores.

        Args:
          matrix: A 2-d matrix of (sample, measure).

        Returns:
          A 2-d matrix of (bounds, measure). There are three bounds: low (row 0),
          mid (row 1) and high (row 2). Mid is the 50th percentile (median) of
          the bootstrap sample means, while low and high are set by
          self._confidence_interval (which defaults to 0.95, meaning the 2.5th
          and 97.5th percentiles for a 95% confidence interval on the mean).
        """

        n_rows = matrix.shape[0]
        # Matrix of (bootstrap sample, measure).
        sample_mean = np.zeros((self._n_samples, matrix.shape[1]))
        for i in range(self._n_samples):
            # Draw n_rows row indices with replacement and average those rows.
            sample_idx = self._rng.choice(np.arange(n_rows), size=n_rows)
            sample_mean[i, :] = np.mean(matrix[sample_idx, :], axis=0)

        # Take percentiles on the estimate of the mean using bootstrap samples.
        # Final result is a (bounds, measure) matrix.
        percentile_delta = (1 - self._confidence_interval) / 2
        q = 100 * np.array([percentile_delta, 0.5, 1 - percentile_delta])
        return np.percentile(sample_mean, q, axis=0)


class RougeScorer:
    """Calculates ROUGE scores between two blobs of text.

    Text is tokenized by splitting on whitespace only; this class applies no
    lowercasing, punctuation stripping or stemming.

    Sample usage:
      scorer = RougeScorer(['rouge1', 'rougeL'])
      scores = scorer.score('The quick brown fox jumps over the lazy dog',
                            'The quick brown dog jumps on the log.')
    """

    def __init__(self, rouge_types):
        """Initializes a new RougeScorer.

        Valid rouge types that can be computed are:
          rougen (e.g. rouge1, rouge2): n-gram based scoring (single digit n).
          rougeL: Longest common subsequence based scoring.
          rougeLsum: Summary-level LCS; sentences are separated by newlines.

        Args:
          rouge_types: A list of rouge types to calculate.
        """

        self.rouge_types = rouge_types

    @staticmethod
    def _tokenize(text):
        """Splits text on runs of whitespace and drops empty tokens.

        `re.split` yields '' for leading/trailing whitespace and [''] for an
        empty string; keeping those would count '' as a real token and let two
        empty texts match each other.
        """
        return [token for token in re.split(r"\s+", text) if token]

    @staticmethod
    def _split_sentences(text):
        """Splits text into its non-empty lines (one sentence per line)."""
        return [line for line in six.ensure_str(text).split("\n") if len(line)]

    @staticmethod
    def _fmeasure(precision, recall):
        """Returns the harmonic mean of precision and recall, or 0.0 if both are 0."""
        if precision + recall > 0:
            return 2 * precision * recall / (precision + recall)
        return 0.0

    @staticmethod
    def _create_ngrams(tokens, n):
        """Creates ngrams from the given list of tokens.

        Args:
          tokens: A list of tokens from which ngrams are created.
          n: Number of tokens to use, e.g. 2 for bigrams.
        Returns:
          A Counter mapping each ngram (a tuple of n tokens) to the number of
          occurrences. Empty when there are fewer than n tokens.
        """

        return collections.Counter(
            tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

    @staticmethod
    def _lcs_table(ref, can):
        """Create 2-d LCS score table.

        Entry [i][j] holds the LCS length of ref[:i] and can[:j], so the
        bottom-right entry is the LCS length of the full sequences.
        """
        rows = len(ref)
        cols = len(can)
        lcs_table = [[0] * (cols + 1) for _ in range(rows + 1)]
        for i in range(1, rows + 1):
            for j in range(1, cols + 1):
                if ref[i - 1] == can[j - 1]:
                    lcs_table[i][j] = lcs_table[i - 1][j - 1] + 1
                else:
                    lcs_table[i][j] = max(lcs_table[i - 1][j], lcs_table[i][j - 1])
        return lcs_table

    @staticmethod
    def _backtrack_norec(t, ref, can):
        """Read out LCS.

        Walks the table `t` from the bottom-right corner and returns the indices
        into `ref` of the tokens on one longest common subsequence, ascending.
        """
        i = len(ref)
        j = len(can)
        lcs = []
        while i > 0 and j > 0:
            if ref[i - 1] == can[j - 1]:
                # Collected back-to-front; reversed once at the end instead of
                # paying for a front insertion on every match.
                lcs.append(i - 1)
                i -= 1
                j -= 1
            elif t[i][j - 1] > t[i - 1][j]:
                j -= 1
            else:
                i -= 1
        lcs.reverse()
        return lcs

    def lcs_ind(self, ref, can):
        """Returns the ref indices of one of the longest common subsequences."""
        t = self._lcs_table(ref, can)
        return self._backtrack_norec(t, ref, can)

    @staticmethod
    def _find_union(lcs_list):
        """Finds union LCS given a list of LCS (as a sorted list of indices)."""
        return sorted(set().union(*lcs_list))

    def _union_lcs(self, ref, c_list):
        """Find union LCS between a ref sentence and list of candidate sentences.

        Args:
          ref: list of tokens
          c_list: list of candidate sentences, each a list of tokens

        Returns:
          List of tokens in ref representing union LCS.
        """
        lcs_list = [self.lcs_ind(ref, c) for c in c_list]
        return [ref[i] for i in self._find_union(lcs_list)]

    def _summary_level_lcs(self, ref_sent, can_sent):
        """ROUGE: Summary-level LCS, section 3.2 in ROUGE paper.

        Args:
          ref_sent: list of tokenized reference sentences
          can_sent: list of tokenized candidate sentences

        Returns:
          summary level ROUGE score
        """
        if not ref_sent or not can_sent:
            return Score(precision=0, recall=0, fmeasure=0)

        m = sum(len(sentence) for sentence in ref_sent)
        n = sum(len(sentence) for sentence in can_sent)
        if not n or not m:
            return Score(precision=0, recall=0, fmeasure=0)

        # get token counts to prevent double counting
        token_cnts_r = collections.Counter()
        token_cnts_c = collections.Counter()
        for s in ref_sent:
            # s is a list of tokens
            token_cnts_r.update(s)
        for s in can_sent:
            token_cnts_c.update(s)

        hits = 0
        for r in ref_sent:
            lcs = self._union_lcs(r, can_sent)
            # Prevent double-counting:
            # The paper describes just computing hits += len(_union_lcs()),
            # but the implementation prevents double counting. We also
            # implement this as in version 1.5.5.
            for t in lcs:
                if token_cnts_c[t] > 0 and token_cnts_r[t] > 0:
                    hits += 1
                    token_cnts_c[t] -= 1
                    token_cnts_r[t] -= 1

        recall = hits / m
        precision = hits / n
        return Score(precision=precision, recall=recall,
                     fmeasure=self._fmeasure(precision, recall))

    def _score_lcs(self, target_tokens, prediction_tokens):
        """Computes LCS (Longest Common Subsequence) rouge scores.

        Args:
          target_tokens: Tokens from the target text.
          prediction_tokens: Tokens from the predicted text.
        Returns:
          A Score object containing computed scores.
        """

        if not target_tokens or not prediction_tokens:
            return Score(precision=0, recall=0, fmeasure=0)

        # Compute length of LCS from the bottom up in a table (DP approach).
        lcs_table = self._lcs_table(target_tokens, prediction_tokens)
        lcs_length = lcs_table[-1][-1]

        precision = lcs_length / len(prediction_tokens)
        recall = lcs_length / len(target_tokens)

        return Score(precision=precision, recall=recall,
                     fmeasure=self._fmeasure(precision, recall))

    @staticmethod
    def _score_ngrams(target_ngrams, prediction_ngrams):
        """Compute n-gram based rouge scores.

        Args:
          target_ngrams: A Counter object mapping each ngram to number of
            occurrences for the target text.
          prediction_ngrams: A Counter object mapping each ngram to number of
            occurrences for the prediction text.
        Returns:
          A Score object containing computed scores.
        """

        # Clipped overlap: an ngram counts at most as often as it appears in
        # either text. Counter lookups return 0 for ngrams the prediction lacks.
        intersection_ngrams_count = sum(
            min(count, prediction_ngrams[ngram])
            for ngram, count in target_ngrams.items())
        target_ngrams_count = sum(target_ngrams.values())
        prediction_ngrams_count = sum(prediction_ngrams.values())

        # max(..., 1) avoids dividing by zero when a text has no ngrams.
        precision = intersection_ngrams_count / max(prediction_ngrams_count, 1)
        recall = intersection_ngrams_count / max(target_ngrams_count, 1)

        return Score(precision=precision, recall=recall,
                     fmeasure=RougeScorer._fmeasure(precision, recall))

    def score(self, target, prediction):
        """Calculates rouge scores between the target and prediction.

        Args:
          target: Text containing the target (ground truth) text.
          prediction: Text containing the predicted text.
        Returns:
          A dict mapping each rouge type to a Score object.
        Raises:
          ValueError: If an invalid rouge type is encountered.
        """

        target_tokens = self._tokenize(target)
        prediction_tokens = self._tokenize(prediction)
        result = {}

        for rouge_type in self.rouge_types:
            if rouge_type == "rougeL":
                # Rouge from longest common subsequences.
                result[rouge_type] = self._score_lcs(target_tokens, prediction_tokens)
            elif rouge_type == "rougeLsum":
                # Sentences are assumed to be separated by newlines; lines that
                # hold no tokens are dropped.
                target_tokens_list = [
                    tokens for tokens in map(self._tokenize, self._split_sentences(target))
                    if tokens]
                prediction_tokens_list = [
                    tokens for tokens in map(self._tokenize, self._split_sentences(prediction))
                    if tokens]
                result[rouge_type] = self._summary_level_lcs(target_tokens_list, prediction_tokens_list)
            elif re.match(r"rouge[0-9]$", six.ensure_str(rouge_type)):
                # Rouge from n-grams.
                n = int(rouge_type[5:])
                if n <= 0:
                    raise ValueError("rougen requires positive n: %s" % rouge_type)
                target_ngrams = self._create_ngrams(target_tokens, n)
                prediction_ngrams = self._create_ngrams(prediction_tokens, n)
                result[rouge_type] = self._score_ngrams(target_ngrams, prediction_ngrams)
            else:
                raise ValueError("Invalid rouge type: %s" % rouge_type)

        return result

    def compute(self, predictions, references):
        """Calculates aggregated rouge scores for a list of hypotheses and references

        Args:
          predictions: List of predictions to score. Each predictions should be a string with tokens separated by
          spaces.
          references: List of reference for each prediction. Each reference should be a string with tokens
          separated by spaces.
        Returns:
          A dict mapping each rouge type to an AggregateScore holding the
          bootstrap (low, mid, high) bounds. Empty when the lists are empty.
        """

        assert len(references) == len(predictions), "Length of references and predictions must be equal!"

        aggregator = BootstrapAggregator()
        for ref, pred in zip(references, predictions):
            aggregator.add_scores(self.score(ref, pred))

        return aggregator.aggregate()


Writing rouge.py


In [5]:
import gc
import os
import time
import json
import torch
import datasets
import transformers

from rouge import RougeScorer
from transformers import BertTokenizerFast, EncoderDecoderConfig, EncoderDecoderModel

# Log the installed transformers and torch versions (surrounded by blank lines)
# so the run environment is recorded alongside the results.
print()
print('transformers', transformers.__version__)
print('torch', torch.__version__)
print()

# load rouge for validation
# rouge = datasets.load_metric("rouge")

# Run on CUDA whenever PyTorch can see a GPU; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Report how many GPUs are visible and the name of device 0.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')



transformers 4.7.0
torch 1.9.0+cu102

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [6]:
class Summarization:
    """Loads a pretrained summarization model and runs inference / ROUGE evaluation.

    Only the "bert2bert" model type is supported: a `BertTokenizerFast`
    tokenizer paired with an `EncoderDecoderModel`, both loaded from
    `model_name`. For any other model type a message is printed and
    `tokenizer`, `config` and `model` stay `None`.
    """

    # Supported dataset names (lower-cased) mapped to the dataset config version
    # handed to `datasets.load_dataset`.
    _DATASET_VERSIONS = {
        "wiki-summary-v1.0.0": "1.0.0",
        "wiki-summary-v2.0.0": "2.0.0",
        "news-headline-v1.0.0": "1.0.0",
    }

    def __init__(self, model_name, model_type):
        """Loads tokenizer, config and model for `model_name`.

        Args:
          model_name: Model identifier or path passed to `from_pretrained`.
          model_type: Model family name, compared case-insensitively.
        """
        self.model_name = model_name
        self.model_type = model_type.lower()
        # Define the attributes up front so the `is None` guards in the other
        # methods work (instead of raising AttributeError) when the model type
        # is unsupported.
        self.tokenizer = None
        self.config = None
        self.model = None
        if self.model_type == "bert2bert":
            self.tokenizer = BertTokenizerFast.from_pretrained(model_name)
            self.config = EncoderDecoderConfig.from_pretrained(model_name)
            self.model = EncoderDecoderModel.from_pretrained(model_name, config=self.config)
        else:
            print('model_type not supported!')

    @staticmethod
    def load_dataset_test_file(dataset_name, dataset_path, **kwargs):
        """Loads the 'test' split of one of the supported datasets.

        Args:
          dataset_name: One of the keys of `_DATASET_VERSIONS` (case-insensitive).
          dataset_path: Local path of the dataset loading script/directory.
          **kwargs: Accepted but currently ignored.

        Returns:
          The loaded test split, or None (after printing a message) when the
          dataset name is unknown or `dataset_path` does not exist.
        """
        version = Summarization._DATASET_VERSIONS.get(dataset_name.lower())
        if version is None:
            print(f'{dataset_name} not supported!')
            return None
        if not os.path.exists(dataset_path):
            print(f'{dataset_path} not exists!')
            return None
        return datasets.load_dataset(dataset_path, version, split='test', cache_dir=None)

    def _is_ready(self):
        """Returns True when both model and tokenizer have been loaded."""
        # Explicit None checks: truthiness of the tokenizer would go through its
        # __len__ rather than testing whether it was loaded.
        return self.model is not None and self.tokenizer is not None

    def _move_model_to(self, device):
        """Frees cached memory and places the model on `device`."""
        gc.collect()
        torch.cuda.empty_cache()
        # `.to(device)` honours the exact device requested (including a device
        # index, or moving back to the CPU), keeping the model on the same
        # device the input tensors are sent to.
        self.model.to(device)

    def _generate(self, texts, device, max_length):
        """Tokenizes `texts`, generates with the model and decodes to strings."""
        # Every sequence is padded/truncated to exactly `max_length` tokens.
        inputs = self.tokenizer(
            texts,
            padding="max_length",
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        )
        input_ids = inputs.input_ids.to(device)
        attention_mask = inputs.attention_mask.to(device)
        outputs = self.model.generate(input_ids, attention_mask=attention_mask)
        # Special tokens are stripped from the decoded text.
        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

    def bert2bert_summarization_inference(self, sequence_list, device, max_length=512):
        """Generates one summary per input text.

        Args:
          sequence_list: List of input texts.
          device: `torch.device` to run the model on.
          max_length: Token length inputs are padded/truncated to.

        Returns:
          List of generated strings, or None (after printing a message) when
          the model or tokenizer is not loaded.
        """
        if not self._is_ready():
            print('Something wrong has been happened!')
            return None

        self._move_model_to(device)
        return self._generate(sequence_list, device, max_length)

    def bert2bert_evaluation(self, input_data, target_column, device, max_length=512, batch_size=4,
                             source_column="article"):
        """Generates summaries for a dataset and prints timing and ROUGE scores.

        Args:
          input_data: Dataset object supporting `.map(fn, batched=True,
            batch_size=...)`, `len()` and column access by name.
          target_column: Name of the column holding the reference summaries.
          device: `torch.device` to run the model on.
          max_length: Token length inputs are padded/truncated to.
          batch_size: Number of rows generated per batch.
          source_column: Name of the column holding the input texts.

        Returns:
          The mapped dataset with an added "predicted_summary" column, or None
          (after printing a message) when the model or tokenizer is not loaded.
        """
        if not self._is_ready():
            print('Something wrong has been happened!')
            return None

        def generate_summary(batch):
            # `batch` maps column names to lists of values for the current batch.
            batch["predicted_summary"] = self._generate(batch[source_column], device, max_length)
            return batch

        self._move_model_to(device)

        start = time.monotonic()
        results = input_data.map(generate_summary, batched=True, batch_size=batch_size)
        end = time.monotonic()
        print(f'evaluation time: {end - start}')
        # Skip the per-sample figure for an empty dataset rather than dividing by zero.
        if len(input_data):
            print("total evaluation time / #samples:", (end - start) / len(input_data))

        scorer = RougeScorer(['rouge1', 'rouge2', 'rougeL', 'rougeLsum'])
        rouge_output = scorer.compute(predictions=results["predicted_summary"], references=results[target_column])
        for rouge_metric in rouge_output:
            print(rouge_metric, rouge_output[rouge_metric])
        return results


In [7]:
# Load the bert2bert checkpoint under evaluation and print its encoder-decoder config.
# `model_name` is reused later to build the output file name.
model_name = 'm3hrdadfi/bert2bert-fa-news-headline'
b2b_model = Summarization(model_name=model_name, model_type="bert2bert")
print(b2b_model.config)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1198122.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3690.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1416836998.0, style=ProgressStyle(descr…


EncoderDecoderConfig {
  "_name_or_path": "m3hrdadfi/bert2bert-fa-news-headline",
  "architectures": [
    "EncoderDecoderModel"
  ],
  "decoder": {
    "_name_or_path": "HooshvareLab/bert-fa-base-uncased",
    "add_cross_attention": true,
    "architectures": [
      "BertForMaskedLM"
    ],
    "attention_probs_dropout_prob": 0.1,
    "bad_words_ids": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": true,
    "is_encoder_decoder":

## Sample Inference

In [8]:
sequence_list = [
    "قبل از به وجود آمدن دی سی، در خلا و فضایی عاری از هرگونه حیات که تنها پرایمال مانیتور بود، یهوه بوسیله قدرت های نامحدود دو برادر خلق کرد؛ یکی از آن ها میکائیل دمیورگوس، و دیگری سمائیل نام گرفت که بعدها با عنوان لوسیفر مورنینگ استار شناخته شد. پس از شکل گیری این دو تن، یهوه آن ها را هدایت نمود و به آن ها چگونگی استفاده از قدرت هایشان را آموخت، در نتیجه آن ها شکلی از خلقت را ایجاد کردند که هم اکنون به عنوان فرضیه چندجهانی دی سی شناخته می شود. میلیاردها سال پیش، لوسیفر فرشته مقرب دست به شورشی علیه پادشاهی بهشت زد و در نتیجه به فضایی عاری از ماده و فاقد هر گونه شکل تحت عنوان چائوپلازم تبعید شد. سپس چائوپلازم تبدیل بهک فضای متروک، ویران و گستره ای تهی با عنوان دوزخ شد، مقصد نهایی برای ارواح ملعون، جایی که مورنینگ استار فرمانروایی می کرد و در انتظار روزی بود تا بتواند دوباره آزاد شود. زمانی که تاریکی اعظم (شیطان وحشی بزرگ) بیدار شده و بازگشت، لوسیفر مجبور شد قدرت خود را با او سهیم شود و فرمانروایی خود را با بعل الذباب و عزازیل به اشتراک گذاشت. بدین سبب سه قدرت مثلثی شکل گرفتند، اما با این حال لوسیفر بخش کثیر قدرت را برای خود نگاه داشت. زمانی فرار رسید که دیریم یکی از اندلس برای جستجوی سکان خود که از او به سرقت رفته بود وارد دوزخ شد. دیریم پس از ورود به جهنم در یک نبرد ذهنی با یک دیو خبیث قدرتمند شرکت کرد و خواستار سکان دزدیده شده خود بود. دیریم پس از اینکه سکان خود را بازیافت لوسیفر را در مقابل تمام شیاطین دوزخ تحقیر کرد، و مورنینگ استار در آن روز سوگند به نابودی دیریم نمود"
]
# Smoke test: generate a summary for the single sample text above; the returned
# list of strings is displayed as the cell output.
b2b_model.bert2bert_summarization_inference(sequence_list, device, max_length=512)

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


['اصابت اتم اهدا شده توسط بیندس ، منجر به نابودی دی سی ای ای شد.']

## wiki-summary dataset

In [9]:
# Fetch the wiki-summary repository and list the directories that hold the
# dataset loading script used by the cells below.
# NOTE(review): the clone is not pinned to a commit, so the dataset script may change between runs.
!git clone https://github.com/m3hrdadfi/wiki-summary
!ls wiki-summary
!ls wiki-summary/datasets
!ls wiki-summary/datasets/wiki_summary_persian

Cloning into 'wiki-summary'...
remote: Enumerating objects: 74, done.[K
remote: Counting objects: 100% (74/74), done.[K
remote: Compressing objects: 100% (54/54), done.[K
remote: Total 74 (delta 24), reused 63 (delta 13), pack-reused 0[K
Unpacking objects: 100% (74/74), done.
app	CHANGELOG.md	 datasets  model_cards	README.md
assets	CONTRIBUTING.md  LICENSE   notebooks	requirements.txt
example.py  wiki_summary_persian
dataset_info.json  wiki_summary_persian.py
README.md	   wiki_summary_persian.py.lock


### wiki-summary v1.0.0

In [10]:
# Load the test split of wiki-summary v1.0.0 from the cloned repository's loading
# script and print a summary of it. The loader returns None if the path is missing.
wiki_summary_v1_test_set = b2b_model.load_dataset_test_file(
    dataset_name="wiki-summary-v1.0.0", 
    dataset_path="./wiki-summary/datasets/wiki_summary_persian"
    )
print('Wiki Summary VERSION 1.0.0')
print(wiki_summary_v1_test_set)

Downloading and preparing dataset wiki_summary_persian/1.0.0 (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /root/.cache/huggingface/datasets/wiki_summary_persian/1.0.0/1.0.0/b651bfcb5d3f8d5db4d7f00e018a1e8bd6484df4a934577b2c8b615ac09827be...


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=295723.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=329739.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset wiki_summary_persian downloaded and prepared to /root/.cache/huggingface/datasets/wiki_summary_persian/1.0.0/1.0.0/b651bfcb5d3f8d5db4d7f00e018a1e8bd6484df4a934577b2c8b615ac09827be. Subsequent calls will reuse this data.
Wiki Summary VERSION 1.0.0
Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 5637
})


In [11]:
# Snapshot GPU (including current memory usage) and CPU details right before the
# v1.0.0 evaluation run.
!nvidia-smi
!lscpu

Tue Jul 13 08:33:25 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    56W / 149W |   2161MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [12]:
# Generate summaries for the whole v1.0.0 test set in batches of 32, scoring them
# against the "highlights" column; timing and ROUGE scores are printed, and the
# dataset with the added predictions is kept for inspection and export below.
evaluation_results_smv1 = b2b_model.bert2bert_evaluation(wiki_summary_v1_test_set, "highlights", device, max_length=512, batch_size=32)

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))


evaluation time: 1269.651270664
total evaluation time / #samples: 0.22523527952173145
rouge1 AggregateScore(low=Score(precision=0.27702191562778794, recall=0.04162419660069148, fmeasure=0.0685804489705628), mid=Score(precision=0.2810553406394162, recall=0.04252999466164028, fmeasure=0.06992581787302656), high=Score(precision=0.28517796097883735, recall=0.043389704802652015, fmeasure=0.0712020898981893))
rouge2 AggregateScore(low=Score(precision=0.03589387902228404, recall=0.004903401884599941, fmeasure=0.008207009182905053), mid=Score(precision=0.03815331856202768, recall=0.0052556853847134585, fmeasure=0.008736113939421222), high=Score(precision=0.040593519484686785, recall=0.005636817716308438, fmeasure=0.009333854277651533))
rougeL AggregateScore(low=Score(precision=0.24120081576992677, recall=0.03607228061509745, fmeasure=0.05946159341948891), mid=Score(precision=0.24481864412301763, recall=0.03688851553193205, fmeasure=0.06061620569432325), high=Score(precision=0.2482282153616931

In [30]:
# Print the first 25 evaluated rows (article, reference and predicted summary)
# for a quick manual inspection; enumerate replaces the hand-maintained counter.
for i, row in enumerate(evaluation_results_smv1):
  if i == 25:
    break
  print(row)

{'article': 'پدر گوهرشادآغا ، « غیاث\u200cالدین ترخان » بود که از بزرگان خاندان تیموری به\u200cشمار می\u200cآمد و در جنگ\u200cهای بزرگی شرکت داشت و لقب « ترخان » به او داده شده بود . ترخان یا آغاجی یا آغجی ، عنوان منصب خاصی در دستگاه امرای ماوراءالنهر و خراسان بوده\u200cاست که دارندهٔ این منصب ، واسطه میان سلطان و مردم بوده ، و به خاطر حشمت و نفوذی که داشته ، بدون التزام به رعایت نوبت و رخصت ، نزد سلطان بار می\u200cیافته\u200cاست . در دوره ایلخانی ، اغلب برای متصدی عنوان « آغاجی » از لقب « ترخان » استفاده می\u200cکرده\u200cاند . مادر او « بانو خان\u200cزاده بیگم » بود که در ماه رجب سال ۸۱۴ ه.ق در مشهد درگذشت و در جوار آرامگاه رضا مدفون است . گوهرشاد همراه با برادرش امیر قره یوسف (۷۹۰ - ۸۰۲ ه.ق) که در دربار تیموریان در هرات وزیر بود نقش مهمی در تاریخ اوایل دوره تیموری ایفا کردند . تحت حمایت او ، زبان فارسی و فرهنگ ایرانی به عنصر اصلی در دربار تیموریان ارتقاء یافت . او و همسرش سلطان شاهرخ تیموری ، نوعی رنسانس فرهنگی ، از طریق صرف حمایت بی\u200cدریغ خود از هنر و جذب هنرمندان ، معماران ، ف

In [28]:
# Build the output file name from the model name; '/' is replaced because it is
# not valid inside a file name.
output_file_name = "summarization_wiki-summary-v1.0.0_{}_outputs.json".format(model_name.replace('/','-'))
# Write one JSON object per line (JSON Lines layout, despite the .json suffix);
# ensure_ascii=False keeps the Persian text readable instead of \u-escaped.
with open(output_file_name, "w", encoding='utf8') as output_file:
  for row in evaluation_results_smv1:
    output_file.write('{}\n'.format(json.dumps(row, ensure_ascii=False)))
# Re-run the Colab authentication and rebuild the Drive client before uploading.
# NOTE(review): this repeats the setup from the first cell — presumably to refresh
# credentials after the long evaluation run; confirm it is still needed.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
# Upload the local file to Google Drive under the same title.
upload = drive.CreateFile({'title': output_file_name})
upload.SetContentFile(output_file_name)
upload.Upload()

### wiki-summary v2.0.0

In [22]:
# Load the test split of wiki-summary v2.0.0 from the same loading script and
# print a summary of it. The loader returns None if the path is missing.
wiki_summary_v2_test_set = b2b_model.load_dataset_test_file(
    dataset_name="wiki-summary-v2.0.0", 
    dataset_path="./wiki-summary/datasets/wiki_summary_persian"
    )
print('Wiki Summary VERSION 2.0.0')
print(wiki_summary_v2_test_set)

Downloading and preparing dataset wiki_summary_persian/2.0.0 (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /root/.cache/huggingface/datasets/wiki_summary_persian/2.0.0/2.0.0/b651bfcb5d3f8d5db4d7f00e018a1e8bd6484df4a934577b2c8b615ac09827be...


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=164464.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=162568.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1479917.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset wiki_summary_persian downloaded and prepared to /root/.cache/huggingface/datasets/wiki_summary_persian/2.0.0/2.0.0/b651bfcb5d3f8d5db4d7f00e018a1e8bd6484df4a934577b2c8b615ac09827be. Subsequent calls will reuse this data.
Wiki Summary VERSION 2.0.0
Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 3754
})


In [23]:
# Record the GPU and CPU of the current runtime right before the
# wiki-summary v2.0.0 evaluation, so the timings below can be put in context.
!nvidia-smi
!lscpu

Tue Jul 13 09:00:10 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0    56W / 149W |  10275MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [24]:
# Evaluate the model on the wiki-summary v2.0.0 test split.
# "highlights" names a dataset feature; presumably it is the reference
# summary column - confirm against bert2bert_evaluation.
evaluation_results_smv2 = b2b_model.bert2bert_evaluation(
    wiki_summary_v2_test_set,
    "highlights",
    device,
    max_length=512,
    batch_size=32,
)

HBox(children=(FloatProgress(value=0.0, max=118.0), HTML(value='')))


evaluation time: 884.3910192189999
total evaluation time / #samples: 0.23558631305780497
rouge1 AggregateScore(low=Score(precision=0.32603652525490723, recall=0.041815281321700926, fmeasure=0.07203106975351434), mid=Score(precision=0.33122173856244086, recall=0.04269161205942919, fmeasure=0.07345727989105882), high=Score(precision=0.33650893348310273, recall=0.04366302360886048, fmeasure=0.07492538433199793))
rouge2 AggregateScore(low=Score(precision=0.047894710775747876, recall=0.005604588675446627, fmeasure=0.009729161333126271), mid=Score(precision=0.051057330821918434, recall=0.006010721852322395, fmeasure=0.010425143335508223), high=Score(precision=0.054098643926765874, recall=0.006390720469694649, fmeasure=0.0110702640599621))
rougeL AggregateScore(low=Score(precision=0.2790997335471631, recall=0.03526236654225553, fmeasure=0.060925077782594365), mid=Score(precision=0.28373505039450586, recall=0.03605402802579599, fmeasure=0.06214569901645958), high=Score(precision=0.28852805459

In [29]:
# Sanity-check the wiki-summary v2.0.0 evaluation output by printing only the
# first few rows; dumping every row would flood the cell output.
preview_limit = 25  # maximum number of rows to print
# enumerate() replaces the hand-maintained counter, so the index cannot drift
# out of sync with the loop.
for row_index, row in enumerate(evaluation_results_smv2):
  if row_index >= preview_limit:
    break
  print(row)

{'article': 'قوی سیاه روایتگر ماجرای زندگی نینا سیرز (ناتالی پورتمن) دختریست که همه دوران کودکی و نوجوانی خود را به فراگیری و تمرین ممتد رقص باله گذرانیده است. نینا به عنوان یک بالرین حرفه ای و ستارهک شرکت معتبر، در تلاش برای به دست آوردن نقش اول باله معروف دریاچه قو اثر چایکوفسکی است. اما مدیر شرکت و طراح رقص های این باله (به بازی ونسان کسل)، که در مورد توانایی های نینا برای بازی هم زمان در دو نقش قوی سفید معصوم و قوی سیاه اغواگر مطمئن نیست، قابلیت های او را زیر سؤال می برد. شخصیت نینا سیرز که ناتالی پورتمن نقش آن را ایفا کرده، دچار نوعی وسواس شدید برای تکامل در حرفه خود در حد یک بیماری روانی است. او دستخوش یک اراده با میل قوی است و توانایی انعطاف پذیری و رهایی از هدف خود را ندارد. شخصیت لیلی رقیب نینا با بازی میلا کونیس، فردی است که متقابلا بیشتر در پی آسودگی و صفای زندگی است تا تکامل گرایی. مادر نینا با نقش آفرینی باربارا هرشی، مادری است بیش از حد مراقب که آرزو دارد دخترش به موفقیتی دست یابد که خودش به آن نرسیده است. دیگر شخصیت فیلم، بث، یک ستاره پیشین شرکت باله است که وینونا رایدر 

In [27]:
# Dump the wiki-summary v2.0.0 evaluation rows to a local file, one JSON
# object per line. ensure_ascii=False keeps non-ASCII text unescaped.
safe_model_name = model_name.replace('/','-')
output_file_name = "summarization_wiki-summary-v2.0.0_{}_outputs.json".format(safe_model_name)
with open(output_file_name, "w", encoding='utf8') as output_file:
  output_file.writelines(
      '{}\n'.format(json.dumps(record, ensure_ascii=False))
      for record in evaluation_results_smv2
  )

# Rebuild the authenticated Drive client before uploading.
# NOTE(review): presumably repeated here because credentials may have expired
# during the long evaluation run - confirm.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Create a Drive file titled after the local file and push its content.
drive_file = drive.CreateFile({'title': output_file_name})
drive_file.SetContentFile(output_file_name)
drive_file.Upload()

## news-headline dataset

In [8]:
# Fetch the news-headline-generation repository, then list its top level,
# its datasets directory, and the news_headline dataset folder that is passed
# as dataset_path when the test set is loaded.
!git clone https://github.com/m3hrdadfi/news-headline-generation
!ls news-headline-generation
!ls news-headline-generation/datasets
!ls news-headline-generation/datasets/news_headline

Cloning into 'news-headline-generation'...
remote: Enumerating objects: 58, done.[K
remote: Counting objects: 100% (58/58), done.[K
remote: Compressing objects: 100% (36/36), done.[K
remote: Total 58 (delta 18), reused 53 (delta 14), pack-reused 0[K
Unpacking objects: 100% (58/58), done.
app	CHANGELOG.md	 datasets  model_cards	README.md
assets	CONTRIBUTING.md  LICENSE   notebooks	requirements.txt
news_headline
news_headline.py


In [9]:
# Load the news-headline v1.0.0 test split through the dataset script
# directory of the cloned repository, then show a banner followed by the
# dataset summary.
news_headline_v1_test_set = b2b_model.load_dataset_test_file(
    dataset_path="./news-headline-generation/datasets/news_headline",
    dataset_name="news-headline-v1.0.0",
)
print('News Headline VERSION 1.0.0', news_headline_v1_test_set, sep='\n')

Downloading and preparing dataset news_headline/1.0.0 (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /root/.cache/huggingface/datasets/news_headline/1.0.0/1.0.0/04ddafb3475601523b89c9236f01bc7eb0ba9aa123980ae1906a2cec60154fd3...


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=269850.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=299867.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Downloading', max=1.0, style=ProgressSt…




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Dataset news_headline downloaded and prepared to /root/.cache/huggingface/datasets/news_headline/1.0.0/1.0.0/04ddafb3475601523b89c9236f01bc7eb0ba9aa123980ae1906a2cec60154fd3. Subsequent calls will reuse this data.
News Headline VERSION 1.0.0
Dataset({
    features: ['article', 'headline', 'id'],
    num_rows: 3896
})


In [10]:
# Record the GPU and CPU of the current runtime right before the
# news-headline v1.0.0 evaluation, so the timings below can be put in context.
!nvidia-smi
!lscpu

Tue Jul 13 09:48:20 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8    11W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
# Evaluate the model on the news-headline v1.0.0 test split.
# "headline" names a dataset feature; presumably it is the reference
# headline column - confirm against bert2bert_evaluation.
evaluation_results_nhv1 = b2b_model.bert2bert_evaluation(
    news_headline_v1_test_set,
    "headline",
    device,
    max_length=512,
    batch_size=32,
)

HBox(children=(FloatProgress(value=0.0, max=122.0), HTML(value='')))

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)



evaluation time: 475.74925401599995
total evaluation time / #samples: 0.12211223152361395
rouge1 AggregateScore(low=Score(precision=0.40176031937471357, recall=0.3835714211820478, fmeasure=0.38420980669364346), mid=Score(precision=0.4083000205044857, recall=0.3896202315435775, fmeasure=0.3899674269153808), high=Score(precision=0.4154712722683046, recall=0.3961188899582133, fmeasure=0.3964860475692236))
rouge2 AggregateScore(low=Score(precision=0.21648841781278444, recall=0.20376797336194857, fmeasure=0.20490890199180734), mid=Score(precision=0.2225603849793224, recall=0.20966080188744898, fmeasure=0.21089258849907377), high=Score(precision=0.2293769620611666, recall=0.21584621435807094, fmeasure=0.21737178702434393))
rougeL AggregateScore(low=Score(precision=0.36861675766596214, recall=0.3520092210152118, fmeasure=0.35242322303281076), mid=Score(precision=0.37547619892930645, recall=0.35822036637636656, fmeasure=0.3586950667754881), high=Score(precision=0.3824753797635864, recall=0.36

In [12]:
# Sanity-check the news-headline v1.0.0 evaluation output by printing only the
# first few rows; dumping every row would flood the cell output.
preview_limit = 25  # maximum number of rows to print
# enumerate() replaces the hand-maintained counter, so the index cannot drift
# out of sync with the loop.
for row_index, row in enumerate(evaluation_results_nhv1):
  if row_index >= preview_limit:
    break
  print(row)

{'article': 'از : هومن سیسان اکنون بیش از دو ماه است که از ارائه بسته پیشنهادی گروه پنج به علاوه یک به منظور تشویق مقامات ایرانی برای توقف برنامه اتمی این کشور می گذرد . اما تهران همچنان پاسخ روشنی به پیشنهادات قدرت های جهانی ، شامل ارائه مشوق هایی در قبال توقف برنامه اتمی ایران ، نداده است . مقامات تهران بی آنکه تاریخ مشخصی را برای ارائه پاسخ نهایی خود تعیین کرده باشند ، همچنان خواستار ادامه گفتگوها با اتحادیه اروپا هستند . هرچند اتحادیه اروپا و ایالات متحده بر لزوم توجه به تلاش های دیپلماتیک تاکید دارند ، ولی بی نتیجه ماندن گفتگوها ، برنگرانی ها و گمانه زنی ها نسبت به عواقب ادامه بحران اتمی ایران افزوده است . جان بولتون ، سفیر سابق آمریکا در سازمان ملل و از کارشناسان سیاست خارجی ایالات متحده ، در گفتگو با صدای آمریکا با ابراز بدبینی نسبت به اثربخش بودن گفتگوها ، هدف اصلی ایران از مذاکرات را اتلاف وقت می داند . وی می گوید : « فکر می کنم آنها به توانایی تکنولوژیک و علمی ، که بسیار پیچیده است ، دست یافته اند ؛ هم برای ساختن سلاح اتمی و هم به کاری گیری آن در موشک های قاره پیما.فکر نمی کن

In [14]:
# Dump the news-headline v1.0.0 evaluation rows to a local file, one JSON
# object per line. ensure_ascii=False keeps non-ASCII text unescaped.
safe_model_name = model_name.replace('/','-')
output_file_name = "summarization_news-headline-v1.0.0_{}_outputs.json".format(safe_model_name)
with open(output_file_name, "w", encoding='utf8') as output_file:
  output_file.writelines(
      '{}\n'.format(json.dumps(record, ensure_ascii=False))
      for record in evaluation_results_nhv1
  )

# Rebuild the authenticated Drive client before uploading.
# NOTE(review): presumably repeated here because credentials may have expired
# during the long evaluation run - confirm.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Create a Drive file titled after the local file and push its content.
drive_file = drive.CreateFile({'title': output_file_name})
drive_file.SetContentFile(output_file_name)
drive_file.Upload()