In [1]:
# Show which GPU is attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Sat May 21 12:54:49 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install -q datasets transformers rouge_score sacrebleu sacremoses git+https://github.com/google-research/bleurt.git

[K     |████████████████████████████████| 346 kB 10.7 MB/s 
[K     |████████████████████████████████| 4.2 MB 45.3 MB/s 
[K     |████████████████████████████████| 92 kB 11.4 MB/s 
[K     |████████████████████████████████| 880 kB 41.3 MB/s 
[K     |████████████████████████████████| 352 kB 45.4 MB/s 
[K     |████████████████████████████████| 1.2 MB 44.7 MB/s 
[K     |████████████████████████████████| 1.1 MB 28.3 MB/s 
[K     |████████████████████████████████| 84 kB 1.8 MB/s 
[K     |████████████████████████████████| 140 kB 58.1 MB/s 
[K     |████████████████████████████████| 212 kB 52.4 MB/s 
[K     |████████████████████████████████| 127 kB 54.9 MB/s 
[K     |████████████████████████████████| 596 kB 58.3 MB/s 
[K     |████████████████████████████████| 6.6 MB 61.7 MB/s 
[K     |████████████████████████████████| 144 kB 75.4 MB/s 
[K     |████████████████████████████████| 94 kB 4.2 MB/s 
[K     |████████████████████████████████| 271 kB 61.2 MB/s 
[K     |████████████████████

In [3]:
# Mount Google Drive at /gdrive (Colab only); the data paths used below live under this mount point.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [4]:
import torch
import random

# Single seed shared by every random number generator seeded in this notebook.
RANDOM_SEED = 42

# Seed PyTorch first, then Python's `random`; the loop form keeps the cell output empty.
for _seed_fn in (torch.manual_seed, random.seed):
  _seed_fn(RANDOM_SEED)

In [103]:
import pandas as pd
import os

# Folder on the mounted Drive holding both the generated texts and the metric output.
DATA_PATH = "/gdrive/MyDrive/final-project/post-refactor/data/metrics/"
# CSV with the generated texts to score.
GEN_NAME = "bigbird_less_samples_out.csv"
# CSV the scored frame is written to.
OUT_NAME = "bigbird_less_samples_metric.csv"
DEVICE = "cuda"

# Read the generated texts, then discard the "Unnamed: 0" column that the CSV carries.
generated_path = os.path.join(DATA_PATH, GEN_NAME)
generated_raw = pd.read_csv(generated_path)
df = generated_raw.drop(columns=["Unnamed: 0"])

## Compute metrics

In [97]:
from datasets import load_metric
import numpy as np
import nltk
from transformers import AutoTokenizer

# Tokenizer of the BigBird-Pegasus (bigpatent) checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
  "google/bigbird-pegasus-large-bigpatent"
)

# Metric handles are built once here and reused for every scored sample.
ROUGE = load_metric(path='rouge')
SACREBLEU = load_metric(path='sacrebleu')
# BLEURT additionally needs a checkpoint name, passed as the metric's config.
BLEURT = load_metric(path='bleurt', config_name='bleurt-large-512')
SARI = load_metric(path='sari')

def compute_metrics(input, reference, predicted):
  """Score generated texts with ROUGE, SacreBLEU, BLEURT and SARI.

  The three arguments are parallel lists: element i of each one belongs to
  the same sample, and every sample has exactly one reference.

  Args:
    input: source texts; only SARI uses them (as `sources`). The name shadows
      the builtin inside this function and is kept for caller compatibility.
    reference: one reference text per sample.
    predicted: one generated text per sample.

  Returns:
    A dict with, in this order: "sacrebleu" (the metric's "score"), one entry
    per key returned by the ROUGE metric (mid F-measure times 100), "bleurt"
    (mean of the per-sample scores times 100) and "sari".

  Raises:
    ValueError: if the lists are empty or do not all have the same length.
  """
  n_samples = len(predicted)
  if n_samples == 0 or not (len(input) == len(reference) == n_samples):
    raise ValueError(
        "input, reference and predicted must be non-empty lists of equal length"
    )

  # SacreBLEU and SARI expect one *list* of references per prediction.
  references_per_sample = [[ref] for ref in reference]

  rouge_scores = ROUGE.compute(references=reference, predictions=predicted)
  rouge_scores = { k: v.mid.fmeasure * 100 for k, v in rouge_scores.items() }

  sacrebleu_score = SACREBLEU.compute(
      predictions=predicted, references=references_per_sample
  )["score"]

  # BLEURT returns one score per sample; average them so that no sample is
  # dropped (for a single sample the mean is that sample's score).
  bleurt_scores = BLEURT.compute(predictions=predicted, references=reference)["scores"]
  bleurt_score = sum(bleurt_scores) / len(bleurt_scores) * 100

  sari_score = SARI.compute(
      predictions=predicted, sources=input, references=references_per_sample
  )["sari"]

  return {
      "sacrebleu": sacrebleu_score,
      **rouge_scores,
      "bleurt": bleurt_score,
      "sari": sari_score
  }

Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.83M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.35M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/775 [00:00<?, ?B/s]

INFO:tensorflow:Reading checkpoint /root/.cache/huggingface/metrics/bleurt/bleurt-large-512/downloads/extracted/299e33e80b83c78cc60e485384c7804f6ec1fb36c2013c5078257c17a82719ca/bleurt-large-512.
INFO:tensorflow:Config file found, reading.
INFO:tensorflow:Will load checkpoint bert_custom
INFO:tensorflow:Loads full paths and checks that files exists.
INFO:tensorflow:... name:bert_custom
INFO:tensorflow:... vocab_file:vocab.txt
INFO:tensorflow:... bert_config_file:bert_config.json
INFO:tensorflow:... do_lower_case:True
INFO:tensorflow:... max_seq_length:512
INFO:tensorflow:Creating BLEURT scorer.
INFO:tensorflow:Creating WordPiece tokenizer.
INFO:tensorflow:WordPiece tokenizer instantiated.
INFO:tensorflow:Creating Eager Mode predictor.
INFO:tensorflow:Loading model.
INFO:tensorflow:BLEURT initialized.


In [104]:
from tqdm.notebook import tqdm

# Columns of `df` that hold the generated texts to score.
GENERATION_COLUMNS = ["greedy_gen", "beam1_gen"]

# Collect one record of "<generation column>_<metric>" values per row and attach
# them to `df` in one step afterwards, instead of growing the frame cell by cell.
metric_rows = []
for idx, row in tqdm(list(df.iterrows())):
  row_metrics = {}
  for col in GENERATION_COLUMNS:
    ref = [row["claim"]]
    # Named `source`, not `input`, so the builtin `input()` is not rebound
    # in the notebook namespace.
    source = [row["summary"]]
    pred = [row[col]]

    metrics = compute_metrics(source, ref, pred)

    for m, v in metrics.items():
      row_metrics[f"{col}_{m}"] = v
  metric_rows.append(row_metrics)

# Same row labels as `df`, so the column assignment below aligns row by row.
metrics_df = pd.DataFrame(metric_rows, index=df.index)
for metric_name in metrics_df.columns:
  # Plain column assignment overwrites on re-run, keeping the cell idempotent.
  df[metric_name] = metrics_df[metric_name]

  0%|          | 0/60 [00:00<?, ?it/s]

In [105]:
# Write the frame (index included, pandas' default) to the metrics CSV in DATA_PATH.
metrics_path = os.path.join(DATA_PATH, OUT_NAME)
df.to_csv(metrics_path)